├── pandas_to_sql
│   ├── engine
│   │   ├── __init__.py
│   │   ├── columns
│   │   │   ├── __init__.py
│   │   │   ├── column.py
│   │   │   ├── bool_column.py
│   │   │   ├── str_column.py
│   │   │   ├── datetime_column.py
│   │   │   ├── common.py
│   │   │   └── numeric_columns.py
│   │   ├── grouped_table.py
│   │   └── table.py
│   ├── testing
│   │   ├── __init__.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── asserters.py
│   │   │   └── fake_data_creation.py
│   │   ├── tests
│   │   │   ├── test_operations_base.py
│   │   │   ├── test_table_operations.py
│   │   │   ├── test_select.py
│   │   │   ├── test_pandas_dataframe_intercepter.py
│   │   │   ├── test_concat.py
│   │   │   ├── test_assignment.py
│   │   │   ├── test_groupby.py
│   │   │   ├── test_merge.py
│   │   │   ├── test_datetime.py
│   │   │   ├── test_str.py
│   │   │   ├── test_operations_numeric.py
│   │   │   └── test_operations_compare.py
│   │   └── conftest.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── helpers.py
│   │   ├── pandas_interceptor.py
│   │   └── pandas_dataframe_intercepter.py
│   ├── conventions
│   │   ├── __init__.py
│   │   └── groupby_conventions.py
│   └── __init__.py
├── .gitignore
├── environment.yml
├── .github
│   └── workflows
│       ├── tests.yml
│       └── publish-to-pypi.yml
├── setup.py
├── LICENSE
├── example_runner.py
├── README.md
└── pandas_to_sql_colab_example.ipynb
/pandas_to_sql/engine/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pandas_to_sql/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pandas_to_sql/engine/columns/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pandas_to_sql/conventions/__init__.py:
--------------------------------------------------------------------------------
1 | from pandas_to_sql.conventions.groupby_conventions import flatten_grouped_dataframe
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__/*
2 | **/.pytest_cache/*
3 | .vscode/
4 | **/env/*
5 | local/*
6 | build/*
7 | dist/*
8 | **.egg-info
9 | **.db
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: pandas-to-sql-dev
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | # required
7 | - setuptools
8 | - python=3.7
9 | - numpy>=1.19
10 | - pandas>=1.1
11 | # testing
12 | - pytest>=5.0.1
13 | - sqlalchemy
14 |
15 |
--------------------------------------------------------------------------------
/pandas_to_sql/engine/columns/column.py:
--------------------------------------------------------------------------------
1 |
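2 | # Base class for column expressions: `dtype` is the SQL type tag (e.g. 'INT'),
3 | # `sql_string` is the SQL expression that computes the column.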
2 | class Column:
3 | dtype = None
4 | sql_string = None
5 |
6 | def __init__(self, dtype=None, sql_string=None):
7 | self.dtype = dtype
8 | self.sql_string = sql_string
9 |
10 | def __copy__(self):
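11 |         # concrete column subclasses take a single sql_string argument, so this preserves the subclass type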
11 | return type(self)(self.sql_string)
12 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_operations_base.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pandas_to_sql.testing.utils.asserters import assert_
3 | from copy import copy
4 |
5 | def test_copy():
6 | df = pytest.df1
7 | df2 = copy(df)
8 | df['new_value'] = df.random_float > 10 # some unrelated operation
9 | assert_(df2)
10 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_table_operations.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import pytest
3 | from pandas_to_sql.testing.utils.asserters import assert_
4 |
5 |
6 | def test_rename():
7 | df = pytest.df1
8 | df = df.rename(columns={'random_int': 'random_int_2',
9 | 'random_str': 'random_str_2'})
10 | assert_(df)
11 |
12 | def test_drop():
13 | df = pytest.df1
14 | df = df.drop(columns=['random_int', 'random_str'])
15 | assert_(df)
16 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_select.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pandas_to_sql.testing.utils.asserters import assert_
3 |
4 |
5 | def test_select_inline():
6 | assert_(pytest.df1[['random_int', 'random_float']])
7 |
8 |
9 | def test_select_not_inline():
10 | df = pytest.df1[['random_int', 'random_float']]
11 | assert_(df)
12 |
13 |
14 | def test_select_multiple_times():
15 | df = pytest.df1[['random_int', 'random_datetime','random_bool']]
16 | df = df[['random_datetime']]
17 | assert_(df)
18 |
--------------------------------------------------------------------------------
/pandas_to_sql/__init__.py:
--------------------------------------------------------------------------------
1 | from pandas_to_sql.engine.table import create_table_from_schema
2 | from pandas_to_sql.utils.helpers import create_schema_from_df
3 | from pandas_to_sql.utils.pandas_dataframe_intercepter import PandasDataFrameIntercepter
4 | from pandas_to_sql.utils.pandas_interceptor import PandasIntercepter
5 |
6 |
7 | def wrap_df(df, table_name):
8 | t = create_table_from_schema(table_name=table_name, schema=create_schema_from_df(df))
9 | return PandasDataFrameIntercepter(df, t)
10 |
11 |
12 | def wrap_pd(pd):
13 | return PandasIntercepter(pd)
--------------------------------------------------------------------------------
/pandas_to_sql/engine/columns/bool_column.py:
--------------------------------------------------------------------------------
1 | from pandas_to_sql.engine.columns.column import Column
2 | from pandas_to_sql.engine.columns.common import value_to_sql_string, add_common_operators_to_class
3 |
4 |
5 | class BoolColumn(Column):
6 | def __init__(self, sql_string):
7 | super().__init__(dtype='BOOL', sql_string=sql_string)
8 |
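9 |     # pandas negates a boolean Series with - or ~; both map to SQL NOT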
9 | def __neg__(self):
10 | return BoolColumn(sql_string=f'(NOT({value_to_sql_string(self)}))')
11 |
12 | def __invert__(self):
13 | return BoolColumn(sql_string=f'(NOT({value_to_sql_string(self)}))')
14 |
15 |
16 | add_common_operators_to_class(BoolColumn)
17 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_pandas_dataframe_intercepter.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pandas_to_sql.testing.conftest import TABLE_NAME_1
3 |
4 | def test_columns_attribute():
5 | expected = pytest.df1.df_pandas.columns
6 | actual = pytest.df1.columns
7 | assert type(expected) == type(actual)
8 | assert set(expected) == set(actual)
9 |
10 |
11 | def test_get_sql_string_attribute():
12 | expected = '''SELECT (random_int) AS random_int, (random_float) AS random_float, (random_bool) AS random_bool, (random_datetime) AS random_datetime, (random_str) AS random_str FROM random_data_1'''
13 | assert expected == pytest.df1.get_sql_string()
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_concat.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pandas_to_sql.testing.utils.asserters import assert_
3 | from pandas_to_sql.conventions import flatten_grouped_dataframe
4 | from copy import copy
5 | import pandas as pd
6 | import pandas_to_sql
7 |
8 | def test_concat_simple():
9 | df = pytest.df1
10 |
11 | pd_wrapped = pandas_to_sql.wrap_pd(pd)
12 |
13 | df2 = pd_wrapped.concat([df, df, df])
14 |
15 | assert_(df2)
16 |
17 |
18 | def test_concat_simple_with_copy():
19 | df = pytest.df1
20 |
21 | pd_wrapped = pandas_to_sql.wrap_pd(pd)
22 |
23 | df2 = pd_wrapped.concat([df, copy(df), copy(df)])
24 |
25 | assert_(df2)
26 |
27 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build-linux:
7 | runs-on: ubuntu-latest
8 | strategy:
9 | max-parallel: 5
10 |
11 | steps:
12 | - uses: actions/checkout@v2
13 | - name: Set up Python 3.8
14 | uses: actions/setup-python@v2
15 | with:
16 | python-version: 3.8
17 | - name: Install dependencies
18 | run: |
19 | # $CONDA is an environment variable pointing to the root of the miniconda directory
20 | $CONDA/bin/conda env update --file environment.yml --name base
21 | - name: Test with pytest
22 | run: |
23 | conda install pytest
24 | $CONDA/bin/pytest
25 |
--------------------------------------------------------------------------------
/pandas_to_sql/utils/helpers.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 |
4 | ## Types
5 | def convert_df_type(col_type):
6 | if pd.api.types.is_bool_dtype(col_type): return 'BOOL'
7 | elif pd.api.types.is_integer_dtype(col_type): return 'INT'
8 | elif pd.api.types.is_numeric_dtype(col_type): return 'FLOAT'
9 | elif pd.api.types.is_string_dtype(col_type): return 'VARCHAR'
10 | elif pd.api.types.is_datetime64_any_dtype(col_type): return 'DATETIME'
11 | else: raise Exception(f"could not convert column type. got: {str(col_type)}")
12 |
13 |
14 | def create_schema_from_df(df):
15 | schema = {}
16 | for col_name, col_type in df.dtypes.items():
17 | schema[col_name] = convert_df_type(col_type)
18 | return schema
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import setuptools
3 |
4 |
5 | v = os.environ['RELEASE_VERSION']
6 | print('Version: ', v)
7 |
8 | with open("README.md", "r") as fh:
9 | long_description = fh.read()
10 |
11 | setuptools.setup(
12 | name="pandas-to-sql", # Replace with your own username
13 | version=v,
14 | author="Amir",
15 | author_email="amirpupko@gmail.com",
16 | description="Convert pandas dataframe manipulations to sql query string",
17 | long_description=long_description,
18 | long_description_content_type="text/markdown",
19 | url="https://github.com/AmirPupko/pandas-to-sql",
20 | packages=setuptools.find_packages(),
21 | classifiers=[
22 | "Programming Language :: Python :: 3",
23 | "License :: OSI Approved :: MIT License",
24 | "Operating System :: OS Independent",
25 | ],
26 | python_requires='>=3.6',
27 | )
--------------------------------------------------------------------------------
/pandas_to_sql/conventions/groupby_conventions.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from pandas_to_sql.utils.pandas_dataframe_intercepter import PandasDataFrameIntercepter
3 | from copy import copy
4 |
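5 | # Flatten a grouped result (a Series or a frame with multi-level columns) into a flat
6 | # DataFrame whose column names match the SQL aggregate columns (e.g. random_float_mean).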
5 | def flatten_grouped_dataframe(df):
6 |     if not isinstance(df, PandasDataFrameIntercepter):
7 |         raise Exception(f"expected type {str(PandasDataFrameIntercepter)}. got: {str(type(df))}")
8 |
9 | df_c = copy(df.df_pandas)
10 | if isinstance(df_c, pd.core.series.Series):
11 | series_name = df_c.name
12 | new_col_name = list(filter(lambda k: k.startswith(series_name), df.df_sql_convert_table.columns.keys()))[0]
13 | df_c = df_c.reset_index().rename(columns={series_name: new_col_name})
14 | else:
15 | df_c.columns = df_c.columns.map('_'.join)
16 | df_c = df_c.reset_index()
17 | return PandasDataFrameIntercepter(df_c, copy(df.df_sql_convert_table))
18 |
--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
1 | # This workflows will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Publish to PyPI
5 |
6 | on:
7 | release:
8 | types: [created]
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: '3.x'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine
25 | - name: Build and publish
26 | env:
27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 | RELEASE_VERSION: ${{ github.event.release.tag_name }}
30 | run: |
31 | python setup.py sdist bdist_wheel
32 | twine upload dist/*
33 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/conftest.py:
--------------------------------------------------------------------------------
1 |
2 | from copy import copy
3 | import sqlite3
4 | import pytest
5 | from pandas_to_sql import wrap_df
6 | from pandas_to_sql.testing.utils import fake_data_creation
7 |
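8 | # module-level setup: build two fake tables once and load them into a local SQLite file db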
8 | sql_connection = sqlite3.connect('./example.db') #create db
9 |
10 | TABLE_NAME_1 = 'random_data_1'
11 | DF1, SCHEMA_1 = fake_data_creation.create_fake_dataset()
12 | DF1.to_sql(TABLE_NAME_1, sql_connection, if_exists='replace', index=False, dtype=SCHEMA_1)
13 |
14 | TABLE_NAME_2 = 'random_data_2'
15 | DF2, SCHEMA_2 = fake_data_creation.create_fake_dataset()
16 | DF2.columns = DF2.columns.map(lambda c: c + '_2')
17 | DF2.to_sql(TABLE_NAME_2, sql_connection, if_exists='replace', index=False, dtype=SCHEMA_2)
18 |
19 | def pytest_configure():
20 | pytest.df1 = None
21 | pytest.df2 = None
22 | pytest.sql_connection = sql_connection
23 |
24 | @pytest.fixture(scope="function", autouse=True)
25 | def run_around_tests():
27 | pytest.df1 = wrap_df(copy(DF1), TABLE_NAME_1)
28 | pytest.df2 = wrap_df(copy(DF2), TABLE_NAME_2)
29 | yield
30 | # run after function
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 AmirPupko
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_assignment.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import pytest
3 | from pandas_to_sql.testing.utils.asserters import assert_
4 |
5 |
6 | def test_assign():
7 | df = pytest.df1
8 | df['new_value'] = df.random_float + 10
9 | assert_(df)
10 |
11 |
12 | def test_assign2():
13 | df = pytest.df1
14 | df['new_value'] = df.random_bool
15 | assert_(df)
16 |
17 | def test_assign3():
18 | df = pytest.df1
19 | df['new_value'] = df.random_bool
20 | df2 = df[['new_value','random_float']]
21 | assert_(df2)
22 |
23 | def test_assignment_int():
24 | df = pytest.df1
25 | df['new_value2'] = 4
26 | assert_(df)
27 |
28 | def test_assignment_float():
29 | df = pytest.df1
30 | df['new_value2'] = 23.132
31 | assert_(df)
32 |
33 | def test_assignment_bool():
34 | df = pytest.df1
35 | df['new_value2'] = True
36 | assert_(df)
37 |
38 | def test_assignment_str():
39 | df = pytest.df1
40 | df['new_value2'] = 'some_str'
41 | assert_(df)
42 |
43 | def test_assignment_datetime():
44 | df = pytest.df1
45 | df['new_value'] = datetime(1970, 1, 1)
46 | assert_(df)
47 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/utils/asserters.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from pandas.testing import assert_frame_equal
3 | import pytest
4 |
5 |
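6 | # order-insensitive comparison: align column order, sort both frames by all columns,
7 | # then delegate to pandas' assert_frame_equal (dtype differences are ignored)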
6 | def assert_dataframes_equals(expected, actual):
7 | assert expected.shape==actual.shape
8 | assert set(expected.columns) == set(actual.columns)
9 | columns_order = list(expected.columns)
10 | a = actual[columns_order].sort_values(by=list(actual.columns)).reset_index(drop=True)
11 | e = expected[columns_order].sort_values(by=list(actual.columns)).reset_index(drop=True)
12 | assert_frame_equal(e, a, check_dtype=False)
13 |
14 |
15 | def get_expected_and_actual(df):
16 | actual_query_string = df.df_sql_convert_table.get_sql_string()
17 | actual_columns = df.df_sql_convert_table.columns
18 | datetime_columns = [c for c in actual_columns.keys() if actual_columns[c].dtype == 'DATETIME']
19 |
20 | df_actual = pd.read_sql_query(actual_query_string, pytest.sql_connection, parse_dates=datetime_columns)
21 | df_expected = df.df_pandas
22 |
23 | return df_expected, df_actual
24 |
25 | def assert_(df):
26 | df_expected, df_actual = get_expected_and_actual(df)
27 |
31 |
32 | assert_dataframes_equals(df_expected, df_actual)
33 |
--------------------------------------------------------------------------------
/example_runner.py:
--------------------------------------------------------------------------------
1 | from copy import copy
2 | import sqlite3
3 | import pandas as pd
4 | import pandas_to_sql
5 | from pandas_to_sql.testing.utils.fake_data_creation import create_fake_dataset
6 | from pandas_to_sql.conventions import flatten_grouped_dataframe
7 |
8 | # table_name = 'random_data'
9 | # df, _ = create_fake_dataset()
10 | # df_ = pandas_to_sql.wrap_df(df, table_name)
11 | # df2 = df_.groupby('random_int').agg({'random_float':['mean','sum','count'], 'random_str':', '.join})
12 | # df2 = flatten_grouped_dataframe(df2)
13 | # print(df2.get_sql_string())
14 |
15 | iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')
16 | table_name = 'iris'
17 | sql_connection = sqlite3.connect('./iris.db') #create db
18 | iris.to_sql(table_name, sql_connection, if_exists='replace', index=False)
19 |
20 | df = pandas_to_sql.wrap_df(iris, table_name)
21 | pd_wrapped = pandas_to_sql.wrap_pd(pd)
22 |
23 | df_ = copy(df)
24 | df_['sepal_width_rounded'] = df_.sepal_width.round()
25 | df_1 = df_[df_.species=='setosa'].reset_index(drop=True)
26 | df_2 = df_[df_.species=='versicolor'].reset_index(drop=True)
27 |
28 | some_df = pd_wrapped.concat([df_1, df_2]).reset_index(drop=True)
29 |
30 | sql_string = some_df.get_sql_string()
31 |
32 | df_from_sql_database = pd.read_sql_query(sql_string, sql_connection)
33 | df_pandas = some_df.df_pandas
34 |
35 | from pandas_to_sql.testing.utils.asserters import assert_dataframes_equals
36 | assert_dataframes_equals(df_pandas, df_from_sql_database)
37 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_groupby.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pandas_to_sql.testing.utils.asserters import assert_
3 | from pandas_to_sql.conventions import flatten_grouped_dataframe
4 |
5 |
6 |
7 | def test_groupby_mean():
8 | df2 = pytest.df1.groupby('random_int').random_float.mean()
9 | assert_(flatten_grouped_dataframe(df2))
10 |
11 | def test_groupby_sum():
12 | df2 = pytest.df1.groupby('random_int').random_float.sum()
13 | assert_(flatten_grouped_dataframe(df2))
14 |
15 | def test_groupby_count():
16 | df2 = pytest.df1.groupby('random_int').random_float.count()
17 | assert_(flatten_grouped_dataframe(df2))
18 |
19 |
20 | def test_groupby_agg_1():
21 | df2 = pytest.df1 \
22 | .groupby('random_int') \
23 | .agg({'random_float':['mean','sum','count'], 'random_str':', '.join})
24 | assert_(flatten_grouped_dataframe(df2))
25 |
26 | def test_groupby_agg_2():
27 | df2 = pytest.df1 \
28 | .groupby('random_bool') \
29 | .agg({'random_int':['mean','sum','count'], 'random_str':[', '.join]})
30 | assert_(flatten_grouped_dataframe(df2))
31 |
32 |
33 | def test_groupby_multiple_on():
34 | df2 = pytest.df1 \
35 | .groupby(['random_bool', 'random_int']).random_float.mean()
36 | assert_(flatten_grouped_dataframe(df2))
37 |
38 | def test_groupby_multiple_on_agg():
39 | df2 = pytest.df1 \
40 | .groupby(['random_bool', 'random_int']) \
41 | .agg({'random_float': ['count','sum']})
42 | assert_(flatten_grouped_dataframe(df2))
--------------------------------------------------------------------------------
/pandas_to_sql/utils/pandas_interceptor.py:
--------------------------------------------------------------------------------
1 | from copy import copy
2 | import operator
3 | from pandas_to_sql.utils.pandas_dataframe_intercepter import PandasDataFrameIntercepter
4 | from pandas_to_sql.engine.table import create_table, Table
5 |
6 | class PandasIntercepter:
7 | def __init__(self, pandas):
8 | self.pandas = pandas
9 |
10 | def concat(self, objs, axis=0):
11 | objs_pandas = list(map(lambda x: x.df_pandas, objs))
12 | a = self.pandas.concat(objs_pandas, axis=axis)
13 | objs_sql_convert = list(map(lambda x: x.df_sql_convert_table, objs))
14 | b = concat(objs_sql_convert, axis=axis)
15 | return PandasDataFrameIntercepter(a,b)
16 |
17 |
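18 | # concat Table objects by UNION ALL-ing their SELECT statements; all inputs must share the same columns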
18 | def concat(objs, axis=0):
19 | if axis != 0:
20 | raise Exception(f"supporting only axis==0")
21 | for df in objs:
22 | if not isinstance(df, Table):
23 | raise Exception(f'expected Table. got: {str(type(df))}')
24 |
25 | first = None
26 | for columns in list(map(lambda t: set(t.columns.keys()), objs)):
27 | if not first:
28 | first = columns
29 | else:
30 | if columns != first:
31 | raise Exception(f"expected all dataframes to have same columns")
32 |
33 | all_tables_sql_string = list(map(lambda x: x.get_sql_string(), objs))
34 | new_table_sql_string = ' UNION ALL '.join(all_tables_sql_string)
35 | return create_table(table_name='Temp',
36 | columns=copy(objs[0]).columns,
37 | from_sql_string=new_table_sql_string)
38 |
39 |
40 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_merge.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pandas_to_sql.testing.utils.asserters import assert_
3 | from pandas_to_sql.conventions import flatten_grouped_dataframe
4 | from copy import copy
5 |
6 |
7 | def test_merge_inner():
8 | df = pytest.df1
9 | df2 = copy(df)
10 | df2['random_int_plus_3'] = df2.random_int + 3
11 | df2 = df2[df2.random_int < 3]
12 | df2 = df2[['random_int_plus_3','random_str']]
13 | df3 = df.merge(df2, on='random_str', how='inner')
14 | assert_(df3)
15 |
16 |
17 | def test_merge_left():
18 | df = pytest.df1
19 | df2 = copy(df)
20 | df2['random_int_plus_3'] = df2.random_int + 3
21 | df2 = df2[df2.random_int < 3]
22 | df2 = df2[['random_int_plus_3','random_str']]
23 | df3 = df.merge(df2, on='random_str', how='left')
24 | assert_(df3)
25 |
26 |
27 | def test_merge_left_on_right_on_how_inner():
28 | df = pytest.df1
29 | df2 = copy(df)
30 | df2['random_int_plus_3'] = df2.random_int + 3
31 | df2['random_str_2'] = df2.random_str
32 | df2 = df2[df2.random_int < 3]
33 | df2 = df2[['random_int_plus_3','random_str_2']]
34 | df3 = df.merge(df2, left_on='random_str', right_on='random_str_2', how='inner')
35 | assert_(df3)
36 |
37 |
38 | def test_merge_left_on_right_on_how_left():
39 | df = pytest.df1
40 | df2 = copy(df)
41 | df2['random_int_plus_3'] = df2.random_int + 3
42 | df2['random_str_2'] = df2.random_str
43 | df2 = df2[df2.random_int < 3]
44 | df2 = df2[['random_int_plus_3','random_str_2']]
45 | df3 = df.merge(df2, left_on='random_str', right_on='random_str_2', how='left')
46 | assert_(df3)
47 |
48 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/utils/fake_data_creation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 |
5 | def random_datetimes_or_dates(start, end, out_format='datetime', n=10):
6 |     '''
7 |     pandas timestamps are unix times in nanoseconds by default.
8 |     Dividing by 10**9 converts to seconds (or by 24*60*60*10**9 to days), and the
9 |     matching unit ('s' or 'D') is passed to pd.to_datetime.
10 |     The (divide_by, unit) pair is selected by the out_format parameter:
11 |     out_format='datetime' -> (10**9, 's')
12 |     anything else         -> (24*60*60*10**9, 'D')
13 |     '''
14 | (divide_by, unit) = (
15 | 10**9, 's') if out_format == 'datetime' else (24*60*60*10**9, 'D')
16 |
17 | start_u = start.value//divide_by
18 | end_u = end.value//divide_by
19 |
20 | return pd.to_datetime(np.random.randint(start_u, end_u, n), unit=unit)
21 |
22 |
23 | def random_timedelta(start, end, n, unit='D', seed=None):
24 |     if seed is None: seed = 0
25 |     np.random.seed(seed)  # fixed default seed keeps the fake data reproducible
26 |
27 | ndays = (end - start).days + 1
28 | return pd.to_timedelta(np.random.rand(n) * ndays, unit=unit)
29 |
30 |
31 | def create_fake_dataset(start=pd.to_datetime('2015-01-01'), end=pd.to_datetime('2018-01-01')):
32 | df = pd.DataFrame()
33 | df_size = 1000
34 | df_random_columns = {
35 | 'random_int': 'INT',
36 | 'random_float': 'FLOAT',
37 | 'random_bool': 'BOOL',
38 | 'random_datetime': 'DATETIME',
39 | 'random_str': 'VARCHAR',
40 | }
41 | df['random_int'] = np.random.randint(1, 6, df_size)
42 | df['random_float'] = np.random.randn(df_size)
43 | df['random_bool'] = np.random.randn(df_size) > 0
44 | df['random_datetime'] = random_datetimes_or_dates(start, end, n=df_size)
45 | df['random_str'] = pd.util.testing.rands_array(10, df_size)
46 | return df, df_random_columns
47 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_datetime.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta, datetime
2 | import pytest
3 | from pandas_to_sql.testing.utils.asserters import assert_, get_expected_and_actual
4 | from copy import copy
5 | import pandas as pd
6 | import pandas_to_sql
7 |
8 |
9 | def test_add_days():
10 | df = pytest.df1
11 | df['new_value'] = df.random_datetime + timedelta(days=20)
12 | assert_(df)
13 |
14 | def test_radd_days():
15 | df = pytest.df1
16 | df['new_value'] = timedelta(days=40) + df.random_datetime
17 | assert_(df)
18 |
19 | def test_sub_days():
20 | df = pytest.df1
21 | df['new_value'] = df.random_datetime - timedelta(days=40)
22 | assert_(df)
23 |
24 |
25 | def test_add_zero_time_dateoffset():
26 | df = pytest.df1
27 | df['new_value'] = df.random_datetime + pd.offsets.DateOffset(minutes=0, years=0)
28 | assert_(df)
29 |
30 |
31 | def test_dt_second():
32 | df = pytest.df1
33 | df['seconds'] = df.random_datetime.dt.second
34 | assert_(df)
35 |
36 | def test_dt_month():
37 | df = pytest.df1
38 | df['month'] = df.random_datetime.dt.month
39 | assert_(df)
40 |
41 | def test_dt_day():
42 | df = pytest.df1
43 | df['day'] = df.random_datetime.dt.day
44 | assert_(df)
45 |
46 | def test_dt_hour():
47 | df = pytest.df1
48 | df['hour'] = df.random_datetime.dt.hour
49 | assert_(df)
50 |
51 | def test_dt_year():
52 | df = pytest.df1
53 | df['y'] = df.random_datetime.dt.year
54 | assert_(df)
55 |
56 | def test_dt_dayofweek():
57 | df = pytest.df1
58 | df['dayofweek'] = df.random_datetime.dt.dayofweek
59 | assert_(df)
60 |
61 | def test_dt_week():
62 | df = pytest.df1
63 | df['week'] = df.random_datetime.dt.week
64 | df_expected, df_actual = get_expected_and_actual(df)
65 |
66 |     # week-numbering conventions differ between pandas and SQLite's %W, so allow
67 |     # an error of up to 2; 52 and 53 appear when the difference wraps around the year
68 |     assert (df_expected.week - df_actual.week).isin([0, 1, 2, 52, 53]).all()
70 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # pandas-to-sql
3 | **This library is not production ready!!**
4 |
5 | ## Intro
6 | Convert [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) manipulations to a SQL query string.
7 |
8 | 
9 |
10 | 
11 |
12 | Support:
13 | - [sqlite](https://sqlite.org/)
14 |
15 | ### Try it yourself
16 |
17 | ```python
18 | >>> import pandas as pd
19 | >>> import pandas_to_sql
20 | >>> iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')
21 | >>> df = pandas_to_sql.wrap_df(iris, table_name='iris')
22 | >>> df.get_sql_string()
23 | 'SELECT (sepal_length) AS sepal_length, (sepal_width) AS sepal_width, (petal_length) AS petal_length, (petal_width) AS petal_width, (species) AS species FROM iris'
24 | ```
25 |
26 | ```python
27 | >>> df[df.species == 'setosa'].get_sql_string()
28 | "SELECT (sepal_length) AS sepal_length, (sepal_width) AS sepal_width, (petal_length) AS petal_length, (petal_width) AS petal_width, (species) AS species FROM iris WHERE ((species = 'setosa')) "
29 | ```
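30 |
31 | Grouped aggregations work too. A minimal sketch (adapted from `example_runner.py`; the resulting SQL string is omitted here):
32 |
33 | ```python
34 | >>> from pandas_to_sql.conventions import flatten_grouped_dataframe
35 | >>> grouped = df.groupby('species').agg({'sepal_length': ['mean', 'count']})
36 | >>> flatten_grouped_dataframe(grouped).get_sql_string()
37 | ```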
30 |
31 | [Here are some more examples](https://github.com/AmirPupko/pandas-to-sql/blob/main/pandas_to_sql_colab_example.ipynb) [](https://colab.research.google.com/github/AmirPupko/pandas-to-sql/blob/main/pandas_to_sql_colab_example.ipynb)
32 |
33 |
34 | ## Installation
35 | `pip install pandas-to-sql`
36 |
37 |
38 | ## Development
39 |
40 | ### Run example
41 | `python example_runner.py`
42 |
43 | ### Tests
44 | `pytest ./pandas_to_sql`
45 |
46 | ### Environment
47 | ```
48 | conda env create -f environment.yml --prefix ./env
49 | conda activate ./env
50 | conda env update --prefix ./env -f environment.yml
51 | conda remove --prefix ./env --all
52 | ```
53 |
54 | ### New release
55 | ```
56 | python setup.py sdist bdist_wheel
57 | python -m twine upload --repository pypi --skip-existing dist/*
58 | ```
55 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_str.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta, datetime
2 | import pytest
3 | from pandas_to_sql.testing.utils.asserters import assert_, get_expected_and_actual
4 | from copy import copy
5 | import pandas as pd
6 | import pandas_to_sql
7 |
8 |
9 | def test_replace():
10 | df = pytest.df1
11 | df['new_value'] = df.random_str.str.replace('m','v').str.replace('z','_3')
12 | assert_(df)
13 |
14 | def test_lower():
15 | df = pytest.df1
16 | df['new_value'] = df.random_str.str.lower()
17 | assert_(df)
18 |
19 | def test_upper():
20 | df = pytest.df1
21 | df['new_value'] = df.random_str.str.upper()
22 | assert_(df)
23 |
24 | def test_slice1():
25 | df = pytest.df1
26 | df['new_value'] = df.random_str.str.slice(1,3)
27 | assert_(df)
28 |
29 | def test_slice2():
30 | df = pytest.df1
31 | df['new_value'] = df.random_str.str.slice(2)
32 | assert_(df)
33 |
34 | def test_slice3():
35 | df = pytest.df1
36 | df['new_value'] = df.random_str.str.slice(stop=4)
37 | assert_(df)
38 |
39 | def test_slice4():
40 | df = pytest.df1
41 | df['new_value'] = df.random_str.str.slice(-1,-3)
42 | assert_(df)
43 |
44 | def test_strip():
45 | df = pytest.df1
46 | df['new_value'] = df.random_str.str.strip('ABCKSLFjadkj')
47 | assert_(df)
48 |
49 | def test_strip_none_chars():
50 | df = pytest.df1
51 | df['new_value1'] = df.random_str + ' '
52 | df['new_value2'] = df.random_str.str.strip()
53 | assert_(df)
54 |
55 | def test_lstrip():
56 | df = pytest.df1
57 | df['new_value'] = df.random_str.str.lstrip('ABCKSLFjadkj')
58 | assert_(df)
59 |
60 |
61 | def test_rstrip():
62 | df = pytest.df1
63 | df['new_value'] = df.random_str.str.rstrip('ABCKSLFjadkj')
64 | assert_(df)
65 |
66 | def test_len():
67 | df = pytest.df1
68 | df['new_value'] = df.random_str.str.len()
69 | assert_(df)
70 |
71 | def test_contains():
72 | df = pytest.df1
73 | df['new_value1'] = df.random_str.str.contains('a')
74 | df['new_value2'] = df.random_str.str.contains('B')
75 | assert_(df)
76 |
77 | def test_contains_case_false():
78 | df = pytest.df1
79 | df['new_value1'] = df.random_str.str.contains('a', case=False)
80 | df['new_value2'] = df.random_str.str.contains('B', case=False)
81 | assert_(df)
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_operations_numeric.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pandas_to_sql.testing.utils.asserters import assert_
3 |
4 |
5 | def test_add():
6 | df = pytest.df1
7 | df['new_value'] = df.random_float + 10
8 | assert_(df)
9 |
10 | def test_radd():
11 | df = pytest.df1
12 | df['new_value'] = 10 + df.random_float
13 | assert_(df)
14 |
15 | def test_add_str():
16 | df = pytest.df1
17 | df['new_value'] = df.random_str + '_some_other_str'
18 | assert_(df)
19 |
20 | def test_add_str_to_str():
21 | df = pytest.df1
22 | df['new_value'] = df.random_str + '_' + df.random_str
23 | assert_(df)
24 |
25 |
26 | def test_sub():
27 | df = pytest.df1
28 | df['new_value'] = df.random_float - 10
29 | assert_(df)
30 |
31 | def test_rsub():
32 | df = pytest.df1
33 | df['new_value'] = 10 - df.random_float
34 | assert_(df)
35 |
36 |
37 | def test_mul():
38 | df = pytest.df1
39 | df['new_value'] = df.random_float * 2
40 | assert_(df)
41 |
42 | def test_rmul():
43 | df = pytest.df1
44 | df['new_value'] = 2.5 * df.random_int
45 | assert_(df)
46 |
47 | def test_truediv():
48 | df = pytest.df1
49 | df['new_value'] = df.random_int / 2.0
50 | assert_(df)
51 |
52 | def test_truediv2():
53 | df = pytest.df1
54 | df['new_value'] = df.random_float / 2
55 | assert_(df)
56 |
57 | def test_truediv_int_int():
58 | df = pytest.df1
59 | df['new_value'] = df.random_int / 2
60 | assert_(df)
61 |
62 | def test_rtruediv():
63 | df = pytest.df1
64 | df['new_value'] = 2 / df.random_float
65 | assert_(df)
66 |
67 | def test_floordiv():
68 | df = pytest.df1
69 | df['new_value'] = df.random_float // 2.0
70 | assert_(df)
71 |
72 | def test_rfloordiv():
73 | df = pytest.df1
74 | df['new_value'] = 1 // df.random_float
75 | assert_(df)
76 |
77 | def test_round():
78 | df = pytest.df1
79 | df['new_value'] = df.random_float.round()
80 | assert_(df)
81 |
82 | def test_round_with_half_values():
83 | df = pytest.df1
84 | df['a'] = 0.5
85 | df['b'] = 1.5
86 | df['c'] = 2.5
87 | df['d'] = 3.5
88 | df['e'] = -0.5
89 | df['f'] = -1.5
90 | df['g'] = -2.5
91 | df['h'] = -3.5
92 |
93 | for c in ['a','b','c','d','e','f','g','h']:
94 | df[c + '_new'] = df[c].round()
95 |
96 | assert_(df)
97 |
98 |
99 | def test_abs():
100 | df = pytest.df1
101 | df['new_value'] = df.random_float.abs()
102 | assert_(df)
103 |
104 |
--------------------------------------------------------------------------------
/pandas_to_sql/testing/tests/test_operations_compare.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 | from pandas_to_sql import wrap_df
4 | from pandas_to_sql.testing.utils.asserters import assert_
5 |
6 |
7 | def test_gt1():
8 | df = pytest.df1
9 | df['new_value'] = df.random_float > 10
10 | assert_(df)
11 |
12 | def test_gt2():
13 | df = pytest.df1
14 | df['new_value'] = df.random_int > 3
15 | assert_(df)
16 |
17 | def test_abs_float():
18 | df = pytest.df1
19 | df['new_value'] = abs(df.random_float)
20 | assert_(df)
21 |
22 | def test_abs_int():
23 | df = pytest.df1
24 | df['new_value'] = abs(df.random_int)
25 | assert_(df)
26 |
27 | def test_ge():
28 | df = pytest.df1
29 | df['new_value'] = df.random_int >= 3
30 | assert_(df)
31 |
32 | def test_ge_float():
33 | df = pytest.df1
34 | df['new_value'] = df.random_float >= 0
35 | assert_(df)
36 |
37 | def test_ge2():
38 | df = pytest.df1
39 | df['new_value'] = df.random_int >= 3
40 | assert_(df)
41 |
42 | def test_lt():
43 | df = pytest.df1
44 | df['new_value'] = df.random_int < 3
45 | assert_(df)
46 |
47 | def test_le():
48 | df = pytest.df1
49 | df['new_value'] = df.random_int <= 3
50 | assert_(df)
51 |
52 | def test_eq():
53 | df = pytest.df1
54 | df['new_value'] = df.random_int == 3
55 | assert_(df)
56 |
57 | def test_ne():
58 | df = pytest.df1
59 | df['new_value'] = df.random_int != 3
60 | assert_(df)
61 |
62 | def test_tilde():
63 | df = pytest.df1
64 | df['new_value'] = ~df.random_bool
65 | assert_(df)
66 |
67 | def test_neg_bool():
68 | df = pytest.df1
69 | df['new_value'] = -df.random_bool
70 | assert_(df)
71 |
72 | def test_neg_numeric():
73 | df = pytest.df1
74 | df['new_value'] = -df.random_int
75 | assert_(df)
76 |
77 |
78 | def test_two_conds_and():
79 | df = pytest.df1
80 | df['new_value'] = (df.random_float > 1) & (df.random_float <=2)
81 | assert_(df)
82 |
83 | def test_two_conds_or():
84 | df = pytest.df1
85 | df['new_value'] = (df.random_float > 1) or True
86 | assert_(df)
87 |
88 | def test_fillna():
89 | df = pd.DataFrame({'col':[1,None,.3,-20,None]})
90 | table_name = 'some_fillna_table_name'
91 | df.to_sql(table_name, pytest.sql_connection, if_exists='replace', index=False)
92 | df_ = wrap_df(df, table_name)
93 |
94 | df_['new_value'] = df_.col.fillna(2)
95 |
96 | assert_(df_)
97 |
98 | def test_fillna2():
99 | df = pd.DataFrame({'col':[1,None,.3,-20,None]})
100 | table_name = 'some_fillna_table_name'
101 | df.to_sql(table_name, pytest.sql_connection, if_exists='replace', index=False)
102 | df_ = wrap_df(df, table_name)
103 |
104 | df_['new_value'] = df_.col.fillna('f')
105 |
106 | assert_(df_)
107 |
108 | def test_astype():
109 | df = pytest.df1
110 | df['new_value'] = df.random_float.astype(int)
111 | assert_(df)
112 |
113 |
--------------------------------------------------------------------------------
/pandas_to_sql/engine/columns/str_column.py:
--------------------------------------------------------------------------------
1 | from pandas_to_sql.engine.columns.column import Column
2 | from pandas_to_sql.engine.columns.bool_column import BoolColumn
3 | from pandas_to_sql.engine.columns.numeric_columns import IntColumn
4 | from pandas_to_sql.engine.columns.common import add_common_operators_to_class, value_to_sql_string, create_column_from_operation
4 |
5 |
6 | class StrColumn(Column):
7 | def __init__(self, sql_string):
8 | super().__init__(dtype='VARCHAR', sql_string=sql_string)
9 |
10 | def __getattribute__(self, attr):
11 | if attr == 'str':
12 | return self
13 | return object.__getattribute__(self, attr)
14 |
15 | def __add__(self, r):
16 | return create_column_from_operation(self, r, StrColumn, '||')
17 |
18 |     def __radd__(self, l):
19 |         return create_column_from_operation(l, self, StrColumn, '||')
20 |
21 |
22 | add_common_operators_to_class(StrColumn)
23 |
24 | StrColumn.lower = lambda self: StrColumn(sql_string=f'(LOWER({value_to_sql_string(self)}))')
25 | StrColumn.upper = lambda self: StrColumn(sql_string=f'(UPPER({value_to_sql_string(self)}))')
26 |
27 | StrColumn.replace = lambda self, old, new: \
28 | StrColumn(sql_string=f'(REPLACE({value_to_sql_string(self)}, {value_to_sql_string(old)}, {value_to_sql_string(new)}))')
29 |
30 |
31 | def slice_(self, start=None, stop=None, step=None):
32 |     if step: raise Exception('slice "step" not supported')
33 |
34 |     start = start if start else 0
35 |     start += 1  # SQLite SUBSTR positions are 1-based
36 |
37 | if stop:
38 | stop +=1
39 | length = stop - start
40 | s = f'(SUBSTR({value_to_sql_string(self)}, {start}, {length}))'
41 | else:
42 | s = f'(SUBSTR({value_to_sql_string(self)}, {start}))'
43 |
44 | return StrColumn(sql_string=s)
45 |
46 |
47 | StrColumn.slice = slice_
48 |
49 |
50 |
51 |
52 | def strip_(self, op, chars=None):
53 | if not chars:
54 | chars = ' '
55 |     if not isinstance(chars, str):
56 |         raise Exception(f'"chars" must be str. got {str(type(chars))}')
57 |
58 | s = f"({op}({value_to_sql_string(self)}, {value_to_sql_string(chars)}))"
59 | return StrColumn(sql_string=s)
60 |
61 |
62 | StrColumn.strip = lambda self, chars=None: strip_(self, 'TRIM', chars)
63 | StrColumn.lstrip = lambda self, chars=None: strip_(self, 'LTRIM', chars)
64 | StrColumn.rstrip = lambda self, chars=None: strip_(self, 'RTRIM', chars)
65 |
66 | StrColumn.len = lambda self: IntColumn(sql_string=f'(LENGTH({value_to_sql_string(self)}))')
67 |
68 |
69 |
70 | def contains(self, s, case=True):
71 |     if not isinstance(s, str):
72 |         raise Exception(f'"s" must be str. got {str(type(s))}')
73 |
74 |     if case == False:
75 |         sql_string = f"(INSTR(LOWER({value_to_sql_string(self)}), LOWER({value_to_sql_string(s)})))"
76 |     else:
77 |         sql_string = f"(INSTR({value_to_sql_string(self)}, {value_to_sql_string(s)}))"
78 |
79 |     # INSTR returns a 1-based position (0 when not found), so >0 means "contains"
80 |     sql_string = f"(CAST({sql_string} > 0 AS BOOL))"
81 |     return BoolColumn(sql_string=sql_string)
82 |
83 |
84 | StrColumn.contains = contains
85 |
--------------------------------------------------------------------------------
/pandas_to_sql/engine/columns/datetime_column.py:
--------------------------------------------------------------------------------
1 | from datetime import timedelta
2 | from dateutil.relativedelta import relativedelta
3 | import pandas as pd
4 | from pandas_to_sql.engine.columns.column import Column
5 | from pandas_to_sql.engine.columns.common import add_common_operators_to_class, value_to_sql_string
6 | from pandas_to_sql.engine.columns.numeric_columns import IntColumn, FloatColumn
7 |
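8 | # pandas .dt accessor names mapped to the SQLite strftime() format codes that extract them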
8 | time_unit_to_format = {
9 | 'second': '%S',
10 | 'month': '%m',
11 | 'minute': '%M',
12 | 'hour': '%H',
13 | 'week': '%W',
14 | 'year': '%Y',
15 | 'day': '%d',
16 | 'dayofweek': '%w'}
17 |
18 |
19 | class DatetimeColumn(Column):
20 |
21 | def __init__(self, sql_string):
22 | super().__init__(dtype='DATETIME', sql_string=sql_string)
23 |
24 | def __getattribute__(self, attr):
25 | if attr == 'dt':
26 | return self
27 | if attr == 'dayofweek':
28 | return self.extract_dayofweek()
29 | if attr in time_unit_to_format.keys():
30 | return self.extract_time_unit(time_unit_to_format[attr])
31 |
32 | return object.__getattribute__(self, attr)
33 |
34 |
35 | def get_sql_for_format(self, format):
36 | return f"(CAST(strftime('{format}', {value_to_sql_string(self)}) AS INT))"
37 |
38 | def extract_time_unit(self, format):
39 | sql_string = self.get_sql_for_format(format)
40 | return IntColumn(sql_string=sql_string)
41 |
42 | def extract_dayofweek(self):
43 | sql_string = self.get_sql_for_format(time_unit_to_format['dayofweek'])
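44 |         # strftime('%w') yields 0=Sunday..6=Saturday; shift so 0=Monday..6=Sunday, as pandas does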
44 | sql_string = f"( ({sql_string} + 6) % 7)"
45 | return IntColumn(sql_string=sql_string)
46 |
47 |
48 | def __my_add__(col, v):
49 | if isinstance(v, timedelta):
50 | # https://docs.python.org/3/library/datetime.html#datetime.timedelta
51 | sign = '+' if v.days >= 0 else '-'
52 | added_days = f"'{sign}{abs(v.days)} days'"
53 |
54 | sign = '+' if v.seconds >= 0 else '-'
55 | added_seconds = f"'{sign}{abs(v.seconds)} seconds'"
56 |
57 | sql_string = f"(datetime({value_to_sql_string(col)}, {added_days}, {added_seconds}))"
58 | return DatetimeColumn(sql_string=sql_string)
59 |     elif isinstance(v, (relativedelta, pd.offsets.DateOffset)):
60 |         # collect each component as an SQLite datetime modifier, e.g. '+3 months'
61 |         s = []
62 |         for t_type, t_value in v.kwds.items():
63 |             sign = '+' if t_value >= 0 else '-'
64 |             s.append(f"'{sign}{abs(t_value)} {t_type}'")
65 |         sql_string = f"(datetime({value_to_sql_string(col)}, {', '.join(s)}))"
66 |         return DatetimeColumn(sql_string=sql_string)
67 |     else:
68 |         raise Exception(f'Supporting only timedelta, relativedelta and DateOffset. got {str(type(v))}')
75 |
76 |
77 | add_common_operators_to_class(DatetimeColumn)
78 |
79 | DatetimeColumn.__add__ = __my_add__
80 | DatetimeColumn.__radd__ = lambda self, l: __my_add__(self, l)
81 | DatetimeColumn.__sub__ = lambda self, r: __my_add__(self, -r)
82 |
83 |
--------------------------------------------------------------------------------
/pandas_to_sql/engine/columns/common.py:
--------------------------------------------------------------------------------
1 |
2 | import numbers
3 | import operator
4 | from datetime import datetime
5 | from pandas_to_sql.engine.columns.column import Column
6 | from pandas_to_sql.utils.helpers import convert_df_type
7 |
8 | def get_column_class_from_type(col_type):
9 | from pandas_to_sql.engine.columns.bool_column import BoolColumn
10 | from pandas_to_sql.engine.columns.numeric_columns import IntColumn, FloatColumn
11 | from pandas_to_sql.engine.columns.str_column import StrColumn
12 | from pandas_to_sql.engine.columns.datetime_column import DatetimeColumn
13 | if col_type == 'BOOL': return BoolColumn
14 | elif col_type == 'INT': return IntColumn
15 | elif col_type == 'FLOAT': return FloatColumn
16 | elif col_type == 'VARCHAR': return StrColumn
17 | elif col_type == 'DATETIME': return DatetimeColumn
18 | else: raise Exception(f"could not convert column type. got: {str(col_type)}")
19 |
20 |
21 | def value_to_sql_string(value):
22 | if isinstance(value, numbers.Number):
23 | return str(value)
24 | elif isinstance(value, str):
25 | return "'" + value + "'"
26 | elif isinstance(value, datetime):
27 | return f"datetime('{value.strftime('%Y-%m-%d %H:%M:%S')}')"
28 | elif isinstance(value, Column):
29 | return value.sql_string
30 | raise Exception(f"Value not supported. supporting: premitives and {str(Column)}. got {str(type(value))}")
31 |
32 |
33 | def create_column_from_value(v):
34 | from pandas_to_sql.engine.columns.bool_column import BoolColumn
35 | from pandas_to_sql.engine.columns.str_column import StrColumn
36 | from pandas_to_sql.engine.columns.datetime_column import DatetimeColumn
37 | from pandas_to_sql.engine.columns.numeric_columns import IntColumn, FloatColumn
38 | sql_string = value_to_sql_string(v)
39 |     if isinstance(v, bool): return BoolColumn(sql_string)  # check bool before int: bool is a subclass of int
40 |     if isinstance(v, int): return IntColumn(sql_string)
41 |     if isinstance(v, float): return FloatColumn(sql_string)
42 |     if isinstance(v, str): return StrColumn(sql_string)
43 |     if isinstance(v, datetime): return DatetimeColumn(sql_string)
44 |
45 |     raise Exception(f'trying to set table column with unsupported type. expected types are Column or primitives. got type: {str(type(v))}')
46 |
47 | def create_column_from_operation(l, r, dtype, op):
48 | return dtype(sql_string=f'({value_to_sql_string(l)} {op} {value_to_sql_string(r)})')
49 |
50 |
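51 | # attach the shared comparison/boolean operators to a column class; each one
52 | # returns a BoolColumn wrapping the corresponding SQL expression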
51 | def add_common_operators_to_class(class_type):
52 | from pandas_to_sql.engine.columns.bool_column import BoolColumn
53 |
54 | def __lt__(self,other):
55 | return create_column_from_operation(self, other, BoolColumn, '<')
56 |
57 | def __le__(self,other):
58 | return create_column_from_operation(self, other, BoolColumn, '<=')
59 |
60 | def __gt__(self,other):
61 | return create_column_from_operation(self, other, BoolColumn, '>')
62 |
63 | def __ge__(self,other):
64 | return create_column_from_operation(self, other, BoolColumn, '>=')
65 |
66 | def __eq__(self,other):
67 | return create_column_from_operation(self, other, BoolColumn, '=')
68 |
69 | def __ne__(self,other):
70 | return create_column_from_operation(self, other, BoolColumn, '<>')
71 |
72 | def __and__(self,other):
73 | return create_column_from_operation(self, other, BoolColumn, 'AND')
74 |
75 | def __or__(self,other):
76 | return create_column_from_operation(self, other, BoolColumn, 'OR')
77 |
78 | def fillna(self, v):
79 | dtype = type(self)
80 | return dtype(sql_string=f'(IFNULL({value_to_sql_string(self)}, {value_to_sql_string(v)}))')
81 |
82 | def astype(self, t):
83 | tt = convert_df_type(t)
84 | dtype = get_column_class_from_type(tt)
85 | return dtype(sql_string=f'(CAST({value_to_sql_string(self)} AS {tt}))')
86 |
87 |
88 | class_type.__lt__ = __lt__
89 | class_type.__gt__ = __gt__
90 | class_type.__le__ = __le__
91 | class_type.__ge__ = __ge__
92 | class_type.__eq__ = __eq__
93 | class_type.__ne__ = __ne__
94 | class_type.__and__ = __and__
95 | class_type.__or__ = __or__
96 | class_type.fillna = fillna
97 | class_type.astype = astype
98 |
99 |
100 |
101 |
--------------------------------------------------------------------------------
/pandas_to_sql/engine/columns/numeric_columns.py:
--------------------------------------------------------------------------------
1 | from pandas_to_sql.engine.columns.column import Column
2 | from pandas_to_sql.engine.columns.common import add_common_operators_to_class, value_to_sql_string, create_column_from_operation
3 |
4 |
5 | class FloatColumn(Column):
6 | def __init__(self, sql_string):
7 | super().__init__(dtype='FLOAT', sql_string=sql_string)
8 |
9 |
10 | class IntColumn(Column):
11 | def __init__(self, sql_string):
12 | super().__init__(dtype='INT', sql_string=sql_string)
13 |
14 |
15 | def __floordiv__(self, r):
16 | # http://sqlite.1065341.n5.nabble.com/floor-help-td46158.html
17 | return FloatColumn(sql_string=f'( ROUND(({value_to_sql_string(self)} / {value_to_sql_string(r)}) - 0.5) )')
18 |
19 | def __rfloordiv__(self, l):
20 | # http://sqlite.1065341.n5.nabble.com/floor-help-td46158.html
21 | return FloatColumn(sql_string=f'( ROUND(({value_to_sql_string(l)} / {value_to_sql_string(self)}) - 0.5) )')
22 |
23 | def is_int(v):
24 | return isinstance(v, int) or isinstance(v, IntColumn)
25 |
26 | def numeric_op_result_from_types(l, r):
27 | x = IntColumn if is_int(l) and is_int(r) else FloatColumn
28 | return x
29 |
30 | def __add__(self, r):
31 | res_column_type = numeric_op_result_from_types(self, r)
32 | return create_column_from_operation(self, r, res_column_type, '+')
33 |
34 | def __radd__(self, l):
35 | res_column_type = numeric_op_result_from_types(l, self)
36 | return create_column_from_operation(l, self, res_column_type, '+')
37 |
38 | def __sub__(self, r):
39 | res_column_type = numeric_op_result_from_types(self, r)
40 | return create_column_from_operation(self, r, res_column_type, '-')
41 |
42 | def __rsub__(self, l):
43 | res_column_type = numeric_op_result_from_types(l, self)
44 | return create_column_from_operation(l, self, res_column_type, '-')
45 |
46 | def __mul__(self, r):
47 | res_column_type = numeric_op_result_from_types(self, r)
48 | return create_column_from_operation(self, r, res_column_type, '*')
49 |
50 | def __rmul__(self, l):
51 | res_column_type = numeric_op_result_from_types(l, self)
52 | return create_column_from_operation(l, self, res_column_type, '*')
53 |
54 | def __truediv__(self, r):
55 | return FloatColumn(sql_string=f'(({value_to_sql_string(self)} + 0.0) / {value_to_sql_string(r)})')
56 |
57 | def __rtruediv__(self, l):
58 | return FloatColumn(sql_string=f'(({value_to_sql_string(l)} + 0.0) / {value_to_sql_string(self)})')
59 |
60 | def __abs__(self):
61 | return type(self)(sql_string=f'ABS({value_to_sql_string(self)})')
62 |
63 | def __neg__(self):
64 | return type(self)(sql_string=f'(-({value_to_sql_string(self)}))')
65 |
66 |
67 | def round_(self):
68 | # https://docs.python.org/3/library/functions.html#round
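69 |     # Python/numpy round half to even ("banker's rounding"), while SQLite's ROUND
70 |     # rounds half away from zero; the exactly-half-with-even-integer case is handled below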
69 | v = value_to_sql_string(self)
70 | integer_part = f'(CAST({v} AS INT))'
71 | fractional_part = f'(ABS({v}) - ROUND(ABS({v})-0.5))'
72 |
73 | is_integer_part_even = f'({integer_part}%2 == 0)'
74 | is_fractional_part_exactly_half = f'({fractional_part}==.5)'
75 |
76 | simple_round = f'(ROUND({v}))'
77 | round_with_change = f'(CASE WHEN {v}>0 THEN ROUND({v}-0.001) ELSE ROUND({v}+0.001) END)'
78 |
79 | s = f'(CASE WHEN {is_fractional_part_exactly_half} AND {is_integer_part_even} THEN {round_with_change} ELSE {simple_round} END)'
80 | return FloatColumn(sql_string=s)
81 |
82 | def abs_(self):
83 | return type(self)(sql_string=f'ABS({value_to_sql_string(self)})')
84 |
85 |
86 |
87 | add_common_operators_to_class(FloatColumn)
88 | FloatColumn.__add__ = __add__
89 | FloatColumn.__radd__ = __radd__
90 | FloatColumn.__sub__ = __sub__
91 | FloatColumn.__rsub__ = __rsub__
92 | FloatColumn.__mul__ = __mul__
93 | FloatColumn.__rmul__ = __rmul__
94 | FloatColumn.__floordiv__ = __floordiv__
95 | FloatColumn.__rfloordiv__ = __rfloordiv__
96 | FloatColumn.__truediv__ = __truediv__
97 | FloatColumn.__rtruediv__ = __rtruediv__
98 | FloatColumn.__abs__ = __abs__
99 | FloatColumn.__neg__ = __neg__
100 | FloatColumn.round = round_
101 | FloatColumn.abs = abs_
102 |
103 |
104 | add_common_operators_to_class(IntColumn)
105 | IntColumn.__add__ = __add__
106 | IntColumn.__radd__ = __radd__
107 | IntColumn.__sub__ = __sub__
108 | IntColumn.__rsub__ = __rsub__
109 | IntColumn.__mul__ = __mul__
110 | IntColumn.__rmul__ = __rmul__
111 | IntColumn.__floordiv__ = __floordiv__
112 | IntColumn.__rfloordiv__ = __rfloordiv__
113 | IntColumn.__truediv__ = __truediv__
114 | IntColumn.__rtruediv__ = __rtruediv__
115 | IntColumn.__abs__ = __abs__
116 | IntColumn.__neg__ = __neg__
117 | IntColumn.round = round_
118 | IntColumn.abs = abs_
119 |
120 |
--------------------------------------------------------------------------------
/pandas_to_sql/engine/grouped_table.py:
--------------------------------------------------------------------------------
1 | from copy import copy
2 | from pandas_to_sql.engine.columns.column import Column
3 | from pandas_to_sql.engine.columns.common import get_column_class_from_type
4 |
5 | class GroupedTable:
6 | table = None
7 | groupings = None
8 |
9 | def __init__(self, table, groupings):
10 | self.table = table
11 | self.groupings = groupings
12 |
13 | def __copy__(self):
14 | return GroupedTable(copy(self.table), copy(self.groupings))
15 |
16 | def __getitem__(self, key):
17 | if isinstance(key, Column):
18 | raise Exception('Cant filter/where GroupedTable')
19 | if isinstance(key, list):
20 | return GroupedTable(self.table[key], copy(self.groupings))
21 | if isinstance(key, str):
22 | return GroupedTable(self.table[[key]], copy(self.groupings))
23 | raise Exception(f'GroupedTable __getitem__ key type not supported. type: {str(type(key))}')
24 |
25 | def __setitem__(self, key, newvalue):
26 | raise Exception(f'GroupedTable __setitem__ not supported')
27 |
28 | def __getattr__(self, attribute_name):
29 | return self[attribute_name]
30 |
31 | def mean(self):
32 | return self.agg(dict(map(lambda k: (k,'mean'),self.table.columns.keys())))
33 |
34 | def count(self):
35 | return self.agg(dict(map(lambda k: (k,'count'),self.table.columns.keys())))
36 |
37 | def sum(self):
38 | return self.agg(dict(map(lambda k: (k,'sum'),self.table.columns.keys())))
39 |
40 | def agg(self, v):
41 |         if isinstance(v, str):
42 |             return self.agg(dict.fromkeys(self.table.columns.keys(), v))  # same aggregation for every column
43 | elif isinstance(v, list):
44 | return self.agg(dict(zip(self.table.columns.keys(), v)))
45 | elif isinstance(v, dict):
46 | if len( set(v.keys()) & set(self.groupings.keys()) ) > 0:
47 | raise Exception("grouped table doesnt support same column in 'on' and 'select'")
48 | self_table_copy = copy(self.table)
49 | # create groupby columns query
50 | groupby_select_columns = {}
51 | for column_name in v.keys():
52 | column = self_table_copy[column_name]
53 | operations = v[column_name] if isinstance(v[column_name], list) else [v[column_name]]
54 | for operation in operations:
55 | join_str_seperator = None
56 | operation_column_name_override = None
57 | dtype = None
58 |
59 | if callable(operation) and operation.__qualname__=='str.join':
60 | join_str_seperator = operation.__self__
61 | operation_column_name_override = 'join'
62 | operation = 'group_concat'
68 |
69 | operation = operation.lower()
70 |
71 | if operation=='mean':
72 | dtype = 'FLOAT'
73 | operation = 'avg'
74 | operation_column_name_override = 'mean'
75 | elif operation=='sum' and column.dtype=='VARCHAR':
76 | dtype = 'VARCHAR'
77 | operation = 'group_concat'
78 |                         join_str_separator = ''
79 | operation_column_name_override = 'sum'
80 | elif operation=='count' or (operation=='sum' and column.dtype=='INT'):
81 | dtype = 'INT'
82 | else:
83 | dtype = 'FLOAT'
84 |
85 | new_column_name = f'{column_name}_{operation_column_name_override or operation}'
86 | new_sql_string = f'{operation}({column.sql_string})'
87 | if operation=='group_concat':
88 |                         new_sql_string = f"{operation}({column.sql_string},'{join_str_separator}')"
89 | t = get_column_class_from_type(dtype)
90 | groupby_select_columns[new_column_name] = t(sql_string=new_sql_string)
91 | groupby_select_columns.update(self.groupings)
92 |
93 | self_table_copy.columns = groupby_select_columns
94 |
95 | # create new table columns
96 | new_table_columns = {}
97 | for k in groupby_select_columns.keys():
98 | t = get_column_class_from_type(groupby_select_columns[k].dtype)
99 | new_table_columns[k] = t(sql_string=k)
100 |
101 | grouping_field = ', '.join(list(map(lambda k: self.groupings[k].sql_string, self.groupings.keys())))
102 |
103 | from pandas_to_sql.engine.table import create_table
104 | return create_table(table_name='Temp',
105 | columns=new_table_columns,
106 | from_sql_string=f'{self_table_copy.get_sql_string()} GROUP BY {grouping_field}',
107 | had_changed=False)
108 |
109 |
--------------------------------------------------------------------------------
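Note: to see the agg rewriting above end to end, here is a hedged usage sketch. The table and column names are illustrative; create_table_from_schema and the INT/FLOAT dtype strings come from the engine, and the printed SQL is what get_sql_string should produce for this case:

    from pandas_to_sql.engine.table import create_table_from_schema

    t = create_table_from_schema('orders', {'user_id': 'INT', 'amount': 'FLOAT'})
    g = t.groupby('user_id')

    # 'mean' is rewritten to SQL avg(), and the result column is named amount_mean
    agg = g.agg({'amount': 'mean'})
    print(agg.get_sql_string())
    # roughly: SELECT (avg(amount)) AS amount_mean, (user_id) AS user_id
    #          FROM orders GROUP BY user_id
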
/pandas_to_sql/utils/pandas_dataframe_intercepter.py:
--------------------------------------------------------------------------------
1 | from copy import copy
2 | import operator
3 |
4 | class PandasDataFrameIntercepter:
5 | def __init__(self, df_pandas, df_sql_convert_table):
6 | self.df_pandas = df_pandas
7 | self.df_sql_convert_table = df_sql_convert_table
8 |
9 | def __repr__(self):
10 | return self.df_pandas.__repr__()
11 |
12 | def __format__(self, fmt):
13 | return self.df_pandas.__format__(fmt)
14 |
15 | def __str__(self):
16 | return self.df_pandas.__str__()
17 |
18 | @staticmethod
19 | def get_attr_for_df_pandas_if_needed(obj):
20 | if isinstance(obj, PandasDataFrameIntercepter):
21 | return object.__getattribute__(obj, 'df_pandas')
22 | else:
23 | return obj
24 |
25 | @staticmethod
26 | def get_attr_for_df_sql_convert_table_if_needed(obj):
27 | if isinstance(obj, PandasDataFrameIntercepter):
28 | return object.__getattribute__(obj, 'df_sql_convert_table')
29 | else:
30 | return obj
31 |
32 | def __getattribute__(self, name):
33 | if name in ['df_pandas', 'df_sql_convert_table']:
34 | return object.__getattribute__(self, name)
35 |
36 | df_sql_convert_table_attr = self.df_sql_convert_table.__getattribute__(name)
37 | if name=='get_sql_string' and hasattr(df_sql_convert_table_attr, '__call__'):
38 |             return df_sql_convert_table_attr  # already a bound method; no wrapper lambda needed
39 |
40 | df_pandas_attr = self.df_pandas.__getattribute__(name)
41 | if name=='columns' and not hasattr(df_pandas_attr, '__call__'):
42 | return df_pandas_attr
43 |
44 | if hasattr(df_sql_convert_table_attr, '__call__'):
45 | def _(*args, **kwargs):
46 | def __dictionary_map_values(d, func):
47 | return {k: func(v) for k, v in d.items()}
48 |
49 | args_df_pandas = tuple(map(PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed, args))
50 | args_obj_new = tuple(map(PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed, args))
51 |
52 | kwargs_df_pandas = __dictionary_map_values(kwargs, PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed)
53 | kwargs_obj_new = __dictionary_map_values(kwargs, PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed)
54 |
55 | a = df_pandas_attr(*args_df_pandas, **kwargs_df_pandas)
56 | b = df_sql_convert_table_attr(*args_obj_new, **kwargs_obj_new)
57 | return PandasDataFrameIntercepter(a, b)
58 | return _
59 | else:
60 | return PandasDataFrameIntercepter(df_pandas_attr, df_sql_convert_table_attr)
61 |
62 | def __getitem__(self, key):
63 | a = self.df_pandas[PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(key)]
64 | b = self.df_sql_convert_table[PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(key)]
65 | return PandasDataFrameIntercepter(a, b)
66 |
67 | def __setitem__(self, key, newvalue):
68 | self.df_pandas[key] = PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(newvalue)
69 | self.df_sql_convert_table[key] = PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(newvalue)
70 |         # __setitem__ return values are ignored by Python; nothing to return
71 |
72 | def __getattr__(self, attribute_name):
73 | a = self.df_pandas[attribute_name]
74 | b = self.df_sql_convert_table[attribute_name]
75 | return PandasDataFrameIntercepter(a, b)
76 |
77 | def __copy__(self):
78 | return PandasDataFrameIntercepter(copy(self.df_pandas), copy(self.df_sql_convert_table))
79 |
80 | @staticmethod
81 | def run_operation_and_return(left, right, op):
82 | left_ = PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(left)
83 | right_ = PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(right)
84 | a = op(left_, right_)
85 |
86 | left_ = PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(left)
87 | right_ = PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(right)
88 | b = op(left_, right_)
89 | return PandasDataFrameIntercepter(a, b)
90 |
91 | @staticmethod
92 | def run_operation_single_and_return(obj, op):
93 | a = PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(obj)
94 | b = PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(obj)
95 | a = op(a)
96 | b = op(b)
97 | return PandasDataFrameIntercepter(a, b)
98 |
99 | # comparisons
100 | def __lt__(self,other):
101 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.lt)
102 |
103 | def __le__(self,other):
104 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.le)
105 |
106 | def __gt__(self,other):
107 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.gt)
108 |
109 | def __ge__(self,other):
110 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.ge)
111 |
112 | def __eq__(self,other):
113 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.eq)
114 |
115 | def __ne__(self,other):
116 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.ne)
117 |
118 | def __abs__(self):
119 | return PandasDataFrameIntercepter.run_operation_single_and_return(self, operator.abs)
120 |
121 | def __neg__(self):
122 | return PandasDataFrameIntercepter.run_operation_single_and_return(self, operator.neg)
123 |
124 | def __invert__(self):
125 | return PandasDataFrameIntercepter.run_operation_single_and_return(self, operator.invert)
126 |
127 | def __contains__(self, r):
128 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.contains)
129 |
130 | # numeric
131 | def __add__(self, r):
132 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.add)
133 |
134 | def __sub__(self, r):
135 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.sub)
136 |
137 | def __mul__(self, r):
138 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.mul)
139 |
140 | # def __matmul__(self, r):
141 | # return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.matmul)
142 |
143 | def __truediv__(self, r):
144 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.truediv)
145 |
146 | def __floordiv__(self, r):
147 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.floordiv)
148 |
149 | def __mod__(self, r):
150 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.mod)
151 |
152 | def __pow__(self, r):
153 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.pow)
154 |
155 | def __and__(self, r):
156 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.and_)
157 |
158 | def __or__(self, r):
159 | return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.or_)
160 |
161 |     # reflected (r-) numeric operators
162 | def __radd__(self, l):
163 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.add)
164 |
165 | def __rsub__(self, l):
166 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.sub)
167 |
168 | def __rmul__(self, l):
169 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.mul)
170 |
171 | def __rmatmul__(self, l):
172 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.matmul)
173 |
174 | def __rtruediv__(self, l):
175 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.truediv)
176 |
177 | def __rfloordiv__(self, l):
178 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.floordiv)
179 |
180 | def __rmod__(self, l):
181 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.mod)
182 |
183 | def __rpow__(self, l):
184 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.pow)
185 |
186 | def __rand__(self, l):
187 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.and_)
188 |
189 | def __ror__(self, l):
190 | return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.or_)
191 |
192 |
193 |
194 |
195 |
--------------------------------------------------------------------------------
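Note: PandasDataFrameIntercepter mirrors every attribute access, method call, and operator onto both wrapped objects and re-wraps the pair, so a pipeline executes eagerly in pandas while the SQL translation accumulates on the side. A hedged sketch of direct use (the schema is illustrative, and the exact SQL text depends on get_sql_string):

    import pandas as pd
    from pandas_to_sql.utils.pandas_dataframe_intercepter import PandasDataFrameIntercepter
    from pandas_to_sql.engine.table import create_table_from_schema

    df = pd.DataFrame({'a': [1, 2, 3]})
    t = create_table_from_schema('t', {'a': 'INT'})
    both = PandasDataFrameIntercepter(df, t)

    filtered = both[both['a'] > 1]    # comparison and filter run on both sides
    print(filtered)                   # pandas result, via the forwarded __str__
    print(filtered.get_sql_string())  # the accumulated SELECT ... WHERE ... text
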
/pandas_to_sql/engine/table.py:
--------------------------------------------------------------------------------
1 | from copy import copy
2 | from pandas_to_sql.engine.columns.column import Column
3 | from pandas_to_sql.engine.grouped_table import GroupedTable
4 | from pandas_to_sql.engine.columns.common import get_column_class_from_type, create_column_from_value
5 |
6 |
7 | class Table:
8 | table_name = None
9 | columns = None
10 | filters = None
11 | from_sql_string = None
12 | had_changed = None
13 |
14 | def __init__(self, table_name, columns, from_sql_string, filters, had_changed):
15 | self.table_name = table_name
16 | self.columns = columns
17 | self.filters = filters
18 | self.from_sql_string = from_sql_string
19 | self.had_changed = had_changed
20 |
21 | def __getitem__(self, key):
22 | if isinstance(key, Column):
23 | if key.dtype != 'BOOL':
24 | raise Exception('Can only filter/where using column of type BOOL. got %s' % (key.dtype))
25 | return self.where(key)
26 |         if isinstance(key, list):
27 |             if not all(isinstance(x, str) for x in key):
28 |                 raise Exception('List must be all strings. got %s' % (key))
29 |             if not all(x in self.columns for x in key):
30 |                 raise Exception('All column names must be columns in the table. got %s' % (key))
31 |             return self.select(key)
32 |
33 | c = copy(self.columns[key])
34 | return c
35 |
36 | def __setitem__(self, key, newvalue):
37 | if isinstance(newvalue, Column) or issubclass(type(newvalue), Column):
38 | self.columns[key] = newvalue
39 | self.had_changed = True
40 | else:
41 | self.columns[key] = create_column_from_value(newvalue)
42 | self.had_changed = True
43 |
44 | def __getattr__(self, attribute_name):
45 | return self[attribute_name]
46 |
47 | def __copy__(self):
48 | columns_copy = {}
49 | for c in self.columns.keys():
50 | columns_copy[c] = self[c] # column deep copy will occur in __getitem__
51 |
52 | filters_copy = []
53 | for f in self.filters: filters_copy.append(copy(f))
54 |
55 | result_table = create_table(table_name=self.table_name,
56 | from_sql_string=self.from_sql_string,
57 | had_changed=self.had_changed,
58 | columns=columns_copy,
59 | filters=filters_copy)
60 | return result_table
61 |
62 | def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''):
63 | return copy(self)
64 |
65 | def to_frame(self):
66 | return copy(self)
67 |
68 | def rename(self, columns):
69 |         new_table = copy(self)
70 |         new_table.had_changed = True
71 | new_columns = {}
72 | for col_name, col_value in new_table.columns.items():
73 | if col_name in columns.keys():
74 | new_columns[columns[col_name]] = col_value
75 | else:
76 | new_columns[col_name] = col_value
77 |
78 | new_table.columns = new_columns
79 | return new_table
80 |
81 | def drop(self, columns):
82 |         new_table = copy(self)
83 |         new_table.had_changed = True
84 | new_columns = { col_name: col_value
85 | for col_name, col_value in new_table.columns.items()
86 | if col_name not in columns }
87 | new_table.columns = new_columns
88 | return new_table
89 |
90 | def where(self, cond_column):
91 |         new_table = copy(self)
92 |         new_table.had_changed = True
93 | new_table.filters.append(cond_column)
94 | return new_table
95 |
96 | def select(self, columns_names):
97 |         new_table = copy(self)
98 |         new_table.had_changed = True
99 | # filter only selected columns from columns dictionary
100 | new_table.columns = \
101 | {col_name:col_val for (col_name, col_val) in new_table.columns.items() if col_name in columns_names}
102 | return new_table
103 |
104 | def merge(self, right, how='inner', on=None, left_on=None, right_on=None):
105 | if not isinstance(right, Table):
106 |             raise Exception("merge expects right to be of type Table, got: %s" % (str(type(right))))
107 | if how not in ['left', 'inner']:
108 |             raise Exception("merge 'how' value must be in ['left', 'inner']")
109 |
110 | left = copy(self)
111 | right = copy(right)
112 | if len(set(left.columns.keys()) & set(right.columns.keys())) > 1:
113 | raise Exception("merge got duplicates columns in both tables (except 'on' value)")
114 |
115 | left_on_column = None
116 | right_on_column = None
117 | if on and not left_on and not right_on:
118 | left_on_column = on
119 | right_on_column = on
120 | elif left_on and right_on and not on:
121 | left_on_column = left_on
122 | right_on_column = right_on
123 | else:
124 | raise Exception("got unexpected on/left_on/right_on values.")
125 |
126 | if not isinstance(left_on_column, str) or \
127 | not isinstance(right_on_column, str):
128 | raise Exception("'on/left_on/right_on' must be str")
129 |
130 | if left_on_column not in left.columns or right_on_column not in right.columns:
131 | raise Exception("merge 'on/left_on/right_on' value must be in both tables as column")
132 |
133 | left_columns = dict(zip(left.columns.keys(), map(lambda x: left[x], left.columns.keys())))
134 | right_columns = dict(zip(right.columns.keys(), map(lambda x: right[x], right.columns.keys())))
135 |
136 | # creating new table columns
137 | if left_on_column == right_on_column:
138 |             right_columns.pop(right_on_column)
139 | new_table_columns = {**left_columns, **right_columns}
140 |
141 | # creating new table sql string
142 | single_select_field_format = 't1.%s AS %s'
143 | selected_fields_left = ', '.join(list(map(lambda x: single_select_field_format % (x, x), left_columns.keys())))
144 |
145 | single_select_field_format = 't2.%s AS %s'
146 | selected_fields_right = ', '.join(list(map(lambda x: single_select_field_format % (x, x), right_columns.keys())))
147 |
148 | selected_fields = selected_fields_left
149 | if selected_fields_right:
150 | selected_fields += ', ' + selected_fields_right
151 |
152 | new_table_sql_string = f'SELECT {selected_fields} FROM ({left.get_sql_string()}) AS t1 {how.upper()} JOIN ({right.get_sql_string()}) AS t2 ON t1.{left_on_column}=t2.{right_on_column}'
153 |
154 | return create_table(table_name='Temp',
155 | columns=new_table_columns,
156 | from_sql_string=new_table_sql_string)
157 |
158 | def groupby(self, by):
159 | def __get_column_key(col):
160 | for k in self.columns.keys():
161 | if self.columns[k].sql_string==col.sql_string: return k
162 | raise Exception('groupby got column that is not in table')
163 |
164 | groupings = None
165 | if isinstance(by, str):
166 | groupings = {by:self[by]}
167 | elif isinstance(by, Column):
168 | groupings = {__get_column_key(by): copy(by)}
169 | elif isinstance(by, list):
170 | groupings = {}
171 | for b in by:
172 | if isinstance(b, str): groupings[b] = self[b]
173 |                 elif isinstance(b, Column): groupings[__get_column_key(b)] = copy(b)
174 | else: raise Exception(f'groupby got unexpected type. expect str or column, got: {str(type(b))}')
175 | else:
176 | raise Exception("groupby 'by' value must be str OR list[str] OR Column OR list[Column]")
177 |
178 | return GroupedTable(copy(self), groupings=groupings)
179 |
180 | def get_sql_string(self):
181 | if self.from_sql_string and not self.had_changed:
182 | return self.from_sql_string
183 |
184 | from_field = None
185 | selected_fields = None
186 | if self.from_sql_string:
187 | from_field = f'({self.from_sql_string}) AS {self.table_name}'
188 | else:
189 | from_field = self.table_name
190 |
191 | single_select_field_format = '(%s) AS %s'
192 | selected_fields = ', '.join(list(map(lambda x: single_select_field_format % (self[x].sql_string, x), self.columns.keys())))
193 |
194 | single_where_field_format = '(%s)'
195 | where_cond = ' AND '.join(list(map(lambda c: single_where_field_format % (c.sql_string), self.filters)))
196 |
197 | if where_cond:
198 |             return f'SELECT {selected_fields} FROM {from_field} WHERE {where_cond}'
199 | else:
200 | return f'SELECT {selected_fields} FROM {from_field}'
201 |
202 |
203 |
204 |
205 | def create_table_from_schema(table_name, schema) -> Table:
206 | columns = {}
207 | for column_name in schema.keys():
208 | columns[column_name] = get_column_class_from_type(schema[column_name])(sql_string=column_name)
209 | return create_table(table_name=table_name, columns=columns)
210 |
211 | def create_table(table_name, columns=None, from_sql_string=None, filters=None, had_changed=False) -> Table:
212 |     return Table(
213 |         table_name=table_name,
214 |         columns=columns if columns is not None else {},
215 |         from_sql_string=from_sql_string,
216 |         filters=filters if filters is not None else [],
217 |         had_changed=had_changed)
--------------------------------------------------------------------------------
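Note: a short end-to-end sketch of the Table API above. The schema dtype names (INT, VARCHAR) follow the strings used throughout the engine; the table names are illustrative, and the printed SQL is indicative of what get_sql_string produces for this merge:

    from pandas_to_sql.engine.table import create_table_from_schema

    users = create_table_from_schema('users', {'id': 'INT', 'name': 'VARCHAR'})
    orders = create_table_from_schema('orders', {'order_id': 'INT', 'id': 'INT'})

    # inner join on the shared 'id' column; the duplicate key is dropped from the right side
    joined = users.merge(orders, how='inner', on='id')
    print(joined.get_sql_string())
    # roughly: SELECT t1.id AS id, t1.name AS name, t2.order_id AS order_id
    #          FROM (SELECT (id) AS id, (name) AS name FROM users) AS t1
    #          INNER JOIN (SELECT (order_id) AS order_id, (id) AS id FROM orders) AS t2
    #          ON t1.id=t2.id
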
/pandas_to_sql_colab_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "pandas_to_sql_colab_example.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "authorship_tag": "ABX9TyPTBsf7gZggRD828S1nx250",
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | }
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 | ""
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "metadata": {
31 | "id": "5oTOIl8oHBhe",
32 | "colab": {
33 | "base_uri": "https://localhost:8080/"
34 | },
35 | "outputId": "1f6d0bea-3f84-43d3-d43a-ddb55596e920"
36 | },
37 | "source": [
38 | "!pip install pandas-to-sql -U"
39 | ],
40 | "execution_count": 1,
41 | "outputs": [
42 | {
43 | "output_type": "stream",
44 | "text": [
45 | "Requirement already up-to-date: pandas-to-sql in /usr/local/lib/python3.6/dist-packages (0.0.546)\n"
46 | ],
47 | "name": "stdout"
48 | }
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "metadata": {
54 | "id": "sGSsvHC8HaQ0"
55 | },
56 | "source": [
57 | "from copy import copy\r\n",
58 | "import sqlite3\r\n",
59 | "import pandas as pd\r\n",
60 | "import pandas_to_sql\r\n",
61 | "from pandas_to_sql import conventions"
62 | ],
63 | "execution_count": 2,
64 | "outputs": []
65 | },
66 | {
67 | "cell_type": "code",
68 | "metadata": {
69 | "id": "NexlwrknMQGS",
70 | "colab": {
71 | "base_uri": "https://localhost:8080/",
72 | "height": 110
73 | },
74 | "outputId": "3e8f6560-0d38-4ca2-b728-15c0b44dbe69"
75 | },
76 | "source": [
77 | "iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')\r\n",
78 | "table_name = 'iris'\r\n",
79 | "sql_connection = sqlite3.connect('./iris.db') #create db\r\n",
80 | "iris.to_sql(table_name, sql_connection, if_exists='replace', index=False)\r\n",
81 | "iris[:2]"
82 | ],
83 | "execution_count": 3,
84 | "outputs": [
85 | {
86 | "output_type": "execute_result",
87 | "data": {
88 | "text/html": [
89 |               "[HTML table output of iris[:2]]\n",
90 |               "[columns: sepal_length, sepal_width, petal_length, petal_width, species]\n",
91 |               "[row 0: 5.1, 3.5, 1.4, 0.2, setosa]\n",
92 |               "[row 1: 4.9, 3.0, 1.4, 0.2, setosa]\n",