├── pandas_to_sql ├── engine │ ├── __init__.py │ ├── columns │ │ ├── __init__.py │ │ ├── column.py │ │ ├── bool_column.py │ │ ├── str_column.py │ │ ├── datetime_column.py │ │ ├── common.py │ │ └── numeric_columns.py │ ├── grouped_table.py │ └── table.py ├── testing │ ├── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── asserters.py │ │ └── fake_data_creation.py │ ├── tests │ │ ├── test_operations_base.py │ │ ├── test_table_operations.py │ │ ├── test_select.py │ │ ├── test_pandas_dataframe_intercepter.py │ │ ├── test_concat.py │ │ ├── test_assignment.py │ │ ├── test_groupby.py │ │ ├── test_merge.py │ │ ├── test_datetime.py │ │ ├── test_str.py │ │ ├── test_operations_numeric.py │ │ └── test_operations_compare.py │ └── conftest.py ├── utils │ ├── __init__.py │ ├── helpers.py │ ├── pandas_interceptor.py │ └── pandas_dataframe_intercepter.py ├── conventions │ ├── __init__.py │ └── groupby_conventions.py └── __init__.py ├── .gitignore ├── environment.yml ├── .github └── workflows │ ├── tests.yml │ └── publish-to-pypi.yml ├── setup.py ├── LICENSE ├── example_runner.py ├── README.md └── pandas_to_sql_colab_example.ipynb /pandas_to_sql/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pandas_to_sql/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pandas_to_sql/engine/columns/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pandas_to_sql/conventions/__init__.py: -------------------------------------------------------------------------------- 1 | from pandas_to_sql.conventions.groupby_conventions import flatten_grouped_dataframe -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__/* 2 | **/.pytest_cache/* 3 | .vscode/ 4 | **/env/* 5 | local/* 6 | build/* 7 | dist/* 8 | **.egg-info 9 | **.db -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pandas-to-sql-dev 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | # required 7 | - setuptools 8 | - python=3.7 9 | - numpy>=1.19 10 | - pandas>=1.1 11 | # testing 12 | - pytest>=5.0.1 13 | - sqlalchemy 14 | 15 | -------------------------------------------------------------------------------- /pandas_to_sql/engine/columns/column.py: -------------------------------------------------------------------------------- 1 | 2 | class Column: 3 | dtype = None 4 | sql_string = None 5 | 6 | def __init__(self, dtype=None, sql_string=None): 7 | self.dtype = dtype 8 | self.sql_string = sql_string 9 | 10 | def __copy__(self): 11 | return type(self)(self.sql_string) 12 | 
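A quick sketch of how this base class is used: each `Column` just carries a SQL fragment in `sql_string`, and the subclasses defined later in this repo (`FloatColumn`, `StrColumn`, etc.) build new fragments out of old ones via operator overloads. Note that `__copy__` passes `sql_string` as the first positional argument, which matches the subclass constructors but not this base class's own `__init__`. A minimal illustration, assuming a hypothetical source column named `price`:

```python
from pandas_to_sql.engine.columns.numeric_columns import FloatColumn

price = FloatColumn(sql_string='price')  # wraps a raw column reference
total = price * 1.2                      # operators compose nested SQL fragments
print(total.dtype)       # FLOAT
print(total.sql_string)  # (price * 1.2)
```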
-------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_operations_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas_to_sql.testing.utils.asserters import assert_ 3 | from copy import copy 4 | 5 | def test_copy(): 6 | df = pytest.df1 7 | df2 = copy(df) 8 | df['new_value'] = df.random_float > 10 # some unrelated operation 9 | assert_(df2) 10 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_table_operations.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import pytest 3 | from pandas_to_sql.testing.utils.asserters import assert_ 4 | 5 | 6 | def test_rename(): 7 | df = pytest.df1 8 | df = df.rename(columns={'random_int': 'random_int_2', 9 | 'random_str': 'random_str_2'}) 10 | assert_(df) 11 | 12 | def test_drop(): 13 | df = pytest.df1 14 | df = df.drop(columns=['random_int', 'random_str']) 15 | assert_(df) 16 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_select.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas_to_sql.testing.utils.asserters import assert_ 3 | 4 | 5 | def test_select_inline(): 6 | assert_(pytest.df1[['random_int', 'random_float']]) 7 | 8 | 9 | def test_select_not_inline(): 10 | df = pytest.df1[['random_int', 'random_float']] 11 | assert_(df) 12 | 13 | 14 | def test_select_multiple_times(): 15 | df = pytest.df1[['random_int', 'random_datetime','random_bool']] 16 | df = df[['random_datetime']] 17 | assert_(df) 18 | -------------------------------------------------------------------------------- /pandas_to_sql/__init__.py: -------------------------------------------------------------------------------- 1 | from pandas_to_sql.engine.table import create_table_from_schema 2 | from pandas_to_sql.utils.helpers import create_schema_from_df 3 | from pandas_to_sql.utils.pandas_dataframe_intercepter import PandasDataFrameIntercepter 4 | from pandas_to_sql.utils.pandas_interceptor import PandasIntercepter 5 | 6 | 7 | def wrap_df(df, table_name): 8 | t = create_table_from_schema(table_name=table_name, schema=create_schema_from_df(df)) 9 | return PandasDataFrameIntercepter(df, t) 10 | 11 | 12 | def wrap_pd(pd): 13 | return PandasIntercepter(pd) -------------------------------------------------------------------------------- /pandas_to_sql/engine/columns/bool_column.py: -------------------------------------------------------------------------------- 1 | from pandas_to_sql.engine.columns.column import Column 2 | from pandas_to_sql.engine.columns.common import value_to_sql_string, add_common_operators_to_class 3 | 4 | 5 | class BoolColumn(Column): 6 | def __init__(self, sql_string): 7 | super().__init__(dtype='BOOL', sql_string=sql_string) 8 | 9 | def __neg__(self): 10 | return BoolColumn(sql_string=f'(NOT({value_to_sql_string(self)}))') 11 | 12 | def __invert__(self): 13 | return BoolColumn(sql_string=f'(NOT({value_to_sql_string(self)}))') 14 | 15 | 16 | add_common_operators_to_class(BoolColumn) 17 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_pandas_dataframe_intercepter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas_to_sql.testing.conftest 
import TABLE_NAME_1 3 | 4 | def test_columns_attribute(): 5 | expected = pytest.df1.df_pandas.columns 6 | actual = pytest.df1.columns 7 | assert type(expected) == type(actual) 8 | assert set(expected) == set(actual) 9 | 10 | 11 | def test_get_sql_string_attribute(): 12 | expected = '''SELECT (random_int) AS random_int, (random_float) AS random_float, (random_bool) AS random_bool, (random_datetime) AS random_datetime, (random_str) AS random_str FROM random_data_1''' 13 | assert expected == pytest.df1.get_sql_string() -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_concat.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas_to_sql.testing.utils.asserters import assert_ 3 | from pandas_to_sql.conventions import flatten_grouped_dataframe 4 | from copy import copy 5 | import pandas as pd 6 | import pandas_to_sql 7 | 8 | def test_concat_simple(): 9 | df = pytest.df1 10 | 11 | pd_wrapped = pandas_to_sql.wrap_pd(pd) 12 | 13 | df2 = pd_wrapped.concat([df, df, df]) 14 | 15 | assert_(df2) 16 | 17 | 18 | def test_concat_simple_with_copy(): 19 | df = pytest.df1 20 | 21 | pd_wrapped = pandas_to_sql.wrap_pd(pd) 22 | 23 | df2 = pd_wrapped.concat([df, copy(df), copy(df)]) 24 | 25 | assert_(df2) 26 | 27 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build-linux: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | max-parallel: 5 10 | 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python 3.8 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: 3.8 17 | - name: Install dependencies 18 | run: | 19 | # $CONDA is an environment variable pointing to the root of the miniconda directory 20 | $CONDA/bin/conda env update --file environment.yml --name base 21 | - name: Test with pytest 22 | run: | 23 | conda install pytest 24 | $CONDA/bin/pytest 25 | -------------------------------------------------------------------------------- /pandas_to_sql/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | ## Types 5 | def convert_df_type(col_type): 6 | if pd.api.types.is_bool_dtype(col_type): return 'BOOL' 7 | elif pd.api.types.is_integer_dtype(col_type): return 'INT' 8 | elif pd.api.types.is_numeric_dtype(col_type): return 'FLOAT' 9 | elif pd.api.types.is_string_dtype(col_type): return 'VARCHAR' 10 | elif pd.api.types.is_datetime64_any_dtype(col_type): return 'DATETIME' 11 | else: raise Exception(f"could not convert column type. 
got: {str(col_type)}") 12 | 13 | 14 | def create_schema_from_df(df): 15 | schema = {} 16 | for col_name, col_type in df.dtypes.items(): 17 | schema[col_name] = convert_df_type(col_type) 18 | return schema -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import setuptools 3 | 4 | 5 | v = os.environ['RELEASE_VERSION'] 6 | print('Version: ', v) 7 | 8 | with open("README.md", "r") as fh: 9 | long_description = fh.read() 10 | 11 | setuptools.setup( 12 | name="pandas-to-sql", # Replace with your own username 13 | version=v, 14 | author="Amir", 15 | author_email="amirpupko@gmail.com", 16 | description="Convert pandas dataframe manipulations to sql query string", 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | url="https://github.com/AmirPupko/pandas-to-sql", 20 | packages=setuptools.find_packages(), 21 | classifiers=[ 22 | "Programming Language :: Python :: 3", 23 | "License :: OSI Approved :: MIT License", 24 | "Operating System :: OS Independent", 25 | ], 26 | python_requires='>=3.6', 27 | ) -------------------------------------------------------------------------------- /pandas_to_sql/conventions/groupby_conventions.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pandas_to_sql.utils.pandas_dataframe_intercepter import PandasDataFrameIntercepter 3 | from copy import copy 4 | 5 | def flatten_grouped_dataframe(df): 6 | if not isinstance(df, PandasDataFrameIntercepter): 7 | raise Exception(f"can only get type {str(type(PandasDataFrameIntercepter))}") 8 | 9 | df_c = copy(df.df_pandas) 10 | if isinstance(df_c, pd.core.series.Series): 11 | series_name = df_c.name 12 | new_col_name = list(filter(lambda k: k.startswith(series_name), df.df_sql_convert_table.columns.keys()))[0] 13 | df_c = df_c.reset_index().rename(columns={series_name: new_col_name}) 14 | else: 15 | df_c.columns = df_c.columns.map('_'.join) 16 | df_c = df_c.reset_index() 17 | return PandasDataFrameIntercepter(df_c, copy(df.df_sql_convert_table)) 18 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Publish to PyPI 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | RELEASE_VERSION: ${{ github.event.release.tag_name }} 30 | run: | 31 | python setup.py sdist bdist_wheel 32 | twine upload dist/* 33 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/conftest.py: -------------------------------------------------------------------------------- 1 | 2 | 
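# Shared test setup, in short: this conftest seeds a throwaway sqlite file with
# two randomly generated tables, then re-wraps fresh copies as pytest.df1 /
# pytest.df2 before every test, so mutations in one test never leak into the next.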
from copy import copy 3 | import sqlite3 4 | import pytest 5 | from pandas_to_sql import wrap_df 6 | from pandas_to_sql.testing.utils import fake_data_creation 7 | 8 | sql_connection = sqlite3.connect('./example.db') #create db 9 | 10 | TABLE_NAME_1 = 'random_data_1' 11 | DF1, SCHEMA_1 = fake_data_creation.create_fake_dataset() 12 | DF1.to_sql(TABLE_NAME_1, sql_connection, if_exists='replace', index=False, dtype=SCHEMA_1) 13 | 14 | TABLE_NAME_2 = 'random_data_2' 15 | DF2, SCHEMA_2 = fake_data_creation.create_fake_dataset() 16 | DF2.columns = DF2.columns.map(lambda c: c + '_2') 17 | DF2.to_sql(TABLE_NAME_2, sql_connection, if_exists='replace', index=False, dtype=SCHEMA_2) 18 | 19 | def pytest_configure(): 20 | pytest.df1 = None 21 | pytest.df2 = None 22 | pytest.sql_connection = sql_connection 23 | 24 | @pytest.fixture(scope="function", autouse=True) 25 | def run_around_tests(): 26 | # print('\nhere\n') 27 | pytest.df1 = wrap_df(copy(DF1), TABLE_NAME_1) 28 | pytest.df2 = wrap_df(copy(DF2), TABLE_NAME_2) 29 | yield 30 | # run after function 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 AmirPupko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_assignment.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import pytest 3 | from pandas_to_sql.testing.utils.asserters import assert_ 4 | 5 | 6 | def test_assign(): 7 | df = pytest.df1 8 | df['new_value'] = df.random_float + 10 9 | assert_(df) 10 | 11 | 12 | def test_assign2(): 13 | df = pytest.df1 14 | df['new_value'] = df.random_bool 15 | assert_(df) 16 | 17 | def test_assign3(): 18 | df = pytest.df1 19 | df['new_value'] = df.random_bool 20 | df2 = df[['new_value','random_float']] 21 | assert_(df2) 22 | 23 | def test_assignment_int(): 24 | df = pytest.df1 25 | df['new_value2'] = 4 26 | assert_(df) 27 | 28 | def test_assignment_float(): 29 | df = pytest.df1 30 | df['new_value2'] = 23.132 31 | assert_(df) 32 | 33 | def test_assignment_bool(): 34 | df = pytest.df1 35 | df['new_value2'] = True 36 | assert_(df) 37 | 38 | def test_assignment_str(): 39 | df = pytest.df1 40 | df['new_value2'] = 'some_str' 41 | assert_(df) 42 | 43 | def test_assignment_datetime(): 44 | df = pytest.df1 45 | df['new_value'] = datetime(1970, 1, 1) 46 | assert_(df) 47 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/utils/asserters.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pandas.testing import assert_frame_equal 3 | import pytest 4 | 5 | 6 | def assert_dataframes_equals(expected, actual): 7 | assert expected.shape==actual.shape 8 | assert set(expected.columns) == set(actual.columns) 9 | columns_order = list(expected.columns) 10 | a = actual[columns_order].sort_values(by=list(actual.columns)).reset_index(drop=True) 11 | e = expected[columns_order].sort_values(by=list(actual.columns)).reset_index(drop=True) 12 | assert_frame_equal(e, a, check_dtype=False) 13 | 14 | 15 | def get_expected_and_actual(df): 16 | actual_query_string = df.df_sql_convert_table.get_sql_string() 17 | actual_columns = df.df_sql_convert_table.columns 18 | datetime_columns = [c for c in actual_columns.keys() if actual_columns[c].dtype == 'DATETIME'] 19 | 20 | df_actual = pd.read_sql_query(actual_query_string, pytest.sql_connection, parse_dates=datetime_columns) 21 | df_expected = df.df_pandas 22 | 23 | return df_expected, df_actual 24 | 25 | def assert_(df): 26 | df_expected, df_actual = get_expected_and_actual(df) 27 | 28 | # i = df_expected.new_value != df_actual.new_value 29 | # a=df_expected[i][:3] 30 | # b=df_expected[i][:3] 31 | 32 | assert_dataframes_equals(df_expected, df_actual) 33 | -------------------------------------------------------------------------------- /example_runner.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | import sqlite3 3 | import pandas as pd 4 | import pandas_to_sql 5 | from pandas_to_sql.testing.utils.fake_data_creation import create_fake_dataset 6 | from pandas_to_sql.conventions import flatten_grouped_dataframe 7 | 8 | # table_name = 'random_data' 9 | # df, _ = create_fake_dataset() 10 | # df_ = pandas_to_sql.wrap_df(df, table_name) 11 | # df2 = df_.groupby('random_int').agg({'random_float':['mean','sum','count'], 'random_str':', '.join}) 12 | # df2 = flatten_grouped_dataframe(df2) 13 | # print(df2.get_sql_string()) 14 | 15 | iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv') 16 | 
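# What follows round-trips the iris frame through sqlite: wrap the DataFrame,
# apply pandas-style transformations, render the accumulated SQL, execute it
# against the database, and assert that both paths yield the same result.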
table_name = 'iris' 17 | sql_connection = sqlite3.connect('./iris.db') #create db 18 | iris.to_sql(table_name, sql_connection, if_exists='replace', index=False) 19 | 20 | df = pandas_to_sql.wrap_df(iris, table_name) 21 | pd_wrapped = pandas_to_sql.wrap_pd(pd) 22 | 23 | df_ = copy(df) 24 | df_['sepal_width_rounded'] = df_.sepal_width.round() 25 | df_1 = df_[df_.species=='setosa'].reset_index(drop=True) 26 | df_2 = df_[df_.species=='versicolor'].reset_index(drop=True) 27 | 28 | some_df = pd_wrapped.concat([df_1, df_2]).reset_index(drop=True) 29 | 30 | sql_string = some_df.get_sql_string() 31 | 32 | df_from_sql_database = pd.read_sql_query(sql_string, sql_connection) 33 | df_pandas = some_df.df_pandas 34 | 35 | from pandas_to_sql.testing.utils.asserters import assert_dataframes_equals 36 | assert_dataframes_equals(df_pandas, df_from_sql_database) 37 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_groupby.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas_to_sql.testing.utils.asserters import assert_ 3 | from pandas_to_sql.conventions import flatten_grouped_dataframe 4 | 5 | 6 | 7 | def test_groupby_mean(): 8 | df2 = pytest.df1.groupby('random_int').random_float.mean() 9 | assert_(flatten_grouped_dataframe(df2)) 10 | 11 | def test_groupby_sum(): 12 | df2 = pytest.df1.groupby('random_int').random_float.sum() 13 | assert_(flatten_grouped_dataframe(df2)) 14 | 15 | def test_groupby_count(): 16 | df2 = pytest.df1.groupby('random_int').random_float.count() 17 | assert_(flatten_grouped_dataframe(df2)) 18 | 19 | 20 | def test_groupby_agg_1(): 21 | df2 = pytest.df1 \ 22 | .groupby('random_int') \ 23 | .agg({'random_float':['mean','sum','count'], 'random_str':', '.join}) 24 | assert_(flatten_grouped_dataframe(df2)) 25 | 26 | def test_groupby_agg_2(): 27 | df2 = pytest.df1 \ 28 | .groupby('random_bool') \ 29 | .agg({'random_int':['mean','sum','count'], 'random_str':[', '.join]}) 30 | assert_(flatten_grouped_dataframe(df2)) 31 | 32 | 33 | def test_groupby_multiple_on(): 34 | df2 = pytest.df1 \ 35 | .groupby(['random_bool', 'random_int']).random_float.mean() 36 | assert_(flatten_grouped_dataframe(df2)) 37 | 38 | def test_groupby_multiple_on_agg(): 39 | df2 = pytest.df1 \ 40 | .groupby(['random_bool', 'random_int']) \ 41 | .agg({'random_float': ['count','sum']}) 42 | assert_(flatten_grouped_dataframe(df2)) -------------------------------------------------------------------------------- /pandas_to_sql/utils/pandas_interceptor.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | import operator 3 | from pandas_to_sql.utils.pandas_dataframe_intercepter import PandasDataFrameIntercepter 4 | from pandas_to_sql.engine.table import create_table, Table 5 | 6 | class PandasIntercepter: 7 | def __init__(self, pandas): 8 | self.pandas = pandas 9 | 10 | def concat(self, objs, axis=0): 11 | objs_pandas = list(map(lambda x: x.df_pandas, objs)) 12 | a = self.pandas.concat(objs_pandas, axis=axis) 13 | objs_sql_convert = list(map(lambda x: x.df_sql_convert_table, objs)) 14 | b = concat(objs_sql_convert, axis=axis) 15 | return PandasDataFrameIntercepter(a,b) 16 | 17 | 18 | def concat(objs, axis=0): 19 | if axis != 0: 20 | raise Exception(f"supporting only axis==0") 21 | for df in objs: 22 | if not isinstance(df, Table): 23 | raise Exception(f'expected Table. 
got: {str(type(df))}') 24 | 25 | first = None 26 | for columns in list(map(lambda t: set(t.columns.keys()), objs)): 27 | if not first: 28 | first = columns 29 | else: 30 | if columns != first: 31 | raise Exception(f"expected all dataframes to have same columns") 32 | 33 | all_tables_sql_string = list(map(lambda x: x.get_sql_string(), objs)) 34 | new_table_sql_string = ' UNION ALL '.join(all_tables_sql_string) 35 | return create_table(table_name='Temp', 36 | columns=copy(objs[0]).columns, 37 | from_sql_string=new_table_sql_string) 38 | 39 | 40 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_merge.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas_to_sql.testing.utils.asserters import assert_ 3 | from pandas_to_sql.conventions import flatten_grouped_dataframe 4 | from copy import copy 5 | 6 | 7 | def test_merge_inner(): 8 | df = pytest.df1 9 | df2 = copy(df) 10 | df2['random_int_plus_3'] = df2.random_int + 3 11 | df2 = df2[df2.random_int < 3] 12 | df2 = df2[['random_int_plus_3','random_str']] 13 | df3 = df.merge(df2, on='random_str', how='inner') 14 | assert_(df3) 15 | 16 | 17 | def test_merge_left(): 18 | df = pytest.df1 19 | df2 = copy(df) 20 | df2['random_int_plus_3'] = df2.random_int + 3 21 | df2 = df2[df2.random_int < 3] 22 | df2 = df2[['random_int_plus_3','random_str']] 23 | df3 = df.merge(df2, on='random_str', how='left') 24 | assert_(df3) 25 | 26 | 27 | def test_merge_left_on_right_on_how_inner(): 28 | df = pytest.df1 29 | df2 = copy(df) 30 | df2['random_int_plus_3'] = df2.random_int + 3 31 | df2['random_str_2'] = df2.random_str 32 | df2 = df2[df2.random_int < 3] 33 | df2 = df2[['random_int_plus_3','random_str_2']] 34 | df3 = df.merge(df2, left_on='random_str', right_on='random_str_2', how='inner') 35 | assert_(df3) 36 | 37 | 38 | def test_merge_left_on_right_on_how_left(): 39 | df = pytest.df1 40 | df2 = copy(df) 41 | df2['random_int_plus_3'] = df2.random_int + 3 42 | df2['random_str_2'] = df2.random_str 43 | df2 = df2[df2.random_int < 3] 44 | df2 = df2[['random_int_plus_3','random_str_2']] 45 | df3 = df.merge(df2, left_on='random_str', right_on='random_str_2', how='left') 46 | assert_(df3) 47 | 48 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/utils/fake_data_creation.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | 5 | def random_datetimes_or_dates(start, end, out_format='datetime', n=10): 6 | ''' 7 | unix timestamp is in ns by default. 8 | I divide the unix time value by 10**9 to make it seconds (or 24*60*60*10**9 to make it days). 9 | The corresponding unit variable is passed to the pd.to_datetime function. 10 | Values for the (divide_by, unit) pair to select is defined by the out_format parameter. 
11 | for 1 -> out_format='datetime' 12 | for 2 -> out_format=anything else 13 | ''' 14 | (divide_by, unit) = ( 15 | 10**9, 's') if out_format == 'datetime' else (24*60*60*10**9, 'D') 16 | 17 | start_u = start.value//divide_by 18 | end_u = end.value//divide_by 19 | 20 | return pd.to_datetime(np.random.randint(start_u, end_u, n), unit=unit) 21 | 22 | 23 | def random_timedelta(start, end, n, unit='D', seed=None): 24 | if not seed: # from piR's answer 25 | np.random.seed(0) 26 | 27 | ndays = (end - start).days + 1 28 | return pd.to_timedelta(np.random.rand(n) * ndays, unit=unit) 29 | 30 | 31 | def create_fake_dataset(start=pd.to_datetime('2015-01-01'), end=pd.to_datetime('2018-01-01')): 32 | df = pd.DataFrame() 33 | df_size = 1000 34 | df_random_columns = { 35 | 'random_int': 'INT', 36 | 'random_float': 'FLOAT', 37 | 'random_bool': 'BOOL', 38 | 'random_datetime': 'DATETIME', 39 | 'random_str': 'VARCHAR', 40 | } 41 | df['random_int'] = np.random.randint(1, 6, df_size) 42 | df['random_float'] = np.random.randn(df_size) 43 | df['random_bool'] = np.random.randn(df_size) > 0 44 | df['random_datetime'] = random_datetimes_or_dates(start, end, n=df_size) 45 | df['random_str'] = pd.util.testing.rands_array(10, df_size) 46 | return df, df_random_columns 47 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_datetime.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta, datetime 2 | import pytest 3 | from pandas_to_sql.testing.utils.asserters import assert_, get_expected_and_actual 4 | from copy import copy 5 | import pandas as pd 6 | import pandas_to_sql 7 | 8 | 9 | def test_add_days(): 10 | df = pytest.df1 11 | df['new_value'] = df.random_datetime + timedelta(days=20) 12 | assert_(df) 13 | 14 | def test_radd_days(): 15 | df = pytest.df1 16 | df['new_value'] = timedelta(days=40) + df.random_datetime 17 | assert_(df) 18 | 19 | def test_sub_days(): 20 | df = pytest.df1 21 | df['new_value'] = df.random_datetime - timedelta(days=40) 22 | assert_(df) 23 | 24 | 25 | def test_add_zero_time_dateoffset(): 26 | df = pytest.df1 27 | df['new_value'] = df.random_datetime + pd.offsets.DateOffset(minutes=0, years=0) 28 | assert_(df) 29 | 30 | 31 | def test_dt_second(): 32 | df = pytest.df1 33 | df['seconds'] = df.random_datetime.dt.second 34 | assert_(df) 35 | 36 | def test_dt_month(): 37 | df = pytest.df1 38 | df['month'] = df.random_datetime.dt.month 39 | assert_(df) 40 | 41 | def test_dt_day(): 42 | df = pytest.df1 43 | df['day'] = df.random_datetime.dt.day 44 | assert_(df) 45 | 46 | def test_dt_hour(): 47 | df = pytest.df1 48 | df['hour'] = df.random_datetime.dt.hour 49 | assert_(df) 50 | 51 | def test_dt_year(): 52 | df = pytest.df1 53 | df['y'] = df.random_datetime.dt.year 54 | assert_(df) 55 | 56 | def test_dt_dayofweek(): 57 | df = pytest.df1 58 | df['dayofweek'] = df.random_datetime.dt.dayofweek 59 | assert_(df) 60 | 61 | def test_dt_week(): 62 | df = pytest.df1 63 | df['week'] = df.random_datetime.dt.week 64 | df_expected, df_actual = get_expected_and_actual(df) 65 | 66 | week_diff = (df_expected.week - df_actual.week).value_counts() 67 | 68 | # asserting week error <= 2. 
52 and 53 appear when the week number wraps around a year boundary (a modulo effect) 69 | assert (df_expected.week - df_actual.week).isin([0,1,2,52,53]).all() 70 | -------------------------------------------------------------------------------- README.md: -------------------------------------------------------------------------------- 1 | 2 | # pandas-to-sql 3 | **This library is not production ready!!** 4 | 5 | ## Intro 6 | Convert [pandas DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) manipulations to sql query string. 7 | 8 | ![Tests](https://github.com/AmirPupko/pandas-to-sql/workflows/Tests/badge.svg) 9 | 10 | ![Publish to PyPI](https://github.com/AmirPupko/pandas-to-sql/workflows/Publish%20to%20PyPI/badge.svg) 11 | 12 | Supports: 13 | - [sqlite](https://sqlite.org/) 14 | 15 | ### Try it yourself 16 | 17 | ```python 18 | >>> import pandas as pd 19 | >>> import pandas_to_sql 20 | >>> iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv') 21 | >>> df = pandas_to_sql.wrap_df(iris, table_name='iris') 22 | >>> df.get_sql_string() 23 | 'SELECT (sepal_length) AS sepal_length, (sepal_width) AS sepal_width, (petal_length) AS petal_length, (petal_width) AS petal_width, (species) AS species FROM iris' 24 | ``` 25 | 26 | ```python 27 | >>> df[df.species == 'setosa'].get_sql_string() 28 | "SELECT (sepal_length) AS sepal_length, (sepal_width) AS sepal_width, (petal_length) AS petal_length, (petal_width) AS petal_width, (species) AS species FROM iris WHERE ((species = 'setosa')) " 29 | ``` 30 | 31 | [Here are some more examples](https://github.com/AmirPupko/pandas-to-sql/blob/main/pandas_to_sql_colab_example.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AmirPupko/pandas-to-sql/blob/main/pandas_to_sql_colab_example.ipynb) 32 | 33 | 34 | ## Installation 35 | `pip install pandas-to-sql` 36 | 37 | 38 | ## Development 39 | 40 | ### Run example 41 | `python example_runner.py` 42 | 43 | ### Tests 44 | `pytest ./pandas_to_sql` 45 | 46 | ### Environment 47 | `conda env create -f environment.yml --prefix ./env` 48 | `conda activate ./env` 49 | `conda env update --prefix ./env -f environment.yml` 50 | `conda remove --prefix ./env --all` 51 | 52 | ### New release 53 | `python setup.py sdist bdist_wheel` 54 | `python -m twine upload --repository pypi --skip-existing dist/*` 55 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_str.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta, datetime 2 | import pytest 3 | from pandas_to_sql.testing.utils.asserters import assert_, get_expected_and_actual 4 | from copy import copy 5 | import pandas as pd 6 | import pandas_to_sql 7 | 8 | 9 | def test_replace(): 10 | df = pytest.df1 11 | df['new_value'] = df.random_str.str.replace('m','v').str.replace('z','_3') 12 | assert_(df) 13 | 14 | def test_lower(): 15 | df = pytest.df1 16 | df['new_value'] = df.random_str.str.lower() 17 | assert_(df) 18 | 19 | def test_upper(): 20 | df = pytest.df1 21 | df['new_value'] = df.random_str.str.upper() 22 | assert_(df) 23 | 24 | def test_slice1(): 25 | df = pytest.df1 26 | df['new_value'] = df.random_str.str.slice(1,3) 27 | assert_(df) 28 | 29 | def test_slice2(): 30 | df = pytest.df1 31 | df['new_value'] = df.random_str.str.slice(2) 32 | assert_(df) 33 | 34 | def test_slice3(): 35 | df = pytest.df1 36 | df['new_value'] = df.random_str.str.slice(stop=4) 37 |
assert_(df) 38 | 39 | def test_slice4(): 40 | df = pytest.df1 41 | df['new_value'] = df.random_str.str.slice(-1,-3) 42 | assert_(df) 43 | 44 | def test_strip(): 45 | df = pytest.df1 46 | df['new_value'] = df.random_str.str.strip('ABCKSLFjadkj') 47 | assert_(df) 48 | 49 | def test_strip_none_chars(): 50 | df = pytest.df1 51 | df['new_value1'] = df.random_str + ' ' 52 | df['new_value2'] = df.random_str.str.strip() 53 | assert_(df) 54 | 55 | def test_lstrip(): 56 | df = pytest.df1 57 | df['new_value'] = df.random_str.str.lstrip('ABCKSLFjadkj') 58 | assert_(df) 59 | 60 | 61 | def test_rstrip(): 62 | df = pytest.df1 63 | df['new_value'] = df.random_str.str.rstrip('ABCKSLFjadkj') 64 | assert_(df) 65 | 66 | def test_len(): 67 | df = pytest.df1 68 | df['new_value'] = df.random_str.str.len() 69 | assert_(df) 70 | 71 | def test_contains(): 72 | df = pytest.df1 73 | df['new_value1'] = df.random_str.str.contains('a') 74 | df['new_value2'] = df.random_str.str.contains('B') 75 | assert_(df) 76 | 77 | def test_contains_case_false(): 78 | df = pytest.df1 79 | df['new_value1'] = df.random_str.str.contains('a', case=False) 80 | df['new_value2'] = df.random_str.str.contains('B', case=False) 81 | assert_(df) -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_operations_numeric.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pandas_to_sql.testing.utils.asserters import assert_ 3 | 4 | 5 | def test_add(): 6 | df = pytest.df1 7 | df['new_value'] = df.random_float + 10 8 | assert_(df) 9 | 10 | def test_radd(): 11 | df = pytest.df1 12 | df['new_value'] = 10 + df.random_float 13 | assert_(df) 14 | 15 | def test_add_str(): 16 | df = pytest.df1 17 | df['new_value'] = df.random_str + '_some_other_str' 18 | assert_(df) 19 | 20 | def test_add_str_to_str(): 21 | df = pytest.df1 22 | df['new_value'] = df.random_str + '_' + df.random_str 23 | assert_(df) 24 | 25 | 26 | def test_sub(): 27 | df = pytest.df1 28 | df['new_value'] = df.random_float - 10 29 | assert_(df) 30 | 31 | def test_rsub(): 32 | df = pytest.df1 33 | df['new_value'] = 10 - df.random_float 34 | assert_(df) 35 | 36 | 37 | def test_mul(): 38 | df = pytest.df1 39 | df['new_value'] = df.random_float * 2 40 | assert_(df) 41 | 42 | def test_rmul(): 43 | df = pytest.df1 44 | df['new_value'] = 2.5 * df.random_int 45 | assert_(df) 46 | 47 | def test_truediv(): 48 | df = pytest.df1 49 | df['new_value'] = df.random_int / 2.0 50 | assert_(df) 51 | 52 | def test_truediv2(): 53 | df = pytest.df1 54 | df['new_value'] = df.random_float / 2 55 | assert_(df) 56 | 57 | def test_truediv_int_int(): 58 | df = pytest.df1 59 | df['new_value'] = df.random_int / 2 60 | assert_(df) 61 | 62 | def test_rtruediv(): 63 | df = pytest.df1 64 | df['new_value'] = 2 / df.random_float 65 | assert_(df) 66 | 67 | def test_floordiv(): 68 | df = pytest.df1 69 | df['new_value'] = df.random_float // 2.0 70 | assert_(df) 71 | 72 | def test_rfloordiv(): 73 | df = pytest.df1 74 | df['new_value'] = 1 // df.random_float 75 | assert_(df) 76 | 77 | def test_round(): 78 | df = pytest.df1 79 | df['new_value'] = df.random_float.round() 80 | assert_(df) 81 | 82 | def test_round_with_half_values(): 83 | df = pytest.df1 84 | df['a'] = 0.5 85 | df['b'] = 1.5 86 | df['c'] = 2.5 87 | df['d'] = 3.5 88 | df['e'] = -0.5 89 | df['f'] = -1.5 90 | df['g'] = -2.5 91 | df['h'] = -3.5 92 | 93 | for c in ['a','b','c','d','e','f','g','h']: 94 | df[c + '_new'] = df[c].round() 95 | 96 | assert_(df) 97 
| 98 | 99 | def test_abs(): 100 | df = pytest.df1 101 | df['new_value'] = df.random_float.abs() 102 | assert_(df) 103 | 104 | -------------------------------------------------------------------------------- /pandas_to_sql/testing/tests/test_operations_compare.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from pandas_to_sql import wrap_df 4 | from pandas_to_sql.testing.utils.asserters import assert_ 5 | 6 | 7 | def test_gt1(): 8 | df = pytest.df1 9 | df['new_value'] = df.random_float > 10 10 | assert_(df) 11 | 12 | def test_gt2(): 13 | df = pytest.df1 14 | df['new_value'] = df.random_int > 3 15 | assert_(df) 16 | 17 | def test_abs_float(): 18 | df = pytest.df1 19 | df['new_value'] = abs(df.random_float) 20 | assert_(df) 21 | 22 | def test_abs_int(): 23 | df = pytest.df1 24 | df['new_value'] = abs(df.random_int) 25 | assert_(df) 26 | 27 | def test_ge(): 28 | df = pytest.df1 29 | df['new_value'] = df.random_int >= 3 30 | assert_(df) 31 | 32 | def test_ge_float(): 33 | df = pytest.df1 34 | df['new_value'] = df.random_float >= 0 35 | assert_(df) 36 | 37 | def test_ge2(): 38 | df = pytest.df1 39 | df['new_value'] = df.random_int >= 3 40 | assert_(df) 41 | 42 | def test_lt(): 43 | df = pytest.df1 44 | df['new_value'] = df.random_int < 3 45 | assert_(df) 46 | 47 | def test_le(): 48 | df = pytest.df1 49 | df['new_value'] = df.random_int <= 3 50 | assert_(df) 51 | 52 | def test_eq(): 53 | df = pytest.df1 54 | df['new_value'] = df.random_int == 3 55 | assert_(df) 56 | 57 | def test_ne(): 58 | df = pytest.df1 59 | df['new_value'] = df.random_int != 3 60 | assert_(df) 61 | 62 | def test_tilde(): 63 | df = pytest.df1 64 | df['new_value'] = ~df.random_bool 65 | assert_(df) 66 | 67 | def test_neg_bool(): 68 | df = pytest.df1 69 | df['new_value'] = -df.random_bool 70 | assert_(df) 71 | 72 | def test_neg_numeric(): 73 | df = pytest.df1 74 | df['new_value'] = -df.random_int 75 | assert_(df) 76 | 77 | 78 | def test_two_conds_and(): 79 | df = pytest.df1 80 | df['new_value'] = (df.random_float > 1) & (df.random_float <=2) 81 | assert_(df) 82 | 83 | def test_two_conds_or(): 84 | df = pytest.df1 85 | df['new_value'] = (df.random_float > 1) or True 86 | assert_(df) 87 | 88 | def test_fillna(): 89 | df = pd.DataFrame({'col':[1,None,.3,-20,None]}) 90 | table_name = 'some_fillna_table_name' 91 | df.to_sql(table_name, pytest.sql_connection, if_exists='replace', index=False) 92 | df_ = wrap_df(df, table_name) 93 | 94 | df_['new_value'] = df_.col.fillna(2) 95 | 96 | assert_(df_) 97 | 98 | def test_fillna2(): 99 | df = pd.DataFrame({'col':[1,None,.3,-20,None]}) 100 | table_name = 'some_fillna_table_name' 101 | df.to_sql(table_name, pytest.sql_connection, if_exists='replace', index=False) 102 | df_ = wrap_df(df, table_name) 103 | 104 | df_['new_value'] = df_.col.fillna('f') 105 | 106 | assert_(df_) 107 | 108 | def test_astype(): 109 | df = pytest.df1 110 | df['new_value'] = df.random_float.astype(int) 111 | assert_(df) 112 | 113 | -------------------------------------------------------------------------------- /pandas_to_sql/engine/columns/str_column.py: -------------------------------------------------------------------------------- 1 | from pandas_to_sql.engine.columns.column import Column 2 | from pandas_to_sql.engine.columns.numeric_columns import IntColumn 3 | from pandas_to_sql.engine.columns.common import add_common_operators_to_class, value_to_sql_string, create_column_from_operation 4 | 5 | 6 | class StrColumn(Column): 7 | def
__init__(self, sql_string): 8 | super().__init__(dtype='VARCHAR', sql_string=sql_string) 9 | 10 | def __getattribute__(self, attr): 11 | if attr == 'str': 12 | return self 13 | return object.__getattribute__(self, attr) 14 | 15 | def __add__(self, r): 16 | return create_column_from_operation(self, r, StrColumn, '||') 17 | 18 | def __radd__(self, l): 19 | return create_column_from_operation(l, self, StrColumn, '||') 20 | 21 | 22 | add_common_operators_to_class(StrColumn) 23 | 24 | StrColumn.lower = lambda self: StrColumn(sql_string=f'(LOWER({value_to_sql_string(self)}))') 25 | StrColumn.upper = lambda self: StrColumn(sql_string=f'(UPPER({value_to_sql_string(self)}))') 26 | 27 | StrColumn.replace = lambda self, old, new: \ 28 | StrColumn(sql_string=f'(REPLACE({value_to_sql_string(self)}, {value_to_sql_string(old)}, {value_to_sql_string(new)}))') 29 | 30 | 31 | def slice_(self, start=None, stop=None, step=None): 32 | if step: raise Exception('slice "step" not supported') 33 | 34 | start = start if start else 0 35 | start+=1 36 | 37 | if stop: 38 | stop +=1 39 | length = stop - start 40 | s = f'(SUBSTR({value_to_sql_string(self)}, {start}, {length}))' 41 | else: 42 | s = f'(SUBSTR({value_to_sql_string(self)}, {start}))' 43 | 44 | return StrColumn(sql_string=s) 45 | 46 | 47 | StrColumn.slice = slice_ 48 | 49 | 50 | 51 | 52 | def strip_(self, op, chars=None): 53 | if not chars: 54 | chars = ' ' 55 | if not isinstance(chars, str): 56 | raise TypeError(f'"chars" must be str. got {str(type(chars))}') 57 | 58 | s = f"({op}({value_to_sql_string(self)}, {value_to_sql_string(chars)}))" 59 | return StrColumn(sql_string=s) 60 | 61 | 62 | StrColumn.strip = lambda self, chars=None: strip_(self, 'TRIM', chars) 63 | StrColumn.lstrip = lambda self, chars=None: strip_(self, 'LTRIM', chars) 64 | StrColumn.rstrip = lambda self, chars=None: strip_(self, 'RTRIM', chars) 65 | 66 | StrColumn.len = lambda self: IntColumn(sql_string=f'(LENGTH({value_to_sql_string(self)}))') 67 | 68 | 69 | 70 | def contains(self, s, case=True): 71 | if not isinstance(s, str): 72 | raise TypeError(f'"s" must be str. got {str(type(s))}') 73 | 74 | if case==False: 75 | sql_string = f"(INSTR(LOWER({value_to_sql_string(self)}), LOWER({value_to_sql_string(s)})))" 76 | else: 77 | sql_string = f"(INSTR({value_to_sql_string(self)}, {value_to_sql_string(s)}))" 78 | 79 | # sql_string = f"(INSTR({value_to_sql_string(self)}, {value_to_sql_string(s)}))" 80 | sql_string = f"(CAST({sql_string} > 0 AS BOOL))" 81 | return StrColumn(sql_string=sql_string) 82 | 83 | 84 | StrColumn.contains = contains 85 | -------------------------------------------------------------------------------- /pandas_to_sql/engine/columns/datetime_column.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from dateutil.relativedelta import relativedelta 3 | import pandas as pd 4 | from pandas_to_sql.engine.columns.column import Column 5 | from pandas_to_sql.engine.columns.common import add_common_operators_to_class, value_to_sql_string 6 | from pandas_to_sql.engine.columns.numeric_columns import IntColumn, FloatColumn 7 | 8 | time_unit_to_format = { 9 | 'second': '%S', 10 | 'month': '%m', 11 | 'minute': '%M', 12 | 'hour': '%H', 13 | 'week': '%W', 14 | 'year': '%Y', 15 | 'day': '%d', 16 | 'dayofweek': '%w'} 17 | 18 | 19 | class DatetimeColumn(Column): 20 | 21 | def __init__(self, sql_string): 22 | super().__init__(dtype='DATETIME', sql_string=sql_string) 23 | 24 | def __getattribute__(self, attr): 25 | if attr == 'dt': 26 | return self 27 | if attr == 'dayofweek': 28 | return self.extract_dayofweek() 29 | if attr in time_unit_to_format.keys(): 30 | return self.extract_time_unit(time_unit_to_format[attr]) 31 | 32 | return object.__getattribute__(self, attr) 33 | 34 | 35 | def get_sql_for_format(self, format): 36 | return f"(CAST(strftime('{format}', {value_to_sql_string(self)}) AS INT))" 37 | 38 | def extract_time_unit(self, format): 39 | sql_string = self.get_sql_for_format(format) 40 | return IntColumn(sql_string=sql_string) 41 | 42 | def extract_dayofweek(self): 43 | sql_string = self.get_sql_for_format(time_unit_to_format['dayofweek']) 44 | sql_string = f"( ({sql_string} + 6) % 7)" 45 | return IntColumn(sql_string=sql_string) 46 | 47 | 48 | def __my_add__(col, v): 49 | if isinstance(v, timedelta): 50 | # https://docs.python.org/3/library/datetime.html#datetime.timedelta 51 | sign = '+' if v.days >= 0 else '-' 52 | added_days = f"'{sign}{abs(v.days)} days'" 53 | 54 | sign = '+' if v.seconds >= 0 else '-' 55 | added_seconds = f"'{sign}{abs(v.seconds)} seconds'" 56 | 57 | sql_string = f"(datetime({value_to_sql_string(col)}, {added_days}, {added_seconds}))" 58 | return DatetimeColumn(sql_string=sql_string) 59 | elif isinstance(v, relativedelta): 60 | s = [] 61 | for t_type, t_value in v.kwds.items(): 62 | sign = '+' if t_value >= 0 else '-' 63 | s.append(f"'{sign}{abs(t_value)} {t_type}'") 64 | sql_string = f"(datetime({value_to_sql_string(col)}, {', '.join(s)}))" 65 | return DatetimeColumn(sql_string=sql_string) 66 | elif isinstance(v, pd.offsets.DateOffset): 67 | s = [] 68 | for t_type, t_value in v.kwds.items(): 69 | sign = '+' if t_value >= 0 else '-' 70 | s.append(f"'{sign}{abs(t_value)} {t_type}'") 71 | sql_string = f"(datetime({value_to_sql_string(col)}, {', '.join(s)}))" 72 | return DatetimeColumn(sql_string=sql_string) 73 | else: 74 | raise Exception(f'Supporting only timedelta, relativedelta and DateOffset, got {str(type(v))}') 75 | 76 | 77 | add_common_operators_to_class(DatetimeColumn) 78 | 79 | DatetimeColumn.__add__ = __my_add__ 80 | DatetimeColumn.__radd__ = lambda self, l: __my_add__(self,
l) 81 | DatetimeColumn.__sub__ = lambda self, r: __my_add__(self, -r) 82 | 83 | -------------------------------------------------------------------------------- /pandas_to_sql/engine/columns/common.py: -------------------------------------------------------------------------------- 1 | 2 | import numbers 3 | import operator 4 | from datetime import datetime 5 | from pandas_to_sql.engine.columns.column import Column 6 | from pandas_to_sql.utils.helpers import convert_df_type 7 | 8 | def get_column_class_from_type(col_type): 9 | from pandas_to_sql.engine.columns.bool_column import BoolColumn 10 | from pandas_to_sql.engine.columns.numeric_columns import IntColumn, FloatColumn 11 | from pandas_to_sql.engine.columns.str_column import StrColumn 12 | from pandas_to_sql.engine.columns.datetime_column import DatetimeColumn 13 | if col_type == 'BOOL': return BoolColumn 14 | elif col_type == 'INT': return IntColumn 15 | elif col_type == 'FLOAT': return FloatColumn 16 | elif col_type == 'VARCHAR': return StrColumn 17 | elif col_type == 'DATETIME': return DatetimeColumn 18 | else: raise Exception(f"could not convert column type. got: {str(col_type)}") 19 | 20 | 21 | def value_to_sql_string(value): 22 | if isinstance(value, numbers.Number): 23 | return str(value) 24 | elif isinstance(value, str): 25 | return "'" + value + "'" 26 | elif isinstance(value, datetime): 27 | return f"datetime('{value.strftime('%Y-%m-%d %H:%M:%S')}')" 28 | elif isinstance(value, Column): 29 | return value.sql_string 30 | raise Exception(f"Value not supported. supporting: primitives and {str(Column)}. got {str(type(value))}") 31 | 32 | 33 | def create_column_from_value(v): 34 | from pandas_to_sql.engine.columns.bool_column import BoolColumn 35 | from pandas_to_sql.engine.columns.str_column import StrColumn 36 | from pandas_to_sql.engine.columns.datetime_column import DatetimeColumn 37 | from pandas_to_sql.engine.columns.numeric_columns import IntColumn, FloatColumn 38 | sql_string = value_to_sql_string(v) 39 | if isinstance(v, bool): return BoolColumn(sql_string)  # check bool before int: bool is a subclass of int 40 | if isinstance(v, int): return IntColumn(sql_string) 41 | if isinstance(v, float): return FloatColumn(sql_string) 42 | if isinstance(v, str): return StrColumn(sql_string) 43 | if isinstance(v, datetime): return DatetimeColumn(sql_string) 44 | 45 | raise Exception(f'trying to set table column with unsupported type. expected types are Column or primitives. got type: {str(type(v))}') 46 | 47 | def create_column_from_operation(l, r, dtype, op): 48 | return dtype(sql_string=f'({value_to_sql_string(l)} {op} {value_to_sql_string(r)})') 49 | 50 | 51 | def add_common_operators_to_class(class_type): 52 | from pandas_to_sql.engine.columns.bool_column import BoolColumn 53 | 54 | def __lt__(self,other): 55 | return create_column_from_operation(self, other, BoolColumn, '<') 56 | 57 | def __le__(self,other): 58 | return create_column_from_operation(self, other, BoolColumn, '<=') 59 | 60 | def __gt__(self,other): 61 | return create_column_from_operation(self, other, BoolColumn, '>') 62 | 63 | def __ge__(self,other): 64 | return create_column_from_operation(self, other, BoolColumn, '>=') 65 | 66 | def __eq__(self,other): 67 | return create_column_from_operation(self, other, BoolColumn, '=') 68 | 69 | def __ne__(self,other): 70 | return create_column_from_operation(self, other, BoolColumn, '<>') 71 | 72 | def __and__(self,other): 73 | return create_column_from_operation(self, other, BoolColumn, 'AND') 74 | 75 | def __or__(self,other): 76 | return create_column_from_operation(self, other, BoolColumn, 'OR') 77 | 78 | def fillna(self, v): 79 | dtype = type(self) 80 | return dtype(sql_string=f'(IFNULL({value_to_sql_string(self)}, {value_to_sql_string(v)}))') 81 | 82 | def astype(self, t): 83 | tt = convert_df_type(t) 84 | dtype = get_column_class_from_type(tt) 85 | return dtype(sql_string=f'(CAST({value_to_sql_string(self)} AS {tt}))') 86 | 87 | 88 | class_type.__lt__ = __lt__ 89 | class_type.__gt__ = __gt__ 90 | class_type.__le__ = __le__ 91 | class_type.__ge__ = __ge__ 92 | class_type.__eq__ = __eq__ 93 | class_type.__ne__ = __ne__ 94 | class_type.__and__ = __and__ 95 | class_type.__or__ = __or__ 96 | class_type.fillna = fillna 97 | class_type.astype = astype 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /pandas_to_sql/engine/columns/numeric_columns.py: -------------------------------------------------------------------------------- 1 | from pandas_to_sql.engine.columns.column import Column 2 | from pandas_to_sql.engine.columns.common import add_common_operators_to_class, value_to_sql_string, create_column_from_operation 3 | 4 | 5 | class FloatColumn(Column): 6 | def __init__(self, sql_string): 7 | super().__init__(dtype='FLOAT', sql_string=sql_string) 8 | 9 | 10 | class IntColumn(Column): 11 | def __init__(self, sql_string): 12 | super().__init__(dtype='INT', sql_string=sql_string) 13 | 14 | 15 | def __floordiv__(self, r): 16 | # http://sqlite.1065341.n5.nabble.com/floor-help-td46158.html 17 | return FloatColumn(sql_string=f'( ROUND(({value_to_sql_string(self)} / {value_to_sql_string(r)}) - 0.5) )') 18 | 19 | def __rfloordiv__(self, l): 20 | # http://sqlite.1065341.n5.nabble.com/floor-help-td46158.html 21 | return FloatColumn(sql_string=f'( ROUND(({value_to_sql_string(l)} / {value_to_sql_string(self)}) - 0.5) )') 22 | 23 | def is_int(v): 24 | return isinstance(v, int) or isinstance(v, IntColumn) 25 | 26 | def numeric_op_result_from_types(l, r): 27 | x = IntColumn if is_int(l) and is_int(r) else FloatColumn 28 | return x 29 | 30 | def __add__(self, r): 31 | res_column_type = numeric_op_result_from_types(self, r) 32 | return create_column_from_operation(self, r, res_column_type, '+') 33 | 34 | def __radd__(self, l): 35 | res_column_type = numeric_op_result_from_types(l, self) 36 | return create_column_from_operation(l, self, res_column_type, '+') 37 | 38 | def __sub__(self, r): 39 |
res_column_type = numeric_op_result_from_types(self, r) 40 | return create_column_from_operation(self, r, res_column_type, '-') 41 | 42 | def __rsub__(self, l): 43 | res_column_type = numeric_op_result_from_types(l, self) 44 | return create_column_from_operation(l, self, res_column_type, '-') 45 | 46 | def __mul__(self, r): 47 | res_column_type = numeric_op_result_from_types(self, r) 48 | return create_column_from_operation(self, r, res_column_type, '*') 49 | 50 | def __rmul__(self, l): 51 | res_column_type = numeric_op_result_from_types(l, self) 52 | return create_column_from_operation(l, self, res_column_type, '*') 53 | 54 | def __truediv__(self, r): 55 | return FloatColumn(sql_string=f'(({value_to_sql_string(self)} + 0.0) / {value_to_sql_string(r)})') 56 | 57 | def __rtruediv__(self, l): 58 | return FloatColumn(sql_string=f'(({value_to_sql_string(l)} + 0.0) / {value_to_sql_string(self)})') 59 | 60 | def __abs__(self): 61 | return type(self)(sql_string=f'ABS({value_to_sql_string(self)})') 62 | 63 | def __neg__(self): 64 | return type(self)(sql_string=f'(-({value_to_sql_string(self)}))') 65 | 66 | 67 | def round_(self): 68 | # https://docs.python.org/3/library/functions.html#round 69 | v = value_to_sql_string(self) 70 | integer_part = f'(CAST({v} AS INT))' 71 | fractional_part = f'(ABS({v}) - ROUND(ABS({v})-0.5))' 72 | 73 | is_integer_part_even = f'({integer_part}%2 == 0)' 74 | is_fractional_part_exactly_half = f'({fractional_part}==.5)' 75 | 76 | simple_round = f'(ROUND({v}))' 77 | round_with_change = f'(CASE WHEN {v}>0 THEN ROUND({v}-0.001) ELSE ROUND({v}+0.001) END)' 78 | 79 | s = f'(CASE WHEN {is_fractional_part_exactly_half} AND {is_integer_part_even} THEN {round_with_change} ELSE {simple_round} END)' 80 | return FloatColumn(sql_string=s) 81 | 82 | def abs_(self): 83 | return type(self)(sql_string=f'ABS({value_to_sql_string(self)})') 84 | 85 | 86 | 87 | add_common_operators_to_class(FloatColumn) 88 | FloatColumn.__add__ = __add__ 89 | FloatColumn.__radd__ = __radd__ 90 | FloatColumn.__sub__ = __sub__ 91 | FloatColumn.__rsub__ = __rsub__ 92 | FloatColumn.__mul__ = __mul__ 93 | FloatColumn.__rmul__ = __rmul__ 94 | FloatColumn.__floordiv__ = __floordiv__ 95 | FloatColumn.__rfloordiv__ = __rfloordiv__ 96 | FloatColumn.__truediv__ = __truediv__ 97 | FloatColumn.__rtruediv__ = __rtruediv__ 98 | FloatColumn.__abs__ = __abs__ 99 | FloatColumn.__neg__ = __neg__ 100 | FloatColumn.round = round_ 101 | FloatColumn.abs = abs_ 102 | 103 | 104 | add_common_operators_to_class(IntColumn) 105 | IntColumn.__add__ = __add__ 106 | IntColumn.__radd__ = __radd__ 107 | IntColumn.__sub__ = __sub__ 108 | IntColumn.__rsub__ = __rsub__ 109 | IntColumn.__mul__ = __mul__ 110 | IntColumn.__rmul__ = __rmul__ 111 | IntColumn.__floordiv__ = __floordiv__ 112 | IntColumn.__rfloordiv__ = __rfloordiv__ 113 | IntColumn.__truediv__ = __truediv__ 114 | IntColumn.__rtruediv__ = __rtruediv__ 115 | IntColumn.__abs__ = __abs__ 116 | IntColumn.__neg__ = __neg__ 117 | IntColumn.round = round_ 118 | IntColumn.abs = abs_ 119 | 120 | -------------------------------------------------------------------------------- /pandas_to_sql/engine/grouped_table.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | from pandas_to_sql.engine.columns.column import Column 3 | from pandas_to_sql.engine.columns.common import get_column_class_from_type 4 | 5 | class GroupedTable: 6 | table = None 7 | groupings = None 8 | 9 | def __init__(self, table, groupings): 10 | self.table = table 11 | 
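# 'groupings' maps each group-by column name to its Column expression;
# agg() re-emits these both in the SELECT list and in the GROUP BY clause.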
self.groupings = groupings 12 | 13 | def __copy__(self): 14 | return GroupedTable(copy(self.table), copy(self.groupings)) 15 | 16 | def __getitem__(self, key): 17 | if isinstance(key, Column): 18 | raise Exception('Cant filter/where GroupedTable') 19 | if isinstance(key, list): 20 | return GroupedTable(self.table[key], copy(self.groupings)) 21 | if isinstance(key, str): 22 | return GroupedTable(self.table[[key]], copy(self.groupings)) 23 | raise Exception(f'GroupedTable __getitem__ key type not supported. type: {str(type(key))}') 24 | 25 | def __setitem__(self, key, newvalue): 26 | raise Exception(f'GroupedTable __setitem__ not supported') 27 | 28 | def __getattr__(self, attribute_name): 29 | return self[attribute_name] 30 | 31 | def mean(self): 32 | return self.agg(dict(map(lambda k: (k,'mean'),self.table.columns.keys()))) 33 | 34 | def count(self): 35 | return self.agg(dict(map(lambda k: (k,'count'),self.table.columns.keys()))) 36 | 37 | def sum(self): 38 | return self.agg(dict(map(lambda k: (k,'sum'),self.table.columns.keys()))) 39 | 40 | def agg(self, v): 41 | if isinstance(v, str): 42 | return self.agg(dict(zip(self.table.columns.keys(), v))) 43 | elif isinstance(v, list): 44 | return self.agg(dict(zip(self.table.columns.keys(), v))) 45 | elif isinstance(v, dict): 46 | if len( set(v.keys()) & set(self.groupings.keys()) ) > 0: 47 | raise Exception("grouped table doesnt support same column in 'on' and 'select'") 48 | self_table_copy = copy(self.table) 49 | # create groupby columns query 50 | groupby_select_columns = {} 51 | for column_name in v.keys(): 52 | column = self_table_copy[column_name] 53 | operations = v[column_name] if isinstance(v[column_name], list) else [v[column_name]] 54 | for operation in operations: 55 | join_str_seperator = None 56 | operation_column_name_override = None 57 | dtype = None 58 | 59 | if callable(operation) and operation.__qualname__=='str.join': 60 | join_str_seperator = operation.__self__ 61 | operation_column_name_override = 'join' 62 | operation = 'group_concat' 63 | # if not isinstance(operation, str): 64 | # raise Exception(f"groupby agg support only str name for operations or ','.join. got: {type(operation)}") 65 | # SUPPORTED_OPERATIONS = ['count','sum','mean','avg'] 66 | # if operation not in SUPPORTED_OPERATIONS: 67 | # raise Exception(f"groupby operation '{operation}' is not supported. 
/pandas_to_sql/utils/pandas_dataframe_intercepter.py:
--------------------------------------------------------------------------------
1 | from copy import copy
2 | import operator
3 | # Proxy that mirrors every operation onto both a real pandas object and its SQL Table twin.
4 | class PandasDataFrameIntercepter:
5 |     def __init__(self, df_pandas, df_sql_convert_table):
6 |         self.df_pandas = df_pandas
7 |         self.df_sql_convert_table = df_sql_convert_table
8 | 
9 |     def __repr__(self):
10 |         return self.df_pandas.__repr__()
11 | 
12 |     def __format__(self, fmt):
13 |         return self.df_pandas.__format__(fmt)
14 | 
15 |     def __str__(self):
16 |         return self.df_pandas.__str__()
17 | 
18 |     @staticmethod
19 |     def get_attr_for_df_pandas_if_needed(obj):
20 |         if isinstance(obj, PandasDataFrameIntercepter):
21 |             return object.__getattribute__(obj, 'df_pandas')
22 |         else:
23 |             return obj
24 | 
25 |     @staticmethod
26 |     def get_attr_for_df_sql_convert_table_if_needed(obj):
27 |         if isinstance(obj, PandasDataFrameIntercepter):
28 |             return object.__getattribute__(obj, 'df_sql_convert_table')
29 |         else:
30 |             return obj
31 | 
32 |     def __getattribute__(self, name):
33 |         if name in ['df_pandas', 'df_sql_convert_table']:
34 |             return object.__getattribute__(self, name)
35 | 
36 |         df_sql_convert_table_attr = self.df_sql_convert_table.__getattribute__(name)
37 |         if name=='get_sql_string' and hasattr(df_sql_convert_table_attr, '__call__'):
38 |             return lambda *args, **kwargs: df_sql_convert_table_attr(*args, **kwargs)
39 | 
40 |         df_pandas_attr = self.df_pandas.__getattribute__(name)
41 |         if name=='columns' and not hasattr(df_pandas_attr, '__call__'):
42 |             return df_pandas_attr
43 | 
44 |         if hasattr(df_sql_convert_table_attr, '__call__'):
45 |             def _(*args, **kwargs):
46 |                 def __dictionary_map_values(d, func):
47 |                     return {k: func(v) for k, v in d.items()}
48 | 
49 |                 args_df_pandas = tuple(map(PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed, args))
50 |                 
args_obj_new = tuple(map(PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed, args)) 51 | 52 | kwargs_df_pandas = __dictionary_map_values(kwargs, PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed) 53 | kwargs_obj_new = __dictionary_map_values(kwargs, PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed) 54 | 55 | a = df_pandas_attr(*args_df_pandas, **kwargs_df_pandas) 56 | b = df_sql_convert_table_attr(*args_obj_new, **kwargs_obj_new) 57 | return PandasDataFrameIntercepter(a, b) 58 | return _ 59 | else: 60 | return PandasDataFrameIntercepter(df_pandas_attr, df_sql_convert_table_attr) 61 | 62 | def __getitem__(self, key): 63 | a = self.df_pandas[PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(key)] 64 | b = self.df_sql_convert_table[PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(key)] 65 | return PandasDataFrameIntercepter(a, b) 66 | 67 | def __setitem__(self, key, newvalue): 68 | self.df_pandas[key] = PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(newvalue) 69 | self.df_sql_convert_table[key] = PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(newvalue) 70 | return PandasDataFrameIntercepter(self.df_pandas, self.df_sql_convert_table) 71 | 72 | def __getattr__(self, attribute_name): 73 | a = self.df_pandas[attribute_name] 74 | b = self.df_sql_convert_table[attribute_name] 75 | return PandasDataFrameIntercepter(a, b) 76 | 77 | def __copy__(self): 78 | return PandasDataFrameIntercepter(copy(self.df_pandas), copy(self.df_sql_convert_table)) 79 | 80 | @staticmethod 81 | def run_operation_and_return(left, right, op): 82 | left_ = PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(left) 83 | right_ = PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(right) 84 | a = op(left_, right_) 85 | 86 | left_ = PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(left) 87 | right_ = PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(right) 88 | b = op(left_, right_) 89 | return PandasDataFrameIntercepter(a, b) 90 | 91 | @staticmethod 92 | def run_operation_single_and_return(obj, op): 93 | a = PandasDataFrameIntercepter.get_attr_for_df_pandas_if_needed(obj) 94 | b = PandasDataFrameIntercepter.get_attr_for_df_sql_convert_table_if_needed(obj) 95 | a = op(a) 96 | b = op(b) 97 | return PandasDataFrameIntercepter(a, b) 98 | 99 | # comparisons 100 | def __lt__(self,other): 101 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.lt) 102 | 103 | def __le__(self,other): 104 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.le) 105 | 106 | def __gt__(self,other): 107 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.gt) 108 | 109 | def __ge__(self,other): 110 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.ge) 111 | 112 | def __eq__(self,other): 113 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.eq) 114 | 115 | def __ne__(self,other): 116 | return PandasDataFrameIntercepter.run_operation_and_return(self, other, operator.ne) 117 | 118 | def __abs__(self): 119 | return PandasDataFrameIntercepter.run_operation_single_and_return(self, operator.abs) 120 | 121 | def __neg__(self): 122 | return PandasDataFrameIntercepter.run_operation_single_and_return(self, operator.neg) 123 | 124 | def __invert__(self): 125 | return PandasDataFrameIntercepter.run_operation_single_and_return(self, 
operator.invert)
126 | 
127 |     def __contains__(self, r):
128 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.contains)
129 | 
130 |     # numeric
131 |     def __add__(self, r):
132 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.add)
133 | 
134 |     def __sub__(self, r):
135 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.sub)
136 | 
137 |     def __mul__(self, r):
138 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.mul)
139 | 
140 |     # def __matmul__(self, r):
141 |     #     return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.matmul)
142 | 
143 |     def __truediv__(self, r):
144 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.truediv)
145 | 
146 |     def __floordiv__(self, r):
147 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.floordiv)
148 | 
149 |     def __mod__(self, r):
150 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.mod)
151 | 
152 |     def __pow__(self, r):
153 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.pow)
154 | 
155 |     def __and__(self, r):
156 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.and_)
157 | 
158 |     def __or__(self, r):
159 |         return PandasDataFrameIntercepter.run_operation_and_return(self, r, operator.or_)
160 | 
161 |     # numeric r
162 |     def __radd__(self, l):
163 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.add)
164 | 
165 |     def __rsub__(self, l):
166 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.sub)
167 | 
168 |     def __rmul__(self, l):
169 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.mul)
170 | 
171 |     def __rmatmul__(self, l):
172 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.matmul)
173 | 
174 |     def __rtruediv__(self, l):
175 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.truediv)
176 | 
177 |     def __rfloordiv__(self, l):
178 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.floordiv)
179 | 
180 |     def __rmod__(self, l):
181 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.mod)
182 | 
183 |     def __rpow__(self, l):
184 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.pow)
185 | 
186 |     def __rand__(self, l):
187 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.and_)
188 | 
189 |     def __ror__(self, l):
190 |         return PandasDataFrameIntercepter.run_operation_and_return(l, self, operator.or_)
191 | 
192 | 
193 | 
194 | 
195 | 
--------------------------------------------------------------------------------
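The intercepter above is the library's core trick: every attribute access, method call, and operator is applied to both the live pandas object and its Table twin, so the pandas result and the accumulated SQL stay in lockstep. A minimal sketch (names invented for this example):

import pandas as pd
import pandas_to_sql

numbers = pd.DataFrame({'x': [1, 2, 3]})
t = pandas_to_sql.wrap_df(numbers, 'numbers')

filtered = t[t.x > 1]             # pandas boolean indexing and Table.where run together
print(filtered.df_pandas)         # the real, already-filtered pandas DataFrame
print(filtered.get_sql_string())  # prints something like: SELECT (x) AS x FROM numbers WHERE ((x > 1))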
/pandas_to_sql/engine/table.py:
--------------------------------------------------------------------------------
1 | from copy import copy
2 | from pandas_to_sql.engine.columns.column import Column
3 | from pandas_to_sql.engine.grouped_table import GroupedTable
4 | from pandas_to_sql.engine.columns.common import get_column_class_from_type, create_column_from_value
5 | 
6 | 
7 | class Table:
8 |     table_name = None
9 |     columns = None
10 |     filters = None
11 |     from_sql_string = None
12 |     had_changed = None
13 | 
14 |     def __init__(self, table_name, columns, from_sql_string, filters, had_changed):
15 |         self.table_name = table_name
16 |         self.columns = columns
17 |         self.filters = filters
18 |         self.from_sql_string = from_sql_string
19 |         self.had_changed = had_changed
20 | 
21 |     def __getitem__(self, key):
22 |         if isinstance(key, Column):
23 |             if key.dtype != 'BOOL':
24 |                 raise Exception('Can only filter/where using a column of type BOOL, got %s' % (key.dtype))
25 |             return self.where(key)
26 |         if isinstance(key, list):
27 |             if not all(map(lambda x: isinstance(x, str), key)):
28 |                 raise Exception('List must be all strings, got %s' % (key))
29 |             if not all(map(lambda x: x in self.columns, key)):
30 |                 raise Exception('All column names must be columns in the table, got %s' % (key))
31 |             return self.select(key)
32 | 
33 |         c = copy(self.columns[key])
34 |         return c
35 | 
36 |     def __setitem__(self, key, newvalue):
37 |         if isinstance(newvalue, Column):  # isinstance already covers Column subclasses
38 |             self.columns[key] = newvalue
39 |             self.had_changed = True
40 |         else:
41 |             self.columns[key] = create_column_from_value(newvalue)
42 |             self.had_changed = True
43 | 
44 |     def __getattr__(self, attribute_name):
45 |         return self[attribute_name]
46 | 
47 |     def __copy__(self):
48 |         columns_copy = {}
49 |         for c in self.columns.keys():
50 |             columns_copy[c] = self[c] # column deep copy will occur in __getitem__
51 | 
52 |         filters_copy = []
53 |         for f in self.filters: filters_copy.append(copy(f))
54 | 
55 |         result_table = create_table(table_name=self.table_name,
56 |                                     from_sql_string=self.from_sql_string,
57 |                                     had_changed=self.had_changed,
58 |                                     columns=columns_copy,
59 |                                     filters=filters_copy)
60 |         return result_table
61 | 
62 |     def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''):
63 |         return copy(self)
64 | 
65 |     def to_frame(self):
66 |         return copy(self)
67 | 
68 |     def rename(self, columns):
69 |         new_table = copy(self)
70 |         new_table.had_changed = True  # mark the copy, not self, so the original table is untouched
71 |         new_columns = {}
72 |         for col_name, col_value in new_table.columns.items():
73 |             if col_name in columns.keys():
74 |                 new_columns[columns[col_name]] = col_value
75 |             else:
76 |                 new_columns[col_name] = col_value
77 | 
78 |         new_table.columns = new_columns
79 |         return new_table
80 | 
81 |     def drop(self, columns):
82 |         new_table = copy(self)
83 |         new_table.had_changed = True
84 |         new_columns = { col_name: col_value
85 |                         for col_name, col_value in new_table.columns.items()
86 |                         if col_name not in columns }
87 |         new_table.columns = new_columns
88 |         return new_table
89 | 
90 |     def where(self, cond_column):
91 |         new_table = copy(self)
92 |         new_table.had_changed = True
93 |         new_table.filters.append(cond_column)
94 |         return new_table
95 | 
96 |     def select(self, columns_names):
97 |         new_table = copy(self)
98 |         new_table.had_changed = True
99 |         # filter only selected columns from columns dictionary
100 |         new_table.columns = \
101 |             {col_name:col_val for (col_name, col_val) in new_table.columns.items() if col_name in columns_names}
102 |         return new_table
103 | 
104 |     def merge(self, right, how='inner', on=None, left_on=None, right_on=None):
105 |         if not isinstance(right, Table):
106 |             raise Exception("merge expects right to be of type: %s, got: %s" % (str(Table), str(type(right))))
107 |         if how not in ['left', 'inner']:
108 |             raise Exception("merge 'how' value must be in ['left', 'inner']")
109 | 
110 |         left = copy(self)
111 |         right = copy(right)
112 |         if len(set(left.columns.keys()) & set(right.columns.keys())) > 1:
113 |             raise Exception("merge got duplicates columns in both tables (except 'on' value)")
114 | 
115 |         left_on_column = None
116 |         right_on_column = None
117 |         if on and not left_on and not right_on:
118 |             left_on_column = on
119 |             right_on_column = on
120 |         elif left_on and right_on and not on:
121 |             
left_on_column = left_on
122 |             right_on_column = right_on
123 |         else:
124 |             raise Exception("got unexpected on/left_on/right_on values.")
125 | 
126 |         if not isinstance(left_on_column, str) or \
127 |             not isinstance(right_on_column, str):
128 |             raise Exception("'on/left_on/right_on' must be str")
129 | 
130 |         if left_on_column not in left.columns or right_on_column not in right.columns:
131 |             raise Exception("merge 'on/left_on/right_on' value must be in both tables as column")
132 | 
133 |         left_columns = dict(zip(left.columns.keys(), map(lambda x: left[x], left.columns.keys())))
134 |         right_columns = dict(zip(right.columns.keys(), map(lambda x: right[x], right.columns.keys())))
135 | 
136 |         # creating new table columns
137 |         if left_on_column == right_on_column:
138 |             right_columns.pop(left_on_column)  # 'on' may be None when left_on/right_on were used
139 |         new_table_columns = {**left_columns, **right_columns}
140 | 
141 |         # creating new table sql string
142 |         single_select_field_format = 't1.%s AS %s'
143 |         selected_fields_left = ', '.join(list(map(lambda x: single_select_field_format % (x, x), left_columns.keys())))
144 | 
145 |         single_select_field_format = 't2.%s AS %s'
146 |         selected_fields_right = ', '.join(list(map(lambda x: single_select_field_format % (x, x), right_columns.keys())))
147 | 
148 |         selected_fields = selected_fields_left
149 |         if selected_fields_right:
150 |             selected_fields += ', ' + selected_fields_right
151 | 
152 |         new_table_sql_string = f'SELECT {selected_fields} FROM ({left.get_sql_string()}) AS t1 {how.upper()} JOIN ({right.get_sql_string()}) AS t2 ON t1.{left_on_column}=t2.{right_on_column}'
153 | 
154 |         return create_table(table_name='Temp',
155 |                             columns=new_table_columns,
156 |                             from_sql_string=new_table_sql_string)
157 | 
158 |     def groupby(self, by):
159 |         def __get_column_key(col):
160 |             for k in self.columns.keys():
161 |                 if self.columns[k].sql_string==col.sql_string: return k
162 |             raise Exception('groupby got column that is not in table')
163 | 
164 |         groupings = None
165 |         if isinstance(by, str):
166 |             groupings = {by:self[by]}
167 |         elif isinstance(by, Column):
168 |             groupings = {__get_column_key(by): copy(by)}
169 |         elif isinstance(by, list):
170 |             groupings = {}
171 |             for b in by:
172 |                 if isinstance(b, str): groupings[b] = self[b]
173 |                 elif isinstance(b, Column): groupings[__get_column_key(b)] = copy(b)
174 |                 else: raise Exception(f'groupby got unexpected type. expected str or Column, got: {str(type(b))}')
175 |         else:
176 |             raise Exception("groupby 'by' value must be str OR list[str] OR Column OR list[Column]")
177 | 
178 |         return GroupedTable(copy(self), groupings=groupings)
179 | 
180 |     def get_sql_string(self):
181 |         if self.from_sql_string and not self.had_changed:
182 |             return self.from_sql_string
183 | 
184 |         from_field = None
185 |         selected_fields = None
186 |         if self.from_sql_string:
187 |             from_field = f'({self.from_sql_string}) AS {self.table_name}'
188 |         else:
189 |             from_field = self.table_name
190 | 
191 |         single_select_field_format = '(%s) AS %s'
192 |         selected_fields = ', '.join(list(map(lambda x: single_select_field_format % (self[x].sql_string, x), self.columns.keys())))
193 | 
194 |         single_where_field_format = '(%s)'
195 |         where_cond = ' AND '.join(list(map(lambda c: single_where_field_format % (c.sql_string), self.filters)))
196 | 
197 |         if where_cond:
198 |             return f'SELECT {selected_fields} FROM {from_field} WHERE {where_cond} '
199 |         else:
200 |             return f'SELECT {selected_fields} FROM {from_field}'
201 | 
202 | 
203 | 
204 | 
205 | def create_table_from_schema(table_name, schema) -> Table:
206 |     columns = {}
207 |     for column_name in schema.keys():
208 |         columns[column_name] = get_column_class_from_type(schema[column_name])(sql_string=column_name)
209 |     return create_table(table_name=table_name, columns=columns)
210 | 
211 | def create_table(table_name, columns=None, from_sql_string=None, filters=None, had_changed=False) -> Table:
212 |     return Table(
213 |         table_name=table_name,
214 |         columns=columns if columns is not None else {},  # avoid a shared mutable default
215 |         from_sql_string=from_sql_string,
216 |         filters=filters if filters is not None else [],  # avoid a shared mutable default
217 |         had_changed=had_changed)
--------------------------------------------------------------------------------
/pandas_to_sql_colab_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |   "nbformat": 4,
3 |   "nbformat_minor": 0,
4 |   "metadata": {
5 |     "colab": {
6 |       "name": "pandas_to_sql_colab_example.ipynb",
7 |       "provenance": [],
8 |       "collapsed_sections": [],
9 |       "authorship_tag": "ABX9TyPTBsf7gZggRD828S1nx250",
10 |       "include_colab_link": true
11 |     },
12 |     "kernelspec": {
13 |       "name": "python3",
14 |       "display_name": "Python 3"
15 |     }
16 |   },
17 |   "cells": [
18 |     {
19 |       "cell_type": "markdown",
20 |       "metadata": {
21 |         "id": "view-in-github",
22 |         "colab_type": "text"
23 |       },
24 |       "source": [
25 |         "Open In Colab"
26 |       ]
27 |     },
28 |     {
29 |       "cell_type": "code",
30 |       "metadata": {
31 |         "id": "5oTOIl8oHBhe",
32 |         "colab": {
33 |           "base_uri": "https://localhost:8080/"
34 |         },
35 |         "outputId": "1f6d0bea-3f84-43d3-d43a-ddb55596e920"
36 |       },
37 |       "source": [
38 |         "!pip install pandas-to-sql -U"
39 |       ],
40 |       "execution_count": 1,
41 |       "outputs": [
42 |         {
43 |           "output_type": "stream",
44 |           "text": [
45 |             "Requirement already up-to-date: pandas-to-sql in /usr/local/lib/python3.6/dist-packages (0.0.546)\n"
46 |           ],
47 |           "name": "stdout"
48 |         }
49 |       ]
50 |     },
51 |     {
52 |       "cell_type": "code",
53 |       "metadata": {
54 |         "id": "sGSsvHC8HaQ0"
55 |       },
56 |       "source": [
57 |         "from copy import copy\r\n",
58 |         "import sqlite3\r\n",
59 |         "import pandas as pd\r\n",
60 |         "import pandas_to_sql\r\n",
61 |         "from pandas_to_sql import conventions"
62 |       ],
63 |       "execution_count": 2,
64 |       "outputs": []
65 |     },
66 |     {
67 |       "cell_type": "code",
68 |       "metadata": {
69 |         "id": "NexlwrknMQGS",
70 |         "colab": {
71 |           "base_uri": "https://localhost:8080/",
72 |           "height": 110
73 |         },
74 |         "outputId": "3e8f6560-0d38-4ca2-b728-15c0b44dbe69"
75 |       },
76 |       "source": [
77 |         "iris = 
pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')\r\n", 78 | "table_name = 'iris'\r\n", 79 | "sql_connection = sqlite3.connect('./iris.db') #create db\r\n", 80 | "iris.to_sql(table_name, sql_connection, if_exists='replace', index=False)\r\n", 81 | "iris[:2]" 82 | ], 83 | "execution_count": 3, 84 | "outputs": [ 85 | { 86 | "output_type": "execute_result", 87 | "data": { 88 | "text/html": [ 89 | "
\n", 90 | "\n", 103 | "\n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
\n", 133 | "
" 134 | ], 135 | "text/plain": [ 136 | " sepal_length sepal_width petal_length petal_width species\n", 137 | "0 5.1 3.5 1.4 0.2 setosa\n", 138 | "1 4.9 3.0 1.4 0.2 setosa" 139 | ] 140 | }, 141 | "metadata": { 142 | "tags": [] 143 | }, 144 | "execution_count": 3 145 | } 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "dlK3PNWBMPXc" 152 | }, 153 | "source": [ 154 | "df = pandas_to_sql.wrap_df(iris, table_name)\r\n", 155 | "pd_wrapped = pandas_to_sql.wrap_pd(pd)" 156 | ], 157 | "execution_count": 4, 158 | "outputs": [] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "metadata": { 163 | "id": "cUwu1LtnVCXY", 164 | "colab": { 165 | "base_uri": "https://localhost:8080/", 166 | "height": 53 167 | }, 168 | "outputId": "6463e2a7-f4b2-4fda-a064-658e4b52b4a9" 169 | }, 170 | "source": [ 171 | "df.get_sql_string()" 172 | ], 173 | "execution_count": 5, 174 | "outputs": [ 175 | { 176 | "output_type": "execute_result", 177 | "data": { 178 | "application/vnd.google.colaboratory.intrinsic+json": { 179 | "type": "string" 180 | }, 181 | "text/plain": [ 182 | "'SELECT (sepal_length) AS sepal_length, (sepal_width) AS sepal_width, (petal_length) AS petal_length, (petal_width) AS petal_width, (species) AS species FROM iris'" 183 | ] 184 | }, 185 | "metadata": { 186 | "tags": [] 187 | }, 188 | "execution_count": 5 189 | } 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "metadata": { 195 | "id": "0tQZAfyLMtDB", 196 | "colab": { 197 | "base_uri": "https://localhost:8080/", 198 | "height": 53 199 | }, 200 | "outputId": "b68d70a9-fd51-4fe7-ee68-4b56830762ab" 201 | }, 202 | "source": [ 203 | "species_petal_length_stats_df = df.groupby('species').agg({'petal_length':['mean','sum','count']})\r\n", 204 | "species_petal_length_stats_df = conventions.flatten_grouped_dataframe(species_petal_length_stats_df)\r\n", 205 | "\r\n", 206 | "species_petal_length_stats_df.get_sql_string()\r\n" 207 | ], 208 | "execution_count": 6, 209 | "outputs": [ 210 | { 211 | "output_type": "execute_result", 212 | "data": { 213 | "application/vnd.google.colaboratory.intrinsic+json": { 214 | "type": "string" 215 | }, 216 | "text/plain": [ 217 | "'SELECT (avg(petal_length)) AS petal_length_mean, (sum(petal_length)) AS petal_length_sum, (count(petal_length)) AS petal_length_count, (species) AS species FROM iris GROUP BY species'" 218 | ] 219 | }, 220 | "metadata": { 221 | "tags": [] 222 | }, 223 | "execution_count": 6 224 | } 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "metadata": { 230 | "id": "p9YLqZ7EMs9V", 231 | "colab": { 232 | "base_uri": "https://localhost:8080/", 233 | "height": 53 234 | }, 235 | "outputId": "fcf3e396-007c-48f4-815b-e611b5628da8" 236 | }, 237 | "source": [ 238 | "df[(df.petal_length>1.4) & (df.petal_width<.2)].get_sql_string()" 239 | ], 240 | "execution_count": 7, 241 | "outputs": [ 242 | { 243 | "output_type": "execute_result", 244 | "data": { 245 | "application/vnd.google.colaboratory.intrinsic+json": { 246 | "type": "string" 247 | }, 248 | "text/plain": [ 249 | "'SELECT (sepal_length) AS sepal_length, (sepal_width) AS sepal_width, (petal_length) AS petal_length, (petal_width) AS petal_width, (species) AS species FROM iris WHERE (((petal_length > 1.4) AND (petal_width < 0.2))) '" 250 | ] 251 | }, 252 | "metadata": { 253 | "tags": [] 254 | }, 255 | "execution_count": 7 256 | } 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "metadata": { 262 | "id": "9DfYy1SeMs40", 263 | "colab": { 264 | "base_uri": "https://localhost:8080/", 265 | "height": 141 266 | }, 
267 | "outputId": "821d7ea9-6f30-4fea-f1d3-4fcad1e23613" 268 | }, 269 | "source": [ 270 | "df_ = copy(df)\r\n", 271 | "df_['sepal_width_rounded'] = df_.sepal_width.round()\r\n", 272 | "df_1 = df_[df_.species=='setosa'].reset_index(drop=True)\r\n", 273 | "df_2 = df_[df_.species=='versicolor'].reset_index(drop=True)\r\n", 274 | "pd_wrapped.concat([df_1, df_2]).get_sql_string()" 275 | ], 276 | "execution_count": 8, 277 | "outputs": [ 278 | { 279 | "output_type": "execute_result", 280 | "data": { 281 | "application/vnd.google.colaboratory.intrinsic+json": { 282 | "type": "string" 283 | }, 284 | "text/plain": [ 285 | "\"SELECT (sepal_length) AS sepal_length, (sepal_width) AS sepal_width, (petal_length) AS petal_length, (petal_width) AS petal_width, (species) AS species, ((CASE WHEN ((ABS(sepal_width) - ROUND(ABS(sepal_width)-0.5))==.5) AND ((CAST(sepal_width AS INT))%2 == 0) THEN (CASE WHEN sepal_width>0 THEN ROUND(sepal_width-0.001) ELSE ROUND(sepal_width+0.001) END) ELSE (ROUND(sepal_width)) END)) AS sepal_width_rounded FROM iris WHERE ((species = 'setosa')) UNION ALL SELECT (sepal_length) AS sepal_length, (sepal_width) AS sepal_width, (petal_length) AS petal_length, (petal_width) AS petal_width, (species) AS species, ((CASE WHEN ((ABS(sepal_width) - ROUND(ABS(sepal_width)-0.5))==.5) AND ((CAST(sepal_width AS INT))%2 == 0) THEN (CASE WHEN sepal_width>0 THEN ROUND(sepal_width-0.001) ELSE ROUND(sepal_width+0.001) END) ELSE (ROUND(sepal_width)) END)) AS sepal_width_rounded FROM iris WHERE ((species = 'versicolor')) \"" 286 | ] 287 | }, 288 | "metadata": { 289 | "tags": [] 290 | }, 291 | "execution_count": 8 292 | } 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "metadata": { 298 | "id": "N5tjEM2tMszV" 299 | }, 300 | "source": [ 301 | "df_ = copy(df)\r\n", 302 | "df_['sepal_width_rounded'] = df_.sepal_width.round()\r\n", 303 | "df_1 = df_[df_.species=='setosa'].reset_index(drop=True)\r\n", 304 | "df_2 = df_[df_.species=='versicolor'].reset_index(drop=True)\r\n", 305 | "\r\n", 306 | "some_df = pd_wrapped.concat([df_1, df_2])\r\n", 307 | "\r\n", 308 | "sql_string = some_df.get_sql_string()\r\n", 309 | "\r\n", 310 | "df_from_sql_database = pd.read_sql_query(sql_string, sql_connection)\r\n", 311 | "df_pandas = some_df.df_pandas\r\n", 312 | "\r\n", 313 | "from pandas_to_sql.testing.utils.asserters import assert_dataframes_equals\r\n", 314 | "assert_dataframes_equals(df_pandas, df_from_sql_database)" 315 | ], 316 | "execution_count": 9, 317 | "outputs": [] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "metadata": { 322 | "id": "Xdq5qc-ZMssZ" 323 | }, 324 | "source": [ 325 | "" 326 | ], 327 | "execution_count": 9, 328 | "outputs": [] 329 | } 330 | ] 331 | } --------------------------------------------------------------------------------
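One operation the example notebook does not show is merge; here is a short sketch of the JOIN it generates (illustrative only; the tables and column names are invented for this example):

import pandas as pd
import pandas_to_sql

users = pandas_to_sql.wrap_df(pd.DataFrame({'id': [1, 2], 'name': ['a', 'b']}), 'users')
orders = pandas_to_sql.wrap_df(pd.DataFrame({'id': [1, 1, 2], 'total': [5.0, 7.0, 9.0]}), 'orders')

# Both sides are wrapped as subqueries and joined on the shared 'on' column.
merged = users.merge(orders, how='inner', on='id')
print(merged.get_sql_string())
# Prints something like:
# SELECT t1.id AS id, t1.name AS name, t2.total AS total
# FROM (SELECT ... FROM users) AS t1 INNER JOIN (SELECT ... FROM orders) AS t2 ON t1.id=t2.id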