├── .gitignore
├── LICENSE
├── README.md
├── alembic.ini
├── db
│   ├── migrations.db
│   └── pandas_oop.db
├── migrations
│   ├── README
│   ├── env.py
│   ├── script.py.mako
│   └── versions
│       ├── 2f81577f200c_first_revision.py
│       ├── 5be67895ab4d_revision_4.py
│       ├── cb6921b84bf1_revision_3.py
│       └── d9d3205a5cf1_revision_2.py
├── pyproject.toml
├── requirements-dev.txt
├── setup.py
├── src
│   ├── __init__.py
│   └── pandas_oop
│       ├── __init__.py
│       ├── _decorators.py
│       ├── custom_exceptions.py
│       ├── fields.py
│       └── models.py
├── static
│   ├── data
│   │   ├── cars.csv
│   │   ├── lot_of_people.csv
│   │   ├── people.csv
│   │   └── people_jobs.csv
│   └── images
│       ├── df.png
│       ├── poop.jpg
│       ├── poop.png
│       └── poop_sticker.png
└── tests
    ├── __init__.py
    ├── test_dataframe_behavior.py
    ├── test_db_migrations_and_sqlalchemy_behavior.py
    ├── test_models_declaration.py
    └── test_sql_operations.py

/.gitignore: --------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | 
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 | 
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 | 
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 | 
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 | 
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 | 
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 | 
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 | 
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | .idea/ 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 The Python Packaging Authority 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | ![image](static/images/poop_sticker.png)
2 | # Pandas-Oop (not maintained; see https://github.com/MayasMess/panorma, which is simpler)
3 | Pandas-Oop (also known as Poop) is a package that brings an object-oriented programming style to Pandas dataframes.
4 | 
5 | Installation:
6 | -
7 | 
8 | ```shell script
9 | pip install pandas-oop
10 | ```
11 | 
12 | Some examples
13 | -
14 | 
15 | ```python
16 | from pandas_oop import models
17 | from pandas_oop.fields import StringColumn, IntegerColumn, FloatColumn, DateColumn, BoolColumn
18 | ```
19 | ```python
20 | DB_CONNECTION = models.Connection('sqlite:///pandas_oop.db')  # the same connection string you would pass to a SQLAlchemy engine
21 | ```
22 | ```python
23 | @models.sql(table='people', con=DB_CONNECTION)  # use this decorator if you want to connect your class to a database
24 | @models.Data
25 | class People(models.DataFrame):
26 |     name = StringColumn(unique=True)
27 |     age = IntegerColumn()
28 |     money = FloatColumn(target_name="coins")  # use target_name when the column is named "coins" in the CSV or table but you want a different attribute name
29 |     insertion_date = DateColumn(format='%d-%m-%Y')
30 |     is_staff = BoolColumn(true='yes', false='no')
31 | ```
32 | 
33 | Instantiating this class returns a custom dataframe with all the functionality of a Pandas
34 | dataframe, plus a few extras:
35 | 
36 | ```python
37 | people = People()
38 | """-----------------------------------------------------------"""
39 | people = People(from_csv=DATA_FILE, delimiter=";")
40 | """-----------------------------------------------------------"""
41 | people = People(from_sql_query='select * from people')
42 | """-----------------------------------------------------------"""
43 | people = People(from_df=some_dataframe)
44 | """-----------------------------------------------------------"""
45 | people = People(from_iterator=some_function_that_yields_values)
46 | """-----------------------------------------------------------"""
47 | for people_chunk in People(from_csv=DATA_FILE, delimiter=";", chunksize=10):
48 |     ...
49 | ```
50 | Example of a function that yields values:
51 | 
52 | ```python
53 | def some_function_that_yields_values():
54 |     while something:
55 |         ...
56 |         yield name, age, money, insertion_date, is_staff
57 | ```
58 | 
59 | ![image](static/images/df.png)
60 | 
61 | You can also save it to the database with the save() method (if the column dtypes no longer match the declared types, this will raise a
62 | ValidationError):
63 | 
64 | ```python
65 | people.save()
66 | ```
67 | 
68 | You can also upsert to the database; this automatically uses the fields that were declared with unique=True in the class:
69 | 
70 | ```python
71 | people.save(if_row_exists='update')
72 | # or
73 | people.save(if_row_exists='ignore')
74 | ```
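A minimal end-to-end sketch of the upsert flow (hypothetical data; it assumes the People class above, whose name field is declared with unique=True and therefore serves as the upsert key):

```python
from pandas import Timestamp

people = People()
people.name = ['John', 'Snow']
people.age = [15, 40]
people.money = [13.6, 6.7]
people.insertion_date = [Timestamp('2005-02-25'), Timestamp('2005-02-25')]
people.is_staff = [True, False]

people.save()                        # plain insert (append)

people.age = [16, 41]
people.save(if_row_exists='update')  # rows matched on 'name' are updated in place

people.save(if_row_exists='ignore')  # rows matched on 'name' are left untouched
```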
75 | 
76 | If you want to revalidate your dataframe (convert the column dtypes to the types declared in the class), you can
77 | call the validate() method:
78 | 
79 | ```python
80 | people.validate()
81 | ```
82 | 
83 | You can also validate from another class. For example, you can do something like this:
84 | 
85 | ```python
86 | people = People(from_csv=DATA_FILE)
87 | jobs = Jobs(from_sql_query='select * from jobs')
88 | people_with_jobs = people.merge(jobs, on='name').validate(from_class=PeopleWithJobs)
89 | ```
90 | 
91 | This is the list of the overridden methods that return a pandas_oop custom dataframe:
92 | - 'isnull'
93 | - 'head'
94 | - 'abs'
95 | - 'merge'
96 | - 'loc' and dataframe slicing
97 | 
98 | More methods will be added to this list over time.
99 | 
100 | 
101 | New features
102 | -
103 | Alembic database migration support has been added:
104 | - In your main application package, import Base (this is a declarative_base from SQLAlchemy):
105 | ```python
106 | from pandas_oop import Base
107 | ```
108 | - Add this configuration to the env.py file of your Alembic setup:
109 | ```python
110 | from your_app import Base
111 | target_metadata = Base.metadata
112 | ```
113 | - And finally, update your database URL in your alembic.ini file.
-------------------------------------------------------------------------------- /alembic.ini: --------------------------------------------------------------------------------
1 | # A generic, single database configuration.
2 | 
3 | [alembic]
4 | # path to migration scripts
5 | script_location = migrations
6 | 
7 | # template used to generate migration files
8 | # file_template = %%(rev)s_%%(slug)s
9 | 
10 | # sys.path path, will be prepended to sys.path if present.
11 | # defaults to the current working directory.
12 | prepend_sys_path = .
13 | 
14 | # timezone to use when rendering the date within the migration file
15 | # as well as the filename.
16 | # If specified, requires the python-dateutil library that can be
17 | # installed by adding `alembic[tz]` to the pip requirements
18 | # string value is passed to dateutil.tz.gettz()
19 | # leave blank for localtime
20 | # timezone =
21 | 
22 | # max length of characters to apply to the
23 | # "slug" field
24 | # truncate_slug_length = 40
25 | 
26 | # set to 'true' to run the environment during
27 | # the 'revision' command, regardless of autogenerate
28 | # revision_environment = false
29 | 
30 | # set to 'true' to allow .pyc and .pyo files without
31 | # a source .py file to be detected as revisions in the
32 | # versions/ directory
33 | # sourceless = false
34 | 
35 | # version location specification; This defaults
36 | # to migrations/versions. When using multiple version
37 | # directories, initial revisions must be specified with --version-path.
38 | # The path separator used here should be the separator specified by "version_path_separator" below.
39 | # version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
40 | 
41 | # version path separator; As mentioned above, this is the character used to split
42 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
43 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
44 | # Valid values for version_path_separator are:
45 | #
46 | # version_path_separator = :
47 | # version_path_separator = ;
48 | # version_path_separator = space
49 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
50 | 51 | # the output encoding used when revision files 52 | # are written from script.py.mako 53 | # output_encoding = utf-8 54 | 55 | sqlalchemy.url = sqlite:///db/migrations.db 56 | 57 | 58 | [post_write_hooks] 59 | # post_write_hooks defines scripts or Python functions that are run 60 | # on newly generated revision scripts. See the documentation for further 61 | # detail and examples 62 | 63 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 64 | # hooks = black 65 | # black.type = console_scripts 66 | # black.entrypoint = black 67 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 68 | 69 | # Logging configuration 70 | [loggers] 71 | keys = root,sqlalchemy,alembic 72 | 73 | [handlers] 74 | keys = console 75 | 76 | [formatters] 77 | keys = generic 78 | 79 | [logger_root] 80 | level = WARN 81 | handlers = console 82 | qualname = 83 | 84 | [logger_sqlalchemy] 85 | level = WARN 86 | handlers = 87 | qualname = sqlalchemy.engine 88 | 89 | [logger_alembic] 90 | level = INFO 91 | handlers = 92 | qualname = alembic 93 | 94 | [handler_console] 95 | class = StreamHandler 96 | args = (sys.stderr,) 97 | level = NOTSET 98 | formatter = generic 99 | 100 | [formatter_generic] 101 | format = %(levelname)-5.5s [%(name)s] %(message)s 102 | datefmt = %H:%M:%S 103 | -------------------------------------------------------------------------------- /db/migrations.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/db/migrations.db -------------------------------------------------------------------------------- /db/pandas_oop.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/db/pandas_oop.db -------------------------------------------------------------------------------- /migrations/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /migrations/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config 4 | from sqlalchemy import pool 5 | 6 | from alembic import context 7 | 8 | # this is the Alembic Config object, which provides 9 | # access to the values within the .ini file in use. 10 | config = context.config 11 | 12 | # Interpret the config file for Python logging. 13 | # This line sets up loggers basically. 14 | if config.config_file_name is not None: 15 | fileConfig(config.config_file_name) 16 | 17 | # add your model's MetaData object here 18 | # for 'autogenerate' support 19 | from tests.test_db_migrations_and_sqlalchemy_behavior import Base 20 | target_metadata = Base.metadata 21 | # target_metadata = None 22 | 23 | # other values from the config, defined by the needs of env.py, 24 | # can be acquired: 25 | # my_important_option = config.get_main_option("my_important_option") 26 | # ... etc. 27 | 28 | 29 | def run_migrations_offline(): 30 | """Run migrations in 'offline' mode. 31 | 32 | This configures the context with just a URL 33 | and not an Engine, though an Engine is acceptable 34 | here as well. By skipping the Engine creation 35 | we don't even need a DBAPI to be available. 
36 | 37 | Calls to context.execute() here emit the given string to the 38 | script output. 39 | 40 | """ 41 | url = config.get_main_option("sqlalchemy.url") 42 | context.configure( 43 | url=url, 44 | target_metadata=target_metadata, 45 | literal_binds=True, 46 | dialect_opts={"paramstyle": "named"}, 47 | ) 48 | 49 | with context.begin_transaction(): 50 | context.run_migrations() 51 | 52 | 53 | def run_migrations_online(): 54 | """Run migrations in 'online' mode. 55 | 56 | In this scenario we need to create an Engine 57 | and associate a connection with the context. 58 | 59 | """ 60 | connectable = engine_from_config( 61 | config.get_section(config.config_ini_section), 62 | prefix="sqlalchemy.", 63 | poolclass=pool.NullPool, 64 | ) 65 | 66 | with connectable.connect() as connection: 67 | context.configure( 68 | connection=connection, target_metadata=target_metadata 69 | ) 70 | 71 | with context.begin_transaction(): 72 | context.run_migrations() 73 | 74 | 75 | if context.is_offline_mode(): 76 | run_migrations_offline() 77 | else: 78 | run_migrations_online() 79 | -------------------------------------------------------------------------------- /migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /migrations/versions/2f81577f200c_first_revision.py: -------------------------------------------------------------------------------- 1 | """first revision 2 | 3 | Revision ID: 2f81577f200c 4 | Revises: 5 | Create Date: 2022-04-09 02:23:54.004780 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '2f81577f200c' 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table('T_Contacts', 22 | sa.Column('id', sa.Integer(), nullable=False), 23 | sa.Column('firstName', sa.Text(), nullable=True), 24 | sa.Column('lastName', sa.Text(), nullable=True), 25 | sa.PrimaryKeyConstraint('id') 26 | ) 27 | op.create_table('people_migrations', 28 | sa.Column('name', sa.Text(), nullable=True), 29 | sa.Column('id', sa.Integer(), nullable=False), 30 | sa.PrimaryKeyConstraint('id') 31 | ) 32 | # ### end Alembic commands ### 33 | 34 | 35 | def downgrade(): 36 | # ### commands auto generated by Alembic - please adjust! 
### 37 | op.drop_table('people_migrations') 38 | op.drop_table('T_Contacts') 39 | # ### end Alembic commands ### 40 | -------------------------------------------------------------------------------- /migrations/versions/5be67895ab4d_revision_4.py: -------------------------------------------------------------------------------- 1 | """revision 4 2 | 3 | Revision ID: 5be67895ab4d 4 | Revises: cb6921b84bf1 5 | Create Date: 2022-04-09 14:01:11.578570 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '5be67895ab4d' 14 | down_revision = 'cb6921b84bf1' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.add_column('people_migrations_with_multiple_pk', sa.Column('money', sa.Float(), nullable=True)) 22 | # ### end Alembic commands ### 23 | 24 | 25 | def downgrade(): 26 | # ### commands auto generated by Alembic - please adjust! ### 27 | op.drop_column('people_migrations_with_multiple_pk', 'money') 28 | # ### end Alembic commands ### 29 | -------------------------------------------------------------------------------- /migrations/versions/cb6921b84bf1_revision_3.py: -------------------------------------------------------------------------------- 1 | """revision 3 2 | 3 | Revision ID: cb6921b84bf1 4 | Revises: d9d3205a5cf1 5 | Create Date: 2022-04-09 14:00:23.462587 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'cb6921b84bf1' 14 | down_revision = 'd9d3205a5cf1' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table('people_migrations_with_multiple_pk', 22 | sa.Column('name', sa.Text(), nullable=False), 23 | sa.Column('age', sa.Integer(), nullable=False), 24 | sa.PrimaryKeyConstraint('name', 'age') 25 | ) 26 | # ### end Alembic commands ### 27 | 28 | 29 | def downgrade(): 30 | # ### commands auto generated by Alembic - please adjust! ### 31 | op.drop_table('people_migrations_with_multiple_pk') 32 | # ### end Alembic commands ### 33 | -------------------------------------------------------------------------------- /migrations/versions/d9d3205a5cf1_revision_2.py: -------------------------------------------------------------------------------- 1 | """revision 2 2 | 3 | Revision ID: d9d3205a5cf1 4 | Revises: 2f81577f200c 5 | Create Date: 2022-04-09 13:41:26.003285 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'd9d3205a5cf1' 14 | down_revision = '2f81577f200c' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table('people_migrations_with_pk', 22 | sa.Column('name', sa.Text(), nullable=False), 23 | sa.Column('age', sa.Integer(), nullable=True), 24 | sa.PrimaryKeyConstraint('name') 25 | ) 26 | # ### end Alembic commands ### 27 | 28 | 29 | def downgrade(): 30 | # ### commands auto generated by Alembic - please adjust! 
### 31 | op.drop_table('people_migrations_with_pk') 32 | # ### end Alembic commands ### 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "pandas>=1.3.0"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pandas==1.4.1 2 | pangres==4.1.1 3 | sqlalchemy==1.4.34 4 | alembic==1.7.7 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="pandas-oop", 8 | version="0.9.6", 9 | author="Mayas Nova", 10 | author_email="test@test.com", 11 | description="Pandas dataframes with object oriented programming style", 12 | install_requires=["pandas", "pangres", "sqlalchemy"], 13 | keywords=["pandas", "oop", "dataframe", "poop"], 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | url="https://github.com/MayasMess/pandas-oop", 17 | project_urls={ 18 | "Bug Tracker": "https://github.com/MayasMess/pandas-oop/issues", 19 | }, 20 | classifiers=[ 21 | "Programming Language :: Python :: 3", 22 | "License :: OSI Approved :: MIT License", 23 | "Operating System :: OS Independent", 24 | ], 25 | package_dir={"": "src"}, 26 | packages=setuptools.find_packages(where="src"), 27 | python_requires=">=3.6", 28 | ) 29 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/src/__init__.py -------------------------------------------------------------------------------- /src/pandas_oop/__init__.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.declarative import declarative_base 2 | 3 | Base = declarative_base() -------------------------------------------------------------------------------- /src/pandas_oop/_decorators.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from pandas.core.generic import NDFrame 3 | from pandas.core.frame import DataFrame 4 | 5 | from sqlalchemy import Column, Integer 6 | 7 | from . 
import Base
8 | 
9 | 
10 | # these methods will return a pandas_oop.models.DataFrame when called
11 | METHODS_TO_OVERRIDE = [
12 |     'isnull',
13 |     'head',
14 |     'abs',
15 |     'merge',
16 | ]
17 | 
18 | 
19 | def _decorate_all_methods(method_decorator):
20 |     def decorator(cls):
21 |         _classes = [DataFrame, NDFrame]
22 |         method_to_override = {}
23 |         for _class in _classes:
24 |             for name, obj in vars(_class).items():
25 |                 if callable(obj) and name in METHODS_TO_OVERRIDE:
26 |                     method_to_override[name] = obj.__annotations__
27 |                     setattr(cls, name, method_decorator(obj, cls))
28 |         return cls
29 |     return decorator
30 | 
31 | 
32 | def _return_custom_df_on_call(func, cls=None):
33 |     @wraps(func)
34 |     def wrapper(*args, **kwargs):
35 |         res = func(*args, **kwargs)
36 |         return cls.generic_overrider(res, args[0])
37 |     return wrapper
38 | 
39 | 
40 | def init_sqlalchemy_class(func):
41 |     # Initialize the SQLAlchemy class (this is used for migration detection)
42 |     attr_sqlalchemy_dict = {data_type.name: data_type.col_obj_series.sqlalchemy_column
43 |                             for data_type in func.data_types}
44 |     attr_sqlalchemy_dict['__tablename__'] = func.sql.get('table')
45 |     func.index_list = [data_type.name
46 |                        for data_type in func.data_types
47 |                        if data_type.col_obj_series.kwargs.get('unique') is True]
48 |     if not func.index_list:
49 |         attr_sqlalchemy_dict['id'] = Column(Integer, primary_key=True)
50 |     func.sqlalchemy_class = type(func.decorated_class.__name__,
51 |                                  (Base,),
52 |                                  attr_sqlalchemy_dict)
53 |     return func
54 | 
55 | 
56 | def sql(**kwargs):
57 |     """
58 |     SQL decorator: captures its keyword arguments and initializes the SQLAlchemy class to enable db migrations
59 |     """
60 |     def wrapper(func):
61 |         func.__setattr__('sql', kwargs)
62 |         func = init_sqlalchemy_class(func=func)
63 |         return func
64 |     return wrapper
65 | 
-------------------------------------------------------------------------------- /src/pandas_oop/custom_exceptions.py: --------------------------------------------------------------------------------
1 | class ValidationError(Exception):
2 |     """Raised when a column's dtype does not match the type declared in the class"""
3 |     def __init__(self, msg):
4 |         self.msg = msg
5 |         super(ValidationError, self).__init__(msg)
6 | 
7 | 
8 | class MissingDecorator(Exception):
9 |     """Raised when a class that needs database access was not decorated with models.sql"""
10 |     def __init__(self, msg):
11 |         self.msg = msg
12 |         super(MissingDecorator, self).__init__(msg)
13 | 
14 | 
15 | class MissingArguments(Exception):
16 |     """Raised when arguments are missing on the models.sql decorator"""
17 |     def __init__(self, msg):
18 |         self.msg = msg
19 |         super(MissingArguments, self).__init__(msg)
20 | 
21 | 
22 | class MissingUniqueField(Exception):
23 |     """Raised when an upsert is attempted on a class with no unique=True field"""
24 |     def __init__(self, msg):
25 |         self.msg = msg
26 |         super(MissingUniqueField, self).__init__(msg)
27 | 
-------------------------------------------------------------------------------- /src/pandas_oop/fields.py: --------------------------------------------------------------------------------
1 | from copy import copy
2 | 
3 | import pandas as pd
4 | import numpy as np
5 | from sqlalchemy import Column, Text, Integer, Float, Date, Boolean
6 | 
7 | 
8 | class BaseColumn(pd.Series):
9 |     def __init__(self, base_type, dtype, np_type, **kwargs):
10 |         super().__init__(dtype=dtype)
11 |         self.str_type = dtype
12 |         self.np_type = np_type
13 |         self.base_type = base_type
14 |         self.kwargs = copy(kwargs)
15 | 
16 |     @staticmethod
17 |     def init_sqlalchemy_column(sqlalchemy_col_type, **kwargs):
18 |         kwargs['primary_key'] = kwargs.pop('unique', None)  # unique=True fields become primary keys
19 |         kwargs.pop('target_name', None)  # target_name is not a SQLAlchemy argument
20 |         return Column(sqlalchemy_col_type, **kwargs)
21 | 
22 | 
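# Illustration (a sketch, not part of the original file): how a field declaration
# is translated into a SQLAlchemy column by init_sqlalchemy_column above:
#     name = StringColumn(unique=True)          ->  Column(Text, primary_key=True)
#     money = FloatColumn(target_name="coins")  ->  Column(Float)  # target_name dropped
# unique=True therefore doubles as the table's primary key and, later, as the upsert index.
23 | 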
class StringColumn(BaseColumn): 24 | def __init__(self, **kwargs): 25 | super().__init__(base_type='object', dtype='object', np_type=np.str_, **kwargs) 26 | self.sqlalchemy_column = self.init_sqlalchemy_column(Text, **kwargs) 27 | 28 | 29 | class IntegerColumn(BaseColumn): 30 | def __init__(self, **kwargs): 31 | super().__init__(base_type='int', dtype='int64', np_type=np.int64, **kwargs) 32 | self.sqlalchemy_column = self.init_sqlalchemy_column(Integer, **kwargs) 33 | 34 | 35 | class FloatColumn(BaseColumn): 36 | def __init__(self, **kwargs): 37 | super().__init__(base_type='float', dtype='float64', np_type=np.float64, **kwargs) 38 | self.sqlalchemy_column = self.init_sqlalchemy_column(Float, **kwargs) 39 | 40 | 41 | class DateColumn(BaseColumn): 42 | def __init__(self, **kwargs): 43 | super().__init__(base_type='datetime', dtype='datetime64[ns]', np_type=np.datetime64, **kwargs) 44 | if kwargs.get('format') is not None: 45 | del kwargs['format'] 46 | self.sqlalchemy_column = self.init_sqlalchemy_column(Date, **kwargs) 47 | 48 | 49 | class BoolColumn(BaseColumn): 50 | def __init__(self, **kwargs): 51 | super().__init__(base_type='bool', dtype='bool', np_type=np.bool_, **kwargs) 52 | self.true_or_false = None 53 | if kwargs.get('true') is not None and kwargs.get('false') is not None: 54 | self.true_or_false = {kwargs.get('true'): True, kwargs.get('false'): False} 55 | del kwargs['true'] 56 | del kwargs['false'] 57 | self.sqlalchemy_column = self.init_sqlalchemy_column(Boolean, **kwargs) 58 | -------------------------------------------------------------------------------- /src/pandas_oop/models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | import logging 4 | 5 | import pandas as pd 6 | from pandas._typing import NDFrameT 7 | from pandas.io.parsers.readers import TextFileReader 8 | from pangres import upsert 9 | import numpy as np 10 | import typing 11 | 12 | from sqlalchemy import create_engine 13 | 14 | from ._decorators import _decorate_all_methods, _return_custom_df_on_call, sql 15 | from .custom_exceptions import ValidationError, MissingDecorator, MissingArguments, MissingUniqueField 16 | from . 
import Base
17 | 
18 | 
19 | @dataclass
20 | class DataFrameState:
21 |     data_types: typing.Optional[list] = None
22 |     index_list: typing.Optional[list] = None
23 |     sql: typing.Optional[dict] = None
24 |     class_name: typing.Optional[str] = None
25 |     decorated_class: typing.Optional[type] = None
26 |     sqlalchemy_class: typing.Optional[type] = None  # a class derived from Base
27 | 
28 | 
29 | @_decorate_all_methods(_return_custom_df_on_call)
30 | class DataFrame(pd.DataFrame):
31 | 
32 |     def __init__(self, from_df: pd.DataFrame = None, from_csv=None, from_sql_query=None, from_iterator=None, chunksize=None):
33 |         super().__init__()
34 |         self._dataframe_state = DataFrameState()
35 |         self.__is_valid = False
36 | 
37 |     def is_valid(self) -> bool:
38 |         if self._dataframe_state.data_types is None:
39 |             self.__is_valid = True
40 |             return self.__is_valid
41 |         try:
42 |             for data_type in self._dataframe_state.data_types:
43 |                 if data_type.base_type not in self[data_type.name].dtype.name:
44 |                     raise ValidationError(
45 |                         f"The column {data_type.name} is not of type {data_type.col_obj_series.dtype}")
46 |             self.__is_valid = True
47 |             return self.__is_valid
48 |         except ValidationError as ve:
49 |             logging.warning(ve.msg)
50 |             return False
51 | 
52 |     def validate(self, from_class=None) -> 'DataFrame':
53 |         if from_class is not None:
54 |             self._dataframe_state = from_class().dataframe_state
55 |         for data_type in self._dataframe_state.data_types:
56 |             if data_type.str_type == 'datetime64[ns]':
57 |                 self[data_type.name] = pd.to_datetime(self[data_type.name],
58 |                                                       format=data_type.col_obj_series.kwargs.get('format'))
59 |             else:
60 |                 self[data_type.name] = self[data_type.name].astype(data_type.str_type)
61 |         self.is_valid()
62 |         return self
63 | 
64 |     def save(self, *args, **kwargs) -> int:
65 |         self.is_valid()
66 |         self.is_sql_decorator_missing()
67 |         if kwargs.get("if_row_exists") is not None:
68 |             if self._dataframe_state.index_list is None or not self._dataframe_state.index_list:
69 |                 raise MissingUniqueField(
70 |                     'Your class must contain one or multiple fields with the parameter "unique=True"')
71 |             return upsert(df=self.set_index(self._dataframe_state.index_list),
72 |                           con=self.sql_engine,
73 |                           table_name=self.sql_table, **kwargs)
74 |         return self.normal_save(*args, **kwargs)
75 | 
76 |     def normal_save(self, *args, **kwargs) -> int:
77 |         kwargs['name'] = self.sql_table
78 |         with self.sql_engine.connect() as con:
79 |             kwargs['con'] = con
80 |             if kwargs.get('if_exists') is None:
81 |                 kwargs['if_exists'] = 'append'
82 |             elif kwargs.get('if_exists') == 'replace':
83 |                 raise TypeError('got an unexpected value "if_exists=replace". Please use a plain pandas dataframe '
84 |                                 'to access this functionality')
85 |             if kwargs.get('index') is None:
86 |                 kwargs['index'] = False
87 |             elif kwargs.get('index') is True:
88 |                 return self.set_index(self._dataframe_state.index_list).to_sql(*args, **kwargs)
89 |             return self.to_sql(*args, **kwargs)
90 | 
91 |     def is_sql_decorator_missing(self) -> None:
92 |         if self._dataframe_state.sql is None:
93 |             raise MissingDecorator("You have to decorate your class with models.sql")
94 |         for key in self._dataframe_state.sql.keys():
95 |             if self._dataframe_state.sql.get(key) is None:
96 |                 raise MissingArguments("Missing arguments on models.sql decorator")
97 | 
98 |     def _take_with_is_copy(self, indices, axis=0) -> NDFrameT:
99 |         """
100 |         Internal version of the `take` method that sets the `_is_copy`
101 |         attribute to keep track of the parent dataframe (used in indexing
102 |         for the SettingWithCopyWarning).
103 | 
104 |         See the docstring of `take` for full explanation of the parameters.
105 |         """
106 |         result = self.generic_overrider(self.take(indices=indices, axis=axis), self)
107 |         # Maybe set copy if we didn't actually change the index.
108 |         if not result._get_axis(axis).equals(self._get_axis(axis)):
109 |             result._set_is_copy(self)
110 |         return result
111 | 
112 |     def _slice(self, slobj: slice, axis=0) -> NDFrameT:
113 |         """
114 |         Construct a slice of this container.
115 | 
116 |         Slicing with this method is *always* positional.
117 |         """
118 |         assert isinstance(slobj, slice), type(slobj)
119 |         axis = self._get_block_manager_axis(axis)
120 |         result = self._constructor(self._mgr.get_slice(slobj, axis=axis))
121 |         result = result.__finalize__(self)
122 | 
123 |         # this could be a view
124 |         # but only in a single-dtyped view sliceable case
125 |         is_copy = axis != 0 or result._is_view
126 |         result._set_is_copy(self, copy=is_copy)
127 |         return self.generic_overrider(result, self)
128 | 
129 |     @classmethod
130 |     def generic_overrider(cls, df: pd.DataFrame, ct_df: 'DataFrame') -> 'DataFrame':
131 |         new_custom_df = cls()
132 |         new_custom_df._dataframe_state = ct_df.dataframe_state
133 |         for col_name in df.columns:
134 |             new_custom_df[col_name] = df[col_name]
135 |         return new_custom_df
136 | 
137 |     @property
138 |     def dataframe_state(self):
139 |         return self._dataframe_state
140 | 
141 |     @property
142 |     def sql_engine(self):
143 |         return self._dataframe_state.sql.get('con').sql_engine
144 | 
145 |     @property
146 |     def sql_table(self):
147 |         return self._dataframe_state.sql.get('table')
148 | 
149 |     def __str__(self):
150 |         return self._dataframe_state.class_name
151 | 
152 | 
153 | @dataclass
154 | class DataTypes:
155 |     name: str
156 |     base_type: str
157 |     str_type: str
158 |     np_type: np.generic
159 |     col_obj_series: pd.Series
160 |     target_name: str
161 | 
162 | 
163 | class Data:
164 |     def __init__(self, decorated_class):
165 |         """
166 |         This runs at class-definition time, when the @models.Data decorator is applied
167 |         """
168 |         self.decorated_class = decorated_class
169 |         self.decorated_inst = self.decorated_class()
170 |         self.df: typing.Optional[DataFrame] = None
171 |         self.index_list: typing.Optional[list] = None
172 |         self.sqlalchemy_class = None
173 |         self.data_types: List[DataTypes] = [
174 |             DataTypes(
175 |                 name=attr_key,
176 |                 base_type=attr_val.base_type,
177 |                 str_type=attr_val.str_type,
178 |                 np_type=attr_val.np_type,
179 |                 col_obj_series=getattr(self.decorated_class, attr_key),
180 |                 target_name=attr_val.kwargs.get('target_name') if attr_val.kwargs.get('target_name') is not None else attr_key
181 |             )
182 |             for attr_key, attr_val in self.decorated_class.__dict__.items()
183 |             if not attr_key.startswith('__') and not attr_key.endswith('__')]
184 | 
185 |     """
186 |     The models.sql decorator (see _decorators.py) is applied between __init__ and __call__.
187 |     """
188 |     def __call__(self, *args, **kwargs) -> DataFrame:
189 |         """
190 |         This runs when the decorated class is instantiated
191 |         """
192 |         self.init_new_custom_df()
193 | 
194 |         if kwargs.get('from_df') is not None:
195 |             return self._validate_kwargs(**kwargs)
196 |         if kwargs.get('from_csv') is not None:
197 |             return self._validate_from_csv_kwarg(**kwargs)
198 |         if kwargs.get('from_iterator') is not None:
199 |             return self._validate_from_iterator_kwarg(**kwargs)
200 |         if kwargs.get('from_sql_query') is not None:
201 |             self.df.is_sql_decorator_missing()
202 |             with self.df.sql_engine.connect() as con:
203 |                 kwargs['con'] = con
204 |                 return self._validate_from_sql_query_kwarg(**kwargs)
205 | for data_type in self.data_types: 206 | self.df[data_type.name] = data_type.col_obj_series 207 | return self.df 208 | 209 | def _validate_from_csv_kwarg(self, **kwargs) -> DataFrame: 210 | kwargs['filepath_or_buffer'] = kwargs.pop('from_csv') 211 | return self._validate_kwargs(func=pd.read_csv, **kwargs) 212 | 213 | def _validate_from_sql_query_kwarg(self, **kwargs) -> DataFrame: 214 | kwargs['sql'] = kwargs.pop('from_sql_query') 215 | return self._validate_kwargs(func=pd.read_sql_query, **kwargs) 216 | 217 | def _validate_from_iterator_kwarg(self, **kwargs) -> DataFrame: 218 | data = [] 219 | for row in kwargs.get('from_iterator')(): 220 | data.append(row) 221 | kwargs['columns'] = [data_type.name for data_type in self.data_types] 222 | kwargs['data'] = data 223 | kwargs.pop('from_iterator') 224 | return self._validate_kwargs(func=self.create_df_from_data_and_columns, **kwargs) 225 | 226 | def _validate_kwargs(self, func=None, **kwargs): 227 | col_type = {} 228 | bool_validator = {} 229 | for index, data_type in enumerate(self.data_types): 230 | 231 | if data_type.str_type == 'datetime64[ns]': 232 | if 'parse_dates' in kwargs.keys(): 233 | kwargs['parse_dates'].append(data_type.target_name) 234 | else: 235 | kwargs['parse_dates'] = [data_type.target_name] 236 | continue 237 | 238 | if data_type.str_type == 'bool' and data_type.col_obj_series.true_or_false is not None: 239 | bool_validator[data_type.name] = data_type.col_obj_series.true_or_false 240 | 241 | col_type[data_type.name] = data_type.np_type 242 | if kwargs.get('from_df') is not None: 243 | df = kwargs.get('from_df') 244 | else: 245 | df = func(**kwargs) 246 | if isinstance(df, TextFileReader): 247 | return self.df_generator(df, bool_validator) 248 | self.build_custom_df(df, bool_validator) 249 | return self.df 250 | 251 | def df_generator(self, df, bool_validator): 252 | for chunk in df: 253 | self.init_new_custom_df() 254 | self.build_custom_df(chunk, bool_validator) 255 | yield self.df 256 | 257 | def build_custom_df(self, df, bool_validator): 258 | # Convert bool values 259 | for col_name, bool_val_dict in bool_validator.items(): 260 | df[col_name] = df[col_name].map(bool_val_dict) 261 | 262 | for data_type in self.data_types: 263 | self.df[data_type.name] = df[data_type.target_name] 264 | 265 | def init_new_custom_df(self): 266 | self.df = DataFrame() 267 | self.df.dataframe_state.decorated_class = self.decorated_class 268 | self.df.dataframe_state.class_name = self.decorated_class.__name__ 269 | self.df.dataframe_state.data_types = self.data_types 270 | self.df.dataframe_state.index_list = self.index_list 271 | if hasattr(self, 'sql'): 272 | self.df.dataframe_state.sql = self.sql 273 | self.df.dataframe_state.sql['table'] = self.sql.get('table') 274 | self.df.dataframe_state.sqlalchemy_class = self.sqlalchemy_class 275 | 276 | @staticmethod 277 | def create_df_from_data_and_columns(**kwargs) -> pd.DataFrame: 278 | return pd.DataFrame(data=kwargs.get('data'), columns=kwargs.get('columns')) 279 | 280 | 281 | class Connection: 282 | def __init__(self, con_string): 283 | self.sql_engine = create_engine(con_string) 284 | 285 | 286 | _trust = sql 287 | -------------------------------------------------------------------------------- /static/data/cars.csv: -------------------------------------------------------------------------------- 1 | name;model;random_string 2 | 206;Peugeot;aaaa 3 | Clio;Renault;bbbb 4 | M6;BMW;zzzz -------------------------------------------------------------------------------- 
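To make the column-mapping logic in models.Data.build_custom_df (above) concrete before the sample data files: a minimal sketch in plain pandas (hypothetical two-row frame) of what happens to a BoolColumn(true='yes', false='no') field and a target_name rename during loading:

```python
import pandas as pd

# raw CSV as read by pd.read_csv
raw = pd.DataFrame({'coins': [13.6, 6.7], 'is_staff': ['yes', 'no']})

# BoolColumn(true='yes', false='no') becomes a .map() over the raw column
raw['is_staff'] = raw['is_staff'].map({'yes': True, 'no': False})

# money = FloatColumn(target_name='coins') copies 'coins' under the declared name
custom = pd.DataFrame()
custom['money'] = raw['coins']
custom['is_staff'] = raw['is_staff']
```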
/static/data/lot_of_people.csv: -------------------------------------------------------------------------------- 1 | name;age;money;insertion_date;is_staff 2 | John;15;13.6;2005-02-25;yes 3 | Snow;40;6.7;2005-02-25;no 4 | Marie;17;6.9;2005-02-25;yes 5 | Youpa;17;6.9;2005-02-25;no 6 | Loulou;17;6.9;2005-02-25;yes 7 | Miro;17;6.9;2005-02-25;no 8 | Mushu;17;6.9;2005-02-25;yes -------------------------------------------------------------------------------- /static/data/people.csv: -------------------------------------------------------------------------------- 1 | name;age;money;insertion_date;is_staff 2 | John;15;13.6;2005-02-25;yes 3 | Snow;40;6.7;2005-02-25;no 4 | -------------------------------------------------------------------------------- /static/data/people_jobs.csv: -------------------------------------------------------------------------------- 1 | name;job 2 | John;Developer 3 | Snow;RH -------------------------------------------------------------------------------- /static/images/df.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/static/images/df.png -------------------------------------------------------------------------------- /static/images/poop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/static/images/poop.jpg -------------------------------------------------------------------------------- /static/images/poop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/static/images/poop.png -------------------------------------------------------------------------------- /static/images/poop_sticker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/static/images/poop_sticker.png -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_dataframe_behavior.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import pandas as pd 3 | import numpy as np 4 | from pandas import Timestamp 5 | 6 | from src.pandas_oop.models import DataFrame 7 | from tests.test_models_declaration import People, PeopleNoTable, PEOPLE_DATA_FILE, PeopleFromDatabase, \ 8 | PeopleFromDatabaseWithoutBoolArgs, PEOPLE2_DATA_FILE, PeopleJobs, UniqueCars, MergedPeople, retrieve_people, \ 9 | PeopleFromIterator, PeopleDeclaredWithDifferentFields, LOT_OF_PEOPLE_DATA_FILE, PeopleTwoColumns 10 | 11 | 12 | class TestDataframeBehavior(TestCase): 13 | 14 | def test_instance_is_dataframe(self): 15 | people = People() 16 | self.assertIsInstance(people, pd.DataFrame, "Not an instance of pandas dataframe") 17 | 18 | def test_object_is_not_singleton(self): 19 | people_1 = People() 20 | people_2 = People() 21 | self.assertIsNot(people_2, people_1) 22 | 23 | def test_dataframe_has_only_declared_columns(self): 24 | 
people = PeopleTwoColumns(from_csv=PEOPLE_DATA_FILE, delimiter=";") 25 | self.assertEqual(['name', 'age'], list(people.columns)) 26 | 27 | def test_instance_is_dataframe_no_table(self): 28 | people = PeopleNoTable() 29 | self.assertIsInstance(people, pd.DataFrame, "Not an instance of pandas dataframe") 30 | 31 | def test_append_list_to_one_column(self): 32 | people = People() 33 | people.name = self.name_list 34 | people.age = self.age_list 35 | people.money = self.money_list 36 | people.insertion_date = self.insertion_date_list 37 | people.is_staff = self.is_staff_list 38 | 39 | result = people.to_dict() 40 | 41 | self.assertEqual(result, self.expected_result) 42 | 43 | def test_from_csv(self): 44 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 45 | result = people.to_dict() 46 | self.assertEqual(result, self.expected_result) 47 | self.assertEqual(people.insertion_date.dtype.type, np.datetime64, "Column is not a date") 48 | 49 | def test_from_sql_query(self): 50 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 51 | people.sql_engine.execute('delete from people') 52 | people.save() 53 | people_from_db = PeopleFromDatabase(from_sql_query='select * from people') 54 | self.assertEqual(people_from_db.to_dict(), people.to_dict()) 55 | 56 | def test_from_sql_query_without_bool_args(self): 57 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 58 | people.sql_engine.execute('delete from people') 59 | people.save() 60 | people_from_db = PeopleFromDatabaseWithoutBoolArgs(from_sql_query='select * from people') 61 | self.assertEqual(people_from_db.to_dict(), people.to_dict()) 62 | 63 | def test_dataframe_is_valid(self): 64 | people = People() 65 | people.name = self.name_list 66 | people.age = self.age_list 67 | people.money = self.money_list 68 | people.insertion_date = self.insertion_date_list 69 | people.is_staff = self.is_staff_list 70 | self.assertTrue(people.is_valid()) 71 | 72 | def test_dataframe_validate(self): 73 | people = People() 74 | people.name = self.name_list 75 | people.age = self.age_list 76 | people.money = self.money_list 77 | people.insertion_date = self.string_insertion_date_list 78 | people.is_staff = self.is_staff_list 79 | people.validate() 80 | people.save() 81 | self.assertTrue(people.is_valid()) 82 | 83 | def test_dataframe_is_not_valid(self): 84 | people = People() 85 | people.name = self.name_list 86 | people.age = self.age_list 87 | people.money = self.money_list 88 | people.insertion_date = self.string_insertion_date_list 89 | people.is_staff = self.is_staff_list 90 | self.assertFalse(people.is_valid()) 91 | 92 | def test_isnull_return_custom_df(self): 93 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";").isnull() 94 | self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when isnull is called') 95 | 96 | def test_head_return_custom_df(self): 97 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";").head(1) 98 | self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when head is called') 99 | 100 | def test_abs_return_custom_df(self): 101 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 102 | people.name = [3, -7] 103 | people.insertion_date = [3, -7] 104 | people.is_staff = [3, -7] 105 | people = people.abs() 106 | self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when abs is called') 107 | 108 | def test_merge_return_custom_df(self): 109 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 110 | people2 = PeopleJobs(from_csv=PEOPLE2_DATA_FILE, delimiter=";") 111 | 
merged_result = people.merge(people2, on='name')
112 |         self.assertIsInstance(merged_result, DataFrame, 'Not a custom dataframe when merge is called')
113 |         self.assertEqual(merged_result.to_dict(), self.expected_merged_result)
114 | 
115 |     def test_validate_accept_argument(self):
116 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
117 |         people_jobs = PeopleJobs(from_csv=PEOPLE2_DATA_FILE, delimiter=";")
118 |         merged_result = people.merge(people_jobs, on='name').validate(from_class=MergedPeople)
119 |         self.assertEqual(str(merged_result), 'MergedPeople')
120 | 
121 |     def test_transform_df_to_custom_df_from_class_instantiation(self):
122 |         data = pd.read_csv(filepath_or_buffer=PEOPLE_DATA_FILE, delimiter=";")
123 |         people = People(from_df=data)
124 |         self.assertEqual(str(people), 'People')
125 | 
126 |     def test_populate_from_iterator(self):
127 |         people = PeopleFromIterator(from_iterator=retrieve_people)
128 |         self.assertEqual(people.shape, (1000, 5))
129 |         self.assertTrue(people.is_valid())
130 | 
131 |     def test_dataframe_has_column_name_declared(self):
132 |         people = PeopleDeclaredWithDifferentFields(from_csv=PEOPLE_DATA_FILE, delimiter=";")
133 |         self.assertEqual(list(people.columns), ['name_test', 'age', 'money_test', 'insertion_date_test', 'is_staff'])
134 | 
135 |     def test_slicing_return_custom_df(self):
136 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
137 |         people = people[people.name == 'John']
138 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when slicing is performed')
139 | 
140 |     def test_when_loc_is_performed(self):
141 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
142 |         people = people.loc[people.name == 'John']
143 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when loc is performed')
144 | 
145 |     def test_when_loc_set_value_is_performed(self):
146 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
147 |         people.loc[people.name == 'John'] = ('Marie', 15, 15.0, Timestamp('2005-02-25'), True)
148 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when loc set value is performed')
149 | 
150 |     def test_when_loc_slice_indexing_is_performed(self):
151 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
152 |         people = people[:1]
153 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when slice indexing is performed')
154 | 
155 |     def test_multi_loc_conditions(self):
156 |         people = People(from_csv=LOT_OF_PEOPLE_DATA_FILE, delimiter=";")
157 |         people = people.loc[(people.age < 18) & (people.name.str.startswith("M"))]
158 |         self.assertEqual(people.shape, (3, 5))
159 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when loc with multiple conditions is performed')
160 | 
161 |     def test_chunksize(self):
162 |         for people_chunk in People(from_csv=LOT_OF_PEOPLE_DATA_FILE, delimiter=";", chunksize=2):
163 |             self.assertIsInstance(people_chunk, DataFrame, 'Not a custom dataframe when chunksize is used')
164 | 
165 |     def setUp(self):
166 |         # Old school creation
167 |         self.old_school_df = pd.DataFrame({'name': pd.Series(dtype='O'),
168 |                                            'age': pd.Series(dtype='int'),
169 |                                            'money': pd.Series(dtype='float'),
170 |                                            'insertion_date': pd.Series(dtype='datetime64[ns]'),
171 |                                            'is_staff': pd.Series(dtype='bool')})
172 |         self.old_school_read_csv_df = pd.read_csv(PEOPLE_DATA_FILE, delimiter=';', parse_dates=['insertion_date'])
173 |         self.old_school_read_csv_df['is_staff'] = self.old_school_read_csv_df['is_staff'].map({'yes': True,
174 |                                                                                                'no': False})
175 | 
176 |         # Test variable for new
creation 177 | self.name_list = ["John", "Snow"] 178 | self.age_list = [15, 40] 179 | self.money_list = [13.6, 6.7] 180 | self.insertion_date_list = [Timestamp('2005-02-25'), Timestamp('2005-02-25')] 181 | self.is_staff_list = [True, False] 182 | 183 | self.expected_result = { 184 | 'name': { 185 | 0: 'John', 186 | 1: 'Snow' 187 | }, 188 | 'age': { 189 | 0: 15, 190 | 1: 40 191 | }, 192 | 'money': { 193 | 0: 13.6, 194 | 1: 6.7 195 | }, 196 | 'insertion_date': { 197 | 0: Timestamp('2005-02-25'), 198 | 1: Timestamp('2005-02-25') 199 | }, 200 | 'is_staff': { 201 | 0: True, 202 | 1: False 203 | }, 204 | } 205 | 206 | self.expected_merged_result = { 207 | 'name': { 208 | 0: 'John', 209 | 1: 'Snow' 210 | }, 211 | 'age': { 212 | 0: 15, 213 | 1: 40 214 | }, 215 | 'money': { 216 | 0: 13.6, 217 | 1: 6.7 218 | }, 219 | 'insertion_date': { 220 | 0: Timestamp('2005-02-25'), 221 | 1: Timestamp('2005-02-25') 222 | }, 223 | 'is_staff': { 224 | 0: True, 225 | 1: False 226 | }, 227 | 'job': { 228 | 0: 'Developer', 229 | 1: 'RH' 230 | }, 231 | } 232 | 233 | self.string_insertion_date_list = ['25-02-2005', '25-02-2005'] 234 | -------------------------------------------------------------------------------- /tests/test_db_migrations_and_sqlalchemy_behavior.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest import TestCase 3 | 4 | from sqlalchemy import Column, Integer, Text 5 | from sqlalchemy.exc import IntegrityError 6 | 7 | from src.pandas_oop import Base 8 | from src.pandas_oop import models 9 | from src.pandas_oop.fields import StringColumn, IntegerColumn, FloatColumn 10 | 11 | ABS_PATH = Path(__file__).resolve().parent.parent 12 | DB_CONNECTION = models.Connection(f'sqlite:///{ABS_PATH}/db/migrations.db') 13 | 14 | 15 | class Contact(models.Base): 16 | __tablename__ = 'T_Contacts' 17 | 18 | id = Column(Integer, primary_key=True) 19 | firstName = Column(Text) 20 | lastName = Column(Text) 21 | 22 | 23 | @models.sql(table="people_migrations", con=DB_CONNECTION) 24 | @models.Data 25 | class PeopleMigrations(models.DataFrame): 26 | name = StringColumn() 27 | 28 | 29 | @models.sql(table="people_migrations_with_pk", con=DB_CONNECTION) 30 | @models.Data 31 | class PeopleMigrationsWithPrimaryKey(models.DataFrame): 32 | name = StringColumn(unique=True) 33 | age = IntegerColumn() 34 | 35 | 36 | @models.sql(table="people_migrations_with_multiple_pk", con=DB_CONNECTION) 37 | @models.Data 38 | class PeopleMigrationsWithMultiplePrimaryKey(models.DataFrame): 39 | name = StringColumn(unique=True) 40 | age = IntegerColumn(unique=True) 41 | money = FloatColumn() 42 | 43 | 44 | class TestMigrations(TestCase): 45 | 46 | def test_custom_dataframe_is_detected_as_sqlalchemy_class(self): 47 | detected_tables = [table.fullname for table in Base.metadata.sorted_tables] 48 | self.assertIn('people_migrations', detected_tables) 49 | 50 | def test_save_without_pk_no_error(self): 51 | people = PeopleMigrations() 52 | people.name = ['John', 'Snow', 'Armin'] 53 | people.save() 54 | people.sql_engine.execute('delete from people_migrations') 55 | 56 | def test_save_with_pk(self): 57 | people = PeopleMigrationsWithPrimaryKey() 58 | people.sql_engine.execute('delete from people_migrations_with_pk') 59 | people.name = ['John', 'Snow', 'Armin'] 60 | people.age = [17, 28, 39] 61 | people.save() 62 | self.assertRaises(IntegrityError, people.save) 63 | -------------------------------------------------------------------------------- /tests/test_models_declaration.py: 
-------------------------------------------------------------------------------- 1 | import sqlite3 2 | from pathlib import Path 3 | 4 | from pandas import Timestamp 5 | from sqlalchemy.ext.declarative import declarative_base 6 | from src.pandas_oop import models 7 | from src.pandas_oop.fields import StringColumn, IntegerColumn, FloatColumn, DateColumn, BoolColumn 8 | 9 | Base = declarative_base() 10 | 11 | ABS_PATH = Path(__file__).resolve().parent.parent 12 | # DB_CONNECTION = models.Connection(':memory:') 13 | DB_CONNECTION = models.Connection(f'sqlite:///{ABS_PATH}/db/pandas_oop.db') 14 | PEOPLE_DATA_FILE = ABS_PATH / 'static/data/people.csv' 15 | PEOPLE2_DATA_FILE = ABS_PATH / 'static/data/people_jobs.csv' 16 | LOT_OF_PEOPLE_DATA_FILE = ABS_PATH / 'static/data/lot_of_people.csv' 17 | CARS_DATA_FILE = ABS_PATH / 'static/data/cars.csv' 18 | 19 | 20 | @models.Data 21 | class PeopleNoTable(models.DataFrame): 22 | name = StringColumn() 23 | age = IntegerColumn() 24 | money = FloatColumn() 25 | insertion_date = DateColumn() 26 | is_staff = BoolColumn(true='yes', false='no') 27 | 28 | 29 | @models.Data 30 | class PeopleTwoColumns(models.DataFrame): 31 | name = StringColumn() 32 | age = IntegerColumn() 33 | 34 | 35 | @models.sql(table='people', con=DB_CONNECTION) 36 | @models.Data 37 | class People(models.DataFrame): 38 | name = StringColumn() 39 | age = IntegerColumn() 40 | money = FloatColumn() 41 | insertion_date = DateColumn(format='%d-%m-%Y') 42 | is_staff = BoolColumn(true='yes', false='no') 43 | 44 | 45 | @models.Data 46 | class PeopleJobs(models.DataFrame): 47 | name = StringColumn() 48 | job = StringColumn() 49 | 50 | 51 | @models.Data 52 | class MergedPeople(models.DataFrame): 53 | name = StringColumn() 54 | age = IntegerColumn() 55 | money = FloatColumn() 56 | insertion_date = DateColumn(format='%d-%m-%Y') 57 | is_staff = BoolColumn(true='yes', false='no') 58 | job = StringColumn() 59 | 60 | 61 | @models.sql(table='people_numeric_bool', con=DB_CONNECTION) 62 | @models.Data 63 | class PeopleFromDatabase(models.DataFrame): 64 | name = StringColumn() 65 | age = IntegerColumn() 66 | money = FloatColumn() 67 | insertion_date = DateColumn() 68 | is_staff = BoolColumn(true=1, false=0) 69 | 70 | 71 | @models.sql(table='people_from_db', con=DB_CONNECTION) 72 | @models.Data 73 | class PeopleFromDatabaseWithoutBoolArgs(models.DataFrame): 74 | name = StringColumn() 75 | age = IntegerColumn() 76 | money = FloatColumn() 77 | insertion_date = DateColumn() 78 | is_staff = BoolColumn() 79 | 80 | 81 | @models.sql(table='cars', con=DB_CONNECTION) 82 | @models.Data 83 | class UniqueCars(models.DataFrame): 84 | name = StringColumn(unique=True) 85 | model = StringColumn(unique=True) 86 | random_string = StringColumn() 87 | 88 | 89 | @models.sql(table='people_from_iter', con=DB_CONNECTION) 90 | @models.Data 91 | class PeopleFromIterator(models.DataFrame): 92 | name = StringColumn() 93 | age = IntegerColumn() 94 | money = FloatColumn() 95 | insertion_date = DateColumn() 96 | is_staff = BoolColumn() 97 | 98 | 99 | @models.Data 100 | class PeopleDeclaredWithDifferentFields(models.DataFrame): 101 | name_test = StringColumn(target_name='name') 102 | age = IntegerColumn() 103 | money_test = FloatColumn(target_name='money') 104 | insertion_date_test = DateColumn(target_name="insertion_date") 105 | is_staff = BoolColumn() 106 | 107 | 108 | def retrieve_people(): 109 | for x in range(1000): 110 | yield "John", x, 50.0, Timestamp("2005-02-02"), True 111 | 
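# A quick sketch (mirroring test_populate_from_iterator) of how the retrieve_people
# generator above is consumed: each yielded tuple becomes one row, and column names
# are taken from the declared fields, in order:
#
#     people = PeopleFromIterator(from_iterator=retrieve_people)
#     people.shape   # -> (1000, 5): 1000 yielded tuples, 5 declared columns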
-------------------------------------------------------------------------------- /tests/test_sql_operations.py: -------------------------------------------------------------------------------- 1 | import string 2 | import random 3 | from unittest import TestCase 4 | from pandas import Timestamp 5 | 6 | from src.pandas_oop.custom_exceptions import MissingDecorator, MissingUniqueField 7 | from tests.test_models_declaration import PeopleNoTable, PEOPLE_DATA_FILE, People, PeopleFromDatabase, UniqueCars, \ 8 | CARS_DATA_FILE 9 | 10 | 11 | class TestSqlOperations(TestCase): 12 | 13 | def test_missing_sql_decorator_error(self): 14 | people = PeopleNoTable(from_csv=PEOPLE_DATA_FILE, delimiter=";") 15 | if people.is_valid(): 16 | self.assertRaises(MissingDecorator, people.save, if_exists='replace', index=False) 17 | 18 | def test_table_attribute(self): 19 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 20 | if people.is_valid(): 21 | people.sql_engine.execute('delete from people') 22 | people.save() 23 | people_from_db = PeopleFromDatabase(from_sql_query='select * from people') 24 | self.assertEqual(people_from_db.to_dict(), people.to_dict()) 25 | 26 | def test_insert_or_update(self): 27 | random_string = [self.get_random_string() for _ in range(3)] 28 | cars = UniqueCars(from_csv=CARS_DATA_FILE, delimiter=";") 29 | cars.random_string = random_string 30 | cars.save(if_row_exists='update') 31 | expected_result = UniqueCars(from_sql_query='select * from cars').random_string.tolist() 32 | self.assertEqual(random_string, expected_result) 33 | 34 | def test_insert_or_ignore(self): 35 | cars = UniqueCars(from_csv=CARS_DATA_FILE, delimiter=";") 36 | cars.sql_engine.execute('delete from cars') 37 | cars.head(2).save() 38 | cars.save(if_row_exists='ignore') 39 | expected_result = ['aaaa', 'bbbb', 'zzzz'] 40 | self.assertEqual(expected_result, cars.random_string.tolist()) 41 | 42 | def test_missing_unique_field(self): 43 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 44 | self.assertRaises(MissingUniqueField, people.save, if_row_exists='update') 45 | 46 | def setUp(self): 47 | # Test variable for new creation 48 | self.name_list = ["John", "Snow"] 49 | self.age_list = [15, 40] 50 | self.money_list = [13.6, 6.7] 51 | self.insertion_date_list = [Timestamp('2005-02-25'), Timestamp('2005-02-25')] 52 | self.is_staff_list = [True, False] 53 | 54 | self.expected_result = { 55 | 'name': { 56 | 0: 'John', 57 | 1: 'Snow' 58 | }, 59 | 'age': { 60 | 0: 15, 61 | 1: 40 62 | }, 63 | 'money': { 64 | 0: 13.6, 65 | 1: 6.7 66 | }, 67 | 'insertion_date': { 68 | 0: Timestamp('2005-02-25'), 69 | 1: Timestamp('2005-02-25') 70 | }, 71 | 'is_staff': { 72 | 0: True, 73 | 1: False 74 | }, 75 | } 76 | 77 | self.string_insertion_date_list = ['25-02-2005', '25-02-2005'] 78 | 79 | @staticmethod 80 | def get_random_string() -> str: 81 | return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10)) 82 | --------------------------------------------------------------------------------
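As a closing illustration of the upsert semantics exercised in test_insert_or_ignore above (a sketch assuming an empty cars table):

```python
cars = UniqueCars(from_csv=CARS_DATA_FILE, delimiter=';')
cars.head(2).save()                # insert only the first two rows
cars.save(if_row_exists='ignore')  # row 3 is inserted; the existing rows stay untouched
```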