├── .gitignore
├── LICENSE
├── README.md
├── alembic.ini
├── db
│   ├── migrations.db
│   └── pandas_oop.db
├── migrations
│   ├── README
│   ├── env.py
│   ├── script.py.mako
│   └── versions
│       ├── 2f81577f200c_first_revision.py
│       ├── 5be67895ab4d_revision_4.py
│       ├── cb6921b84bf1_revision_3.py
│       └── d9d3205a5cf1_revision_2.py
├── pyproject.toml
├── requirements-dev.txt
├── setup.py
├── src
│   ├── __init__.py
│   └── pandas_oop
│       ├── __init__.py
│       ├── _decorators.py
│       ├── custom_exceptions.py
│       ├── fields.py
│       └── models.py
├── static
│   ├── data
│   │   ├── cars.csv
│   │   ├── lot_of_people.csv
│   │   ├── people.csv
│   │   └── people_jobs.csv
│   └── images
│       ├── df.png
│       ├── poop.jpg
│       ├── poop.png
│       └── poop_sticker.png
└── tests
    ├── __init__.py
    ├── test_dataframe_behavior.py
    ├── test_db_migrations_and_sqlalchemy_behavior.py
    ├── test_models_declaration.py
    └── test_sql_operations.py

/.gitignore: --------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 | 
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 | 
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 | 
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 | 
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 | 
68 | # Scrapy stuff:
69 | .scrapy
70 | 
71 | # Sphinx documentation
72 | docs/_build/
73 | 
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 | 
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 | 
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 | 
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 | 
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 | 
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | .idea/ 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 The Python Packaging Authority 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | ![image](static/images/poop_sticker.png)
2 | # Pandas-Oop (not maintained; see https://github.com/MayasMess/panorma, which is simpler)
3 | Pandas-Oop (also known as Poop) is a package that brings an object-oriented programming style to Pandas dataframes.
4 | 
5 | Installation:
6 | -
7 | 
8 | ```shell script
9 | pip install pandas-oop
10 | ```
11 | 
12 | Some examples
13 | -
14 | 
15 | ```python
16 | from pandas_oop import models
17 | from pandas_oop.fields import StringColumn, IntegerColumn, FloatColumn, DateColumn, BoolColumn
18 | ```
19 | ```python
20 | DB_CONNECTION = models.Connection('sqlite:///pandas_oop.db')  # the same connection string you would pass to a SQLAlchemy engine
21 | ```
22 | ```python
23 | @models.sql(table='people', con=DB_CONNECTION)  # use this decorator if you want to connect your class to a database
24 | @models.Data
25 | class People(models.DataFrame):
26 |     name = StringColumn(unique=True)
27 |     age = IntegerColumn()
28 |     money = FloatColumn(target_name="coins")  # use target_name when the column is named "coins" in the CSV or table but you want a different attribute name
29 |     insertion_date = DateColumn(format='%d-%m-%Y')
30 |     is_staff = BoolColumn(true='yes', false='no')
31 | ```
32 | 
33 | Instantiating this class returns a custom dataframe with all the functionality of a Pandas
34 | dataframe, plus a few extras:
35 | 
36 | ```python
37 | people = People()
38 | """-----------------------------------------------------------"""
39 | people = People(from_csv=DATA_FILE, delimiter=";")
40 | """-----------------------------------------------------------"""
41 | people = People(from_sql_query='select * from people')
42 | """-----------------------------------------------------------"""
43 | people = People(from_df=some_dataframe)
44 | """-----------------------------------------------------------"""
45 | people = People(from_iterator=some_function_that_yields_values)
46 | """-----------------------------------------------------------"""
47 | for people_chunk in People(from_csv=DATA_FILE, delimiter=";", chunksize=10):
48 |     ...
49 | ```
50 | Example of a function that yields values:
51 | 
52 | ```python
53 | def some_function_that_yields_values():
54 |     while something:
55 |         ...
56 |         yield name, age, money, insertion_date, is_staff
57 | ```
58 | 
59 | ![image](static/images/df.png)
60 | 
61 | You can also save it to the database with the save() method (if the column dtypes no longer match the declared types, this will raise a
62 | ValidationError):
63 | 
64 | ```python
65 | people.save()
66 | ```
67 | 
68 | You can also upsert to the database; this automatically uses the fields that were declared with unique=True in the class:
69 | 
70 | ```python
71 | people.save(if_row_exists='update')
72 | # or
73 | people.save(if_row_exists='ignore')
74 | ```
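A minimal end-to-end sketch of the upsert flow (hypothetical data; it assumes the People class above, whose name field is declared with unique=True and therefore serves as the upsert key):

```python
from pandas import Timestamp

people = People()
people.name = ['John', 'Snow']
people.age = [15, 40]
people.money = [13.6, 6.7]
people.insertion_date = [Timestamp('2005-02-25'), Timestamp('2005-02-25')]
people.is_staff = [True, False]

people.save()                        # plain insert (append)

people.age = [16, 41]
people.save(if_row_exists='update')  # rows matched on 'name' are updated in place

people.save(if_row_exists='ignore')  # rows matched on 'name' are left untouched
```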
75 | 
76 | If you want to revalidate your dataframe (convert the column dtypes to the types declared in the class), you can
77 | call the validate() method:
78 | 
79 | ```python
80 | people.validate()
81 | ```
82 | 
83 | You can also validate from another class. For example, you can do something like this:
84 | 
85 | ```python
86 | people = People(from_csv=DATA_FILE)
87 | jobs = Jobs(from_sql_query='select * from jobs')
88 | people_with_jobs = people.merge(jobs, on='name').validate(from_class=PeopleWithJobs)
89 | ```
90 | 
91 | This is the list of the overridden methods that return a pandas_oop custom dataframe:
92 | - 'isnull'
93 | - 'head'
94 | - 'abs'
95 | - 'merge'
96 | - 'loc' and dataframe slicing
97 | 
98 | More methods will be added to this list over time.
99 | 
100 | 
101 | New features
102 | -
103 | Alembic database migration support has been added:
104 | - In your main application package, import Base (this is a declarative_base from SQLAlchemy):
105 | ```python
106 | from pandas_oop import Base
107 | ```
108 | - Add this configuration to the env.py file of your Alembic setup:
109 | ```python
110 | from your_app import Base
111 | target_metadata = Base.metadata
112 | ```
113 | - And finally, update your database URL in your alembic.ini file.
-------------------------------------------------------------------------------- /alembic.ini: --------------------------------------------------------------------------------
1 | # A generic, single database configuration.
2 | 
3 | [alembic]
4 | # path to migration scripts
5 | script_location = migrations
6 | 
7 | # template used to generate migration files
8 | # file_template = %%(rev)s_%%(slug)s
9 | 
10 | # sys.path path, will be prepended to sys.path if present.
11 | # defaults to the current working directory.
12 | prepend_sys_path = .
13 | 
14 | # timezone to use when rendering the date within the migration file
15 | # as well as the filename.
16 | # If specified, requires the python-dateutil library that can be
17 | # installed by adding `alembic[tz]` to the pip requirements
18 | # string value is passed to dateutil.tz.gettz()
19 | # leave blank for localtime
20 | # timezone =
21 | 
22 | # max length of characters to apply to the
23 | # "slug" field
24 | # truncate_slug_length = 40
25 | 
26 | # set to 'true' to run the environment during
27 | # the 'revision' command, regardless of autogenerate
28 | # revision_environment = false
29 | 
30 | # set to 'true' to allow .pyc and .pyo files without
31 | # a source .py file to be detected as revisions in the
32 | # versions/ directory
33 | # sourceless = false
34 | 
35 | # version location specification; This defaults
36 | # to migrations/versions. When using multiple version
37 | # directories, initial revisions must be specified with --version-path.
38 | # The path separator used here should be the separator specified by "version_path_separator" below.
39 | # version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
40 | 
41 | # version path separator; As mentioned above, this is the character used to split
42 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
43 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
44 | # Valid values for version_path_separator are:
45 | #
46 | # version_path_separator = :
47 | # version_path_separator = ;
48 | # version_path_separator = space
49 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
50 | 51 | # the output encoding used when revision files 52 | # are written from script.py.mako 53 | # output_encoding = utf-8 54 | 55 | sqlalchemy.url = sqlite:///db/migrations.db 56 | 57 | 58 | [post_write_hooks] 59 | # post_write_hooks defines scripts or Python functions that are run 60 | # on newly generated revision scripts. See the documentation for further 61 | # detail and examples 62 | 63 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 64 | # hooks = black 65 | # black.type = console_scripts 66 | # black.entrypoint = black 67 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 68 | 69 | # Logging configuration 70 | [loggers] 71 | keys = root,sqlalchemy,alembic 72 | 73 | [handlers] 74 | keys = console 75 | 76 | [formatters] 77 | keys = generic 78 | 79 | [logger_root] 80 | level = WARN 81 | handlers = console 82 | qualname = 83 | 84 | [logger_sqlalchemy] 85 | level = WARN 86 | handlers = 87 | qualname = sqlalchemy.engine 88 | 89 | [logger_alembic] 90 | level = INFO 91 | handlers = 92 | qualname = alembic 93 | 94 | [handler_console] 95 | class = StreamHandler 96 | args = (sys.stderr,) 97 | level = NOTSET 98 | formatter = generic 99 | 100 | [formatter_generic] 101 | format = %(levelname)-5.5s [%(name)s] %(message)s 102 | datefmt = %H:%M:%S 103 | -------------------------------------------------------------------------------- /db/migrations.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/db/migrations.db -------------------------------------------------------------------------------- /db/pandas_oop.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/db/pandas_oop.db -------------------------------------------------------------------------------- /migrations/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /migrations/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config 4 | from sqlalchemy import pool 5 | 6 | from alembic import context 7 | 8 | # this is the Alembic Config object, which provides 9 | # access to the values within the .ini file in use. 10 | config = context.config 11 | 12 | # Interpret the config file for Python logging. 13 | # This line sets up loggers basically. 14 | if config.config_file_name is not None: 15 | fileConfig(config.config_file_name) 16 | 17 | # add your model's MetaData object here 18 | # for 'autogenerate' support 19 | from tests.test_db_migrations_and_sqlalchemy_behavior import Base 20 | target_metadata = Base.metadata 21 | # target_metadata = None 22 | 23 | # other values from the config, defined by the needs of env.py, 24 | # can be acquired: 25 | # my_important_option = config.get_main_option("my_important_option") 26 | # ... etc. 27 | 28 | 29 | def run_migrations_offline(): 30 | """Run migrations in 'offline' mode. 31 | 32 | This configures the context with just a URL 33 | and not an Engine, though an Engine is acceptable 34 | here as well. By skipping the Engine creation 35 | we don't even need a DBAPI to be available. 
36 | 37 | Calls to context.execute() here emit the given string to the 38 | script output. 39 | 40 | """ 41 | url = config.get_main_option("sqlalchemy.url") 42 | context.configure( 43 | url=url, 44 | target_metadata=target_metadata, 45 | literal_binds=True, 46 | dialect_opts={"paramstyle": "named"}, 47 | ) 48 | 49 | with context.begin_transaction(): 50 | context.run_migrations() 51 | 52 | 53 | def run_migrations_online(): 54 | """Run migrations in 'online' mode. 55 | 56 | In this scenario we need to create an Engine 57 | and associate a connection with the context. 58 | 59 | """ 60 | connectable = engine_from_config( 61 | config.get_section(config.config_ini_section), 62 | prefix="sqlalchemy.", 63 | poolclass=pool.NullPool, 64 | ) 65 | 66 | with connectable.connect() as connection: 67 | context.configure( 68 | connection=connection, target_metadata=target_metadata 69 | ) 70 | 71 | with context.begin_transaction(): 72 | context.run_migrations() 73 | 74 | 75 | if context.is_offline_mode(): 76 | run_migrations_offline() 77 | else: 78 | run_migrations_online() 79 | -------------------------------------------------------------------------------- /migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /migrations/versions/2f81577f200c_first_revision.py: -------------------------------------------------------------------------------- 1 | """first revision 2 | 3 | Revision ID: 2f81577f200c 4 | Revises: 5 | Create Date: 2022-04-09 02:23:54.004780 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '2f81577f200c' 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table('T_Contacts', 22 | sa.Column('id', sa.Integer(), nullable=False), 23 | sa.Column('firstName', sa.Text(), nullable=True), 24 | sa.Column('lastName', sa.Text(), nullable=True), 25 | sa.PrimaryKeyConstraint('id') 26 | ) 27 | op.create_table('people_migrations', 28 | sa.Column('name', sa.Text(), nullable=True), 29 | sa.Column('id', sa.Integer(), nullable=False), 30 | sa.PrimaryKeyConstraint('id') 31 | ) 32 | # ### end Alembic commands ### 33 | 34 | 35 | def downgrade(): 36 | # ### commands auto generated by Alembic - please adjust! 
### 37 | op.drop_table('people_migrations') 38 | op.drop_table('T_Contacts') 39 | # ### end Alembic commands ### 40 | -------------------------------------------------------------------------------- /migrations/versions/5be67895ab4d_revision_4.py: -------------------------------------------------------------------------------- 1 | """revision 4 2 | 3 | Revision ID: 5be67895ab4d 4 | Revises: cb6921b84bf1 5 | Create Date: 2022-04-09 14:01:11.578570 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '5be67895ab4d' 14 | down_revision = 'cb6921b84bf1' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.add_column('people_migrations_with_multiple_pk', sa.Column('money', sa.Float(), nullable=True)) 22 | # ### end Alembic commands ### 23 | 24 | 25 | def downgrade(): 26 | # ### commands auto generated by Alembic - please adjust! ### 27 | op.drop_column('people_migrations_with_multiple_pk', 'money') 28 | # ### end Alembic commands ### 29 | -------------------------------------------------------------------------------- /migrations/versions/cb6921b84bf1_revision_3.py: -------------------------------------------------------------------------------- 1 | """revision 3 2 | 3 | Revision ID: cb6921b84bf1 4 | Revises: d9d3205a5cf1 5 | Create Date: 2022-04-09 14:00:23.462587 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'cb6921b84bf1' 14 | down_revision = 'd9d3205a5cf1' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table('people_migrations_with_multiple_pk', 22 | sa.Column('name', sa.Text(), nullable=False), 23 | sa.Column('age', sa.Integer(), nullable=False), 24 | sa.PrimaryKeyConstraint('name', 'age') 25 | ) 26 | # ### end Alembic commands ### 27 | 28 | 29 | def downgrade(): 30 | # ### commands auto generated by Alembic - please adjust! ### 31 | op.drop_table('people_migrations_with_multiple_pk') 32 | # ### end Alembic commands ### 33 | -------------------------------------------------------------------------------- /migrations/versions/d9d3205a5cf1_revision_2.py: -------------------------------------------------------------------------------- 1 | """revision 2 2 | 3 | Revision ID: d9d3205a5cf1 4 | Revises: 2f81577f200c 5 | Create Date: 2022-04-09 13:41:26.003285 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = 'd9d3205a5cf1' 14 | down_revision = '2f81577f200c' 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | # ### commands auto generated by Alembic - please adjust! ### 21 | op.create_table('people_migrations_with_pk', 22 | sa.Column('name', sa.Text(), nullable=False), 23 | sa.Column('age', sa.Integer(), nullable=True), 24 | sa.PrimaryKeyConstraint('name') 25 | ) 26 | # ### end Alembic commands ### 27 | 28 | 29 | def downgrade(): 30 | # ### commands auto generated by Alembic - please adjust! 
### 31 | op.drop_table('people_migrations_with_pk') 32 | # ### end Alembic commands ### 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "pandas>=1.3.0"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pandas==1.4.1 2 | pangres==4.1.1 3 | sqlalchemy==1.4.34 4 | alembic==1.7.7 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="pandas-oop", 8 | version="0.9.6", 9 | author="Mayas Nova", 10 | author_email="test@test.com", 11 | description="Pandas dataframes with object oriented programming style", 12 | install_requires=["pandas", "pangres", "sqlalchemy"], 13 | keywords=["pandas", "oop", "dataframe", "poop"], 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | url="https://github.com/MayasMess/pandas-oop", 17 | project_urls={ 18 | "Bug Tracker": "https://github.com/MayasMess/pandas-oop/issues", 19 | }, 20 | classifiers=[ 21 | "Programming Language :: Python :: 3", 22 | "License :: OSI Approved :: MIT License", 23 | "Operating System :: OS Independent", 24 | ], 25 | package_dir={"": "src"}, 26 | packages=setuptools.find_packages(where="src"), 27 | python_requires=">=3.6", 28 | ) 29 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/src/__init__.py -------------------------------------------------------------------------------- /src/pandas_oop/__init__.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.declarative import declarative_base 2 | 3 | Base = declarative_base() -------------------------------------------------------------------------------- /src/pandas_oop/_decorators.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from pandas.core.generic import NDFrame 3 | from pandas.core.frame import DataFrame 4 | 5 | from sqlalchemy import Column, Integer 6 | 7 | from . 
import Base
8 | 
9 | 
10 | # these methods will return a pandas_oop.models.DataFrame when called
11 | METHODS_TO_OVERRIDE = [
12 |     'isnull',
13 |     'head',
14 |     'abs',
15 |     'merge',
16 | ]
17 | 
18 | 
19 | def _decorate_all_methods(method_decorator):
20 |     def decorator(cls):
21 |         _classes = [DataFrame, NDFrame]
22 |         method_to_override = {}
23 |         for _class in _classes:
24 |             for name, obj in vars(_class).items():
25 |                 if callable(obj) and name in METHODS_TO_OVERRIDE:
26 |                     method_to_override[name] = obj.__annotations__
27 |                     setattr(cls, name, method_decorator(obj, cls))
28 |         return cls
29 |     return decorator
30 | 
31 | 
32 | def _return_custom_df_on_call(func, cls=None):
33 |     @wraps(func)
34 |     def wrapper(*args, **kwargs):
35 |         res = func(*args, **kwargs)
36 |         return cls.generic_overrider(res, args[0])
37 |     return wrapper
38 | 
39 | 
40 | def init_sqlalchemy_class(func):
41 |     # Initialize the SQLAlchemy class (this is used for migration detection)
42 |     attr_sqlalchemy_dict = {data_type.name: data_type.col_obj_series.sqlalchemy_column
43 |                             for data_type in func.data_types}
44 |     attr_sqlalchemy_dict['__tablename__'] = func.sql.get('table')
45 |     func.index_list = [data_type.name
46 |                        for data_type in func.data_types
47 |                        if data_type.col_obj_series.kwargs.get('unique') is True]
48 |     if not func.index_list:
49 |         attr_sqlalchemy_dict['id'] = Column(Integer, primary_key=True)
50 |     func.sqlalchemy_class = type(func.decorated_class.__name__,
51 |                                  (Base,),
52 |                                  attr_sqlalchemy_dict)
53 |     return func
54 | 
55 | 
56 | def sql(**kwargs):
57 |     """
58 |     SQL decorator: captures its keyword arguments and initializes the SQLAlchemy class to enable db migrations
59 |     """
60 |     def wrapper(func):
61 |         func.__setattr__('sql', kwargs)
62 |         func = init_sqlalchemy_class(func=func)
63 |         return func
64 |     return wrapper
65 | 
-------------------------------------------------------------------------------- /src/pandas_oop/custom_exceptions.py: --------------------------------------------------------------------------------
1 | class ValidationError(Exception):
2 |     """Raised when a column's dtype does not match the type declared in the class"""
3 |     def __init__(self, msg):
4 |         self.msg = msg
5 |         super(ValidationError, self).__init__(msg)
6 | 
7 | 
8 | class MissingDecorator(Exception):
9 |     """Raised when a class that needs database access was not decorated with models.sql"""
10 |     def __init__(self, msg):
11 |         self.msg = msg
12 |         super(MissingDecorator, self).__init__(msg)
13 | 
14 | 
15 | class MissingArguments(Exception):
16 |     """Raised when arguments are missing on the models.sql decorator"""
17 |     def __init__(self, msg):
18 |         self.msg = msg
19 |         super(MissingArguments, self).__init__(msg)
20 | 
21 | 
22 | class MissingUniqueField(Exception):
23 |     """Raised when an upsert is attempted on a class with no unique=True field"""
24 |     def __init__(self, msg):
25 |         self.msg = msg
26 |         super(MissingUniqueField, self).__init__(msg)
27 | 
-------------------------------------------------------------------------------- /src/pandas_oop/fields.py: --------------------------------------------------------------------------------
1 | from copy import copy
2 | 
3 | import pandas as pd
4 | import numpy as np
5 | from sqlalchemy import Column, Text, Integer, Float, Date, Boolean
6 | 
7 | 
8 | class BaseColumn(pd.Series):
9 |     def __init__(self, base_type, dtype, np_type, **kwargs):
10 |         super().__init__(dtype=dtype)
11 |         self.str_type = dtype
12 |         self.np_type = np_type
13 |         self.base_type = base_type
14 |         self.kwargs = copy(kwargs)
15 | 
16 |     @staticmethod
17 |     def init_sqlalchemy_column(sqlalchemy_col_type, **kwargs):
18 |         kwargs['primary_key'] = kwargs.pop('unique', None)  # unique=True fields become primary keys
19 |         kwargs.pop('target_name', None)  # target_name is not a SQLAlchemy argument
20 |         return Column(sqlalchemy_col_type, **kwargs)
21 | 
22 | 
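# Illustration (a sketch, not part of the original file): how a field declaration
# is translated into a SQLAlchemy column by init_sqlalchemy_column above:
#     name = StringColumn(unique=True)          ->  Column(Text, primary_key=True)
#     money = FloatColumn(target_name="coins")  ->  Column(Float)  # target_name dropped
# unique=True therefore doubles as the table's primary key and, later, as the upsert index.
23 | 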
class StringColumn(BaseColumn): 24 | def __init__(self, **kwargs): 25 | super().__init__(base_type='object', dtype='object', np_type=np.str_, **kwargs) 26 | self.sqlalchemy_column = self.init_sqlalchemy_column(Text, **kwargs) 27 | 28 | 29 | class IntegerColumn(BaseColumn): 30 | def __init__(self, **kwargs): 31 | super().__init__(base_type='int', dtype='int64', np_type=np.int64, **kwargs) 32 | self.sqlalchemy_column = self.init_sqlalchemy_column(Integer, **kwargs) 33 | 34 | 35 | class FloatColumn(BaseColumn): 36 | def __init__(self, **kwargs): 37 | super().__init__(base_type='float', dtype='float64', np_type=np.float64, **kwargs) 38 | self.sqlalchemy_column = self.init_sqlalchemy_column(Float, **kwargs) 39 | 40 | 41 | class DateColumn(BaseColumn): 42 | def __init__(self, **kwargs): 43 | super().__init__(base_type='datetime', dtype='datetime64[ns]', np_type=np.datetime64, **kwargs) 44 | if kwargs.get('format') is not None: 45 | del kwargs['format'] 46 | self.sqlalchemy_column = self.init_sqlalchemy_column(Date, **kwargs) 47 | 48 | 49 | class BoolColumn(BaseColumn): 50 | def __init__(self, **kwargs): 51 | super().__init__(base_type='bool', dtype='bool', np_type=np.bool_, **kwargs) 52 | self.true_or_false = None 53 | if kwargs.get('true') is not None and kwargs.get('false') is not None: 54 | self.true_or_false = {kwargs.get('true'): True, kwargs.get('false'): False} 55 | del kwargs['true'] 56 | del kwargs['false'] 57 | self.sqlalchemy_column = self.init_sqlalchemy_column(Boolean, **kwargs) 58 | -------------------------------------------------------------------------------- /src/pandas_oop/models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | import logging 4 | 5 | import pandas as pd 6 | from pandas._typing import NDFrameT 7 | from pandas.io.parsers.readers import TextFileReader 8 | from pangres import upsert 9 | import numpy as np 10 | import typing 11 | 12 | from sqlalchemy import create_engine 13 | 14 | from ._decorators import _decorate_all_methods, _return_custom_df_on_call, sql 15 | from .custom_exceptions import ValidationError, MissingDecorator, MissingArguments, MissingUniqueField 16 | from . 
import Base
17 | 
18 | 
19 | @dataclass
20 | class DataFrameState:
21 |     data_types: typing.Optional[list] = None
22 |     index_list: typing.Optional[list] = None
23 |     sql: typing.Optional[dict] = None
24 |     class_name: typing.Optional[str] = None
25 |     decorated_class: typing.Optional[type] = None
26 |     sqlalchemy_class: typing.Optional[type] = None  # a class derived from Base
27 | 
28 | 
29 | @_decorate_all_methods(_return_custom_df_on_call)
30 | class DataFrame(pd.DataFrame):
31 | 
32 |     def __init__(self, from_df: pd.DataFrame = None, from_csv=None, from_sql_query=None, from_iterator=None, chunksize=None):
33 |         super().__init__()
34 |         self._dataframe_state = DataFrameState()
35 |         self.__is_valid = False
36 | 
37 |     def is_valid(self) -> bool:
38 |         if self._dataframe_state.data_types is None:
39 |             self.__is_valid = True
40 |             return self.__is_valid
41 |         try:
42 |             for data_type in self._dataframe_state.data_types:
43 |                 if data_type.base_type not in self[data_type.name].dtype.name:
44 |                     raise ValidationError(
45 |                         f"The column {data_type.name} is not of type {data_type.col_obj_series.dtype}")
46 |             self.__is_valid = True
47 |             return self.__is_valid
48 |         except ValidationError as ve:
49 |             logging.warning(ve.msg)
50 |             return False
51 | 
52 |     def validate(self, from_class=None) -> 'DataFrame':
53 |         if from_class is not None:
54 |             self._dataframe_state = from_class().dataframe_state
55 |         for data_type in self._dataframe_state.data_types:
56 |             if data_type.str_type == 'datetime64[ns]':
57 |                 self[data_type.name] = pd.to_datetime(self[data_type.name],
58 |                                                       format=data_type.col_obj_series.kwargs.get('format'))
59 |             else:
60 |                 self[data_type.name] = self[data_type.name].astype(data_type.str_type)
61 |         self.is_valid()
62 |         return self
63 | 
64 |     def save(self, *args, **kwargs) -> int:
65 |         self.is_valid()
66 |         self.is_sql_decorator_missing()
67 |         if kwargs.get("if_row_exists") is not None:
68 |             if self._dataframe_state.index_list is None or not self._dataframe_state.index_list:
69 |                 raise MissingUniqueField(
70 |                     'Your class must contain one or multiple fields with the parameter "unique=True"')
71 |             return upsert(df=self.set_index(self._dataframe_state.index_list),
72 |                           con=self.sql_engine,
73 |                           table_name=self.sql_table, **kwargs)
74 |         return self.normal_save(*args, **kwargs)
75 | 
76 |     def normal_save(self, *args, **kwargs) -> int:
77 |         kwargs['name'] = self.sql_table
78 |         with self.sql_engine.connect() as con:
79 |             kwargs['con'] = con
80 |             if kwargs.get('if_exists') is None:
81 |                 kwargs['if_exists'] = 'append'
82 |             elif kwargs.get('if_exists') == 'replace':
83 |                 raise TypeError('got an unexpected value "if_exists=replace". Please use a plain pandas dataframe '
84 |                                 'to access this functionality')
85 |             if kwargs.get('index') is None:
86 |                 kwargs['index'] = False
87 |             elif kwargs.get('index') is True:
88 |                 return self.set_index(self._dataframe_state.index_list).to_sql(*args, **kwargs)
89 |             return self.to_sql(*args, **kwargs)
90 | 
91 |     def is_sql_decorator_missing(self) -> None:
92 |         if self._dataframe_state.sql is None:
93 |             raise MissingDecorator("You have to decorate your class with models.sql")
94 |         for key in self._dataframe_state.sql.keys():
95 |             if self._dataframe_state.sql.get(key) is None:
96 |                 raise MissingArguments("Missing arguments on models.sql decorator")
97 | 
98 |     def _take_with_is_copy(self, indices, axis=0) -> NDFrameT:
99 |         """
100 |         Internal version of the `take` method that sets the `_is_copy`
101 |         attribute to keep track of the parent dataframe (used in indexing
102 |         for the SettingWithCopyWarning).
103 | 
104 |         See the docstring of `take` for full explanation of the parameters.
105 |         """
106 |         result = self.generic_overrider(self.take(indices=indices, axis=axis), self)
107 |         # Maybe set copy if we didn't actually change the index.
108 |         if not result._get_axis(axis).equals(self._get_axis(axis)):
109 |             result._set_is_copy(self)
110 |         return result
111 | 
112 |     def _slice(self, slobj: slice, axis=0) -> NDFrameT:
113 |         """
114 |         Construct a slice of this container.
115 | 
116 |         Slicing with this method is *always* positional.
117 |         """
118 |         assert isinstance(slobj, slice), type(slobj)
119 |         axis = self._get_block_manager_axis(axis)
120 |         result = self._constructor(self._mgr.get_slice(slobj, axis=axis))
121 |         result = result.__finalize__(self)
122 | 
123 |         # this could be a view
124 |         # but only in a single-dtyped view sliceable case
125 |         is_copy = axis != 0 or result._is_view
126 |         result._set_is_copy(self, copy=is_copy)
127 |         return self.generic_overrider(result, self)
128 | 
129 |     @classmethod
130 |     def generic_overrider(cls, df: pd.DataFrame, ct_df: 'DataFrame') -> 'DataFrame':
131 |         new_custom_df = cls()
132 |         new_custom_df._dataframe_state = ct_df.dataframe_state
133 |         for col_name in df.columns:
134 |             new_custom_df[col_name] = df[col_name]
135 |         return new_custom_df
136 | 
137 |     @property
138 |     def dataframe_state(self):
139 |         return self._dataframe_state
140 | 
141 |     @property
142 |     def sql_engine(self):
143 |         return self._dataframe_state.sql.get('con').sql_engine
144 | 
145 |     @property
146 |     def sql_table(self):
147 |         return self._dataframe_state.sql.get('table')
148 | 
149 |     def __str__(self):
150 |         return self._dataframe_state.class_name
151 | 
152 | 
153 | @dataclass
154 | class DataTypes:
155 |     name: str
156 |     base_type: str
157 |     str_type: str
158 |     np_type: np.generic
159 |     col_obj_series: pd.Series
160 |     target_name: str
161 | 
162 | 
163 | class Data:
164 |     def __init__(self, decorated_class):
165 |         """
166 |         This runs at class-definition time, when the @models.Data decorator is applied
167 |         """
168 |         self.decorated_class = decorated_class
169 |         self.decorated_inst = self.decorated_class()
170 |         self.df: typing.Optional[DataFrame] = None
171 |         self.index_list: typing.Optional[list] = None
172 |         self.sqlalchemy_class = None
173 |         self.data_types: List[DataTypes] = [
174 |             DataTypes(
175 |                 name=attr_key,
176 |                 base_type=attr_val.base_type,
177 |                 str_type=attr_val.str_type,
178 |                 np_type=attr_val.np_type,
179 |                 col_obj_series=getattr(self.decorated_class, attr_key),
180 |                 target_name=attr_val.kwargs.get('target_name') if attr_val.kwargs.get('target_name') is not None else attr_key
181 |             )
182 |             for attr_key, attr_val in self.decorated_class.__dict__.items()
183 |             if not attr_key.startswith('__') and not attr_key.endswith('__')]
184 | 
185 |     """
186 |     The models.sql decorator (see _decorators.py) is applied between __init__ and __call__.
187 |     """
188 |     def __call__(self, *args, **kwargs) -> DataFrame:
189 |         """
190 |         This runs when the decorated class is instantiated
191 |         """
192 |         self.init_new_custom_df()
193 | 
194 |         if kwargs.get('from_df') is not None:
195 |             return self._validate_kwargs(**kwargs)
196 |         if kwargs.get('from_csv') is not None:
197 |             return self._validate_from_csv_kwarg(**kwargs)
198 |         if kwargs.get('from_iterator') is not None:
199 |             return self._validate_from_iterator_kwarg(**kwargs)
200 |         if kwargs.get('from_sql_query') is not None:
201 |             self.df.is_sql_decorator_missing()
202 |             with self.df.sql_engine.connect() as con:
203 |                 kwargs['con'] = con
204 |                 return self._validate_from_sql_query_kwarg(**kwargs)
205 | for data_type in self.data_types: 206 | self.df[data_type.name] = data_type.col_obj_series 207 | return self.df 208 | 209 | def _validate_from_csv_kwarg(self, **kwargs) -> DataFrame: 210 | kwargs['filepath_or_buffer'] = kwargs.pop('from_csv') 211 | return self._validate_kwargs(func=pd.read_csv, **kwargs) 212 | 213 | def _validate_from_sql_query_kwarg(self, **kwargs) -> DataFrame: 214 | kwargs['sql'] = kwargs.pop('from_sql_query') 215 | return self._validate_kwargs(func=pd.read_sql_query, **kwargs) 216 | 217 | def _validate_from_iterator_kwarg(self, **kwargs) -> DataFrame: 218 | data = [] 219 | for row in kwargs.get('from_iterator')(): 220 | data.append(row) 221 | kwargs['columns'] = [data_type.name for data_type in self.data_types] 222 | kwargs['data'] = data 223 | kwargs.pop('from_iterator') 224 | return self._validate_kwargs(func=self.create_df_from_data_and_columns, **kwargs) 225 | 226 | def _validate_kwargs(self, func=None, **kwargs): 227 | col_type = {} 228 | bool_validator = {} 229 | for index, data_type in enumerate(self.data_types): 230 | 231 | if data_type.str_type == 'datetime64[ns]': 232 | if 'parse_dates' in kwargs.keys(): 233 | kwargs['parse_dates'].append(data_type.target_name) 234 | else: 235 | kwargs['parse_dates'] = [data_type.target_name] 236 | continue 237 | 238 | if data_type.str_type == 'bool' and data_type.col_obj_series.true_or_false is not None: 239 | bool_validator[data_type.name] = data_type.col_obj_series.true_or_false 240 | 241 | col_type[data_type.name] = data_type.np_type 242 | if kwargs.get('from_df') is not None: 243 | df = kwargs.get('from_df') 244 | else: 245 | df = func(**kwargs) 246 | if isinstance(df, TextFileReader): 247 | return self.df_generator(df, bool_validator) 248 | self.build_custom_df(df, bool_validator) 249 | return self.df 250 | 251 | def df_generator(self, df, bool_validator): 252 | for chunk in df: 253 | self.init_new_custom_df() 254 | self.build_custom_df(chunk, bool_validator) 255 | yield self.df 256 | 257 | def build_custom_df(self, df, bool_validator): 258 | # Convert bool values 259 | for col_name, bool_val_dict in bool_validator.items(): 260 | df[col_name] = df[col_name].map(bool_val_dict) 261 | 262 | for data_type in self.data_types: 263 | self.df[data_type.name] = df[data_type.target_name] 264 | 265 | def init_new_custom_df(self): 266 | self.df = DataFrame() 267 | self.df.dataframe_state.decorated_class = self.decorated_class 268 | self.df.dataframe_state.class_name = self.decorated_class.__name__ 269 | self.df.dataframe_state.data_types = self.data_types 270 | self.df.dataframe_state.index_list = self.index_list 271 | if hasattr(self, 'sql'): 272 | self.df.dataframe_state.sql = self.sql 273 | self.df.dataframe_state.sql['table'] = self.sql.get('table') 274 | self.df.dataframe_state.sqlalchemy_class = self.sqlalchemy_class 275 | 276 | @staticmethod 277 | def create_df_from_data_and_columns(**kwargs) -> pd.DataFrame: 278 | return pd.DataFrame(data=kwargs.get('data'), columns=kwargs.get('columns')) 279 | 280 | 281 | class Connection: 282 | def __init__(self, con_string): 283 | self.sql_engine = create_engine(con_string) 284 | 285 | 286 | _trust = sql 287 | -------------------------------------------------------------------------------- /static/data/cars.csv: -------------------------------------------------------------------------------- 1 | name;model;random_string 2 | 206;Peugeot;aaaa 3 | Clio;Renault;bbbb 4 | M6;BMW;zzzz -------------------------------------------------------------------------------- 
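To make the column-mapping logic in models.Data.build_custom_df (above) concrete before the sample data files: a minimal sketch in plain pandas (hypothetical two-row frame) of what happens to a BoolColumn(true='yes', false='no') field and a target_name rename during loading:

```python
import pandas as pd

# raw CSV as read by pd.read_csv
raw = pd.DataFrame({'coins': [13.6, 6.7], 'is_staff': ['yes', 'no']})

# BoolColumn(true='yes', false='no') becomes a .map() over the raw column
raw['is_staff'] = raw['is_staff'].map({'yes': True, 'no': False})

# money = FloatColumn(target_name='coins') copies 'coins' under the declared name
custom = pd.DataFrame()
custom['money'] = raw['coins']
custom['is_staff'] = raw['is_staff']
```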
/static/data/lot_of_people.csv: -------------------------------------------------------------------------------- 1 | name;age;money;insertion_date;is_staff 2 | John;15;13.6;2005-02-25;yes 3 | Snow;40;6.7;2005-02-25;no 4 | Marie;17;6.9;2005-02-25;yes 5 | Youpa;17;6.9;2005-02-25;no 6 | Loulou;17;6.9;2005-02-25;yes 7 | Miro;17;6.9;2005-02-25;no 8 | Mushu;17;6.9;2005-02-25;yes -------------------------------------------------------------------------------- /static/data/people.csv: -------------------------------------------------------------------------------- 1 | name;age;money;insertion_date;is_staff 2 | John;15;13.6;2005-02-25;yes 3 | Snow;40;6.7;2005-02-25;no 4 | -------------------------------------------------------------------------------- /static/data/people_jobs.csv: -------------------------------------------------------------------------------- 1 | name;job 2 | John;Developer 3 | Snow;RH -------------------------------------------------------------------------------- /static/images/df.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/static/images/df.png -------------------------------------------------------------------------------- /static/images/poop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/static/images/poop.jpg -------------------------------------------------------------------------------- /static/images/poop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/static/images/poop.png -------------------------------------------------------------------------------- /static/images/poop_sticker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/static/images/poop_sticker.png -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MayasMess/pandas-oop/960f024a777b0ec91dbbfcfeaf15bddd5ba590dc/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_dataframe_behavior.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import pandas as pd 3 | import numpy as np 4 | from pandas import Timestamp 5 | 6 | from src.pandas_oop.models import DataFrame 7 | from tests.test_models_declaration import People, PeopleNoTable, PEOPLE_DATA_FILE, PeopleFromDatabase, \ 8 | PeopleFromDatabaseWithoutBoolArgs, PEOPLE2_DATA_FILE, PeopleJobs, UniqueCars, MergedPeople, retrieve_people, \ 9 | PeopleFromIterator, PeopleDeclaredWithDifferentFields, LOT_OF_PEOPLE_DATA_FILE, PeopleTwoColumns 10 | 11 | 12 | class TestDataframeBehavior(TestCase): 13 | 14 | def test_instance_is_dataframe(self): 15 | people = People() 16 | self.assertIsInstance(people, pd.DataFrame, "Not an instance of pandas dataframe") 17 | 18 | def test_object_is_not_singleton(self): 19 | people_1 = People() 20 | people_2 = People() 21 | self.assertIsNot(people_2, people_1) 22 | 23 | def test_dataframe_has_only_declared_columns(self): 24 | 
people = PeopleTwoColumns(from_csv=PEOPLE_DATA_FILE, delimiter=";") 25 | self.assertEqual(['name', 'age'], list(people.columns)) 26 | 27 | def test_instance_is_dataframe_no_table(self): 28 | people = PeopleNoTable() 29 | self.assertIsInstance(people, pd.DataFrame, "Not an instance of pandas dataframe") 30 | 31 | def test_append_list_to_one_column(self): 32 | people = People() 33 | people.name = self.name_list 34 | people.age = self.age_list 35 | people.money = self.money_list 36 | people.insertion_date = self.insertion_date_list 37 | people.is_staff = self.is_staff_list 38 | 39 | result = people.to_dict() 40 | 41 | self.assertEqual(result, self.expected_result) 42 | 43 | def test_from_csv(self): 44 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 45 | result = people.to_dict() 46 | self.assertEqual(result, self.expected_result) 47 | self.assertEqual(people.insertion_date.dtype.type, np.datetime64, "Column is not a date") 48 | 49 | def test_from_sql_query(self): 50 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 51 | people.sql_engine.execute('delete from people') 52 | people.save() 53 | people_from_db = PeopleFromDatabase(from_sql_query='select * from people') 54 | self.assertEqual(people_from_db.to_dict(), people.to_dict()) 55 | 56 | def test_from_sql_query_without_bool_args(self): 57 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 58 | people.sql_engine.execute('delete from people') 59 | people.save() 60 | people_from_db = PeopleFromDatabaseWithoutBoolArgs(from_sql_query='select * from people') 61 | self.assertEqual(people_from_db.to_dict(), people.to_dict()) 62 | 63 | def test_dataframe_is_valid(self): 64 | people = People() 65 | people.name = self.name_list 66 | people.age = self.age_list 67 | people.money = self.money_list 68 | people.insertion_date = self.insertion_date_list 69 | people.is_staff = self.is_staff_list 70 | self.assertTrue(people.is_valid()) 71 | 72 | def test_dataframe_validate(self): 73 | people = People() 74 | people.name = self.name_list 75 | people.age = self.age_list 76 | people.money = self.money_list 77 | people.insertion_date = self.string_insertion_date_list 78 | people.is_staff = self.is_staff_list 79 | people.validate() 80 | people.save() 81 | self.assertTrue(people.is_valid()) 82 | 83 | def test_dataframe_is_not_valid(self): 84 | people = People() 85 | people.name = self.name_list 86 | people.age = self.age_list 87 | people.money = self.money_list 88 | people.insertion_date = self.string_insertion_date_list 89 | people.is_staff = self.is_staff_list 90 | self.assertFalse(people.is_valid()) 91 | 92 | def test_isnull_return_custom_df(self): 93 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";").isnull() 94 | self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when isnull is called') 95 | 96 | def test_head_return_custom_df(self): 97 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";").head(1) 98 | self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when head is called') 99 | 100 | def test_abs_return_custom_df(self): 101 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 102 | people.name = [3, -7] 103 | people.insertion_date = [3, -7] 104 | people.is_staff = [3, -7] 105 | people = people.abs() 106 | self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when abs is called') 107 | 108 | def test_merge_return_custom_df(self): 109 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 110 | people2 = PeopleJobs(from_csv=PEOPLE2_DATA_FILE, delimiter=";") 111 | 
merged_result = people.merge(people2, on='name')
112 |         self.assertIsInstance(merged_result, DataFrame, 'Not a custom dataframe when merge is called')
113 |         self.assertEqual(merged_result.to_dict(), self.expected_merged_result)
114 | 
115 |     def test_validate_accept_argument(self):
116 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
117 |         people_jobs = PeopleJobs(from_csv=PEOPLE2_DATA_FILE, delimiter=";")
118 |         merged_result = people.merge(people_jobs, on='name').validate(from_class=MergedPeople)
119 |         self.assertEqual(str(merged_result), 'MergedPeople')
120 | 
121 |     def test_transform_df_to_custom_df_from_class_instantiation(self):
122 |         data = pd.read_csv(filepath_or_buffer=PEOPLE_DATA_FILE, delimiter=";")
123 |         people = People(from_df=data)
124 |         self.assertEqual(str(people), 'People')
125 | 
126 |     def test_populate_from_iterator(self):
127 |         people = PeopleFromIterator(from_iterator=retrieve_people)
128 |         self.assertEqual(people.shape, (1000, 5))
129 |         self.assertTrue(people.is_valid())
130 | 
131 |     def test_dataframe_has_column_name_declared(self):
132 |         people = PeopleDeclaredWithDifferentFields(from_csv=PEOPLE_DATA_FILE, delimiter=";")
133 |         self.assertEqual(list(people.columns), ['name_test', 'age', 'money_test', 'insertion_date_test', 'is_staff'])
134 | 
135 |     def test_slicing_return_custom_df(self):
136 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
137 |         people = people[people.name == 'John']
138 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when slicing is performed')
139 | 
140 |     def test_when_loc_is_performed(self):
141 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
142 |         people = people.loc[people.name == 'John']
143 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when loc is performed')
144 | 
145 |     def test_when_loc_set_value_is_performed(self):
146 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
147 |         people.loc[people.name == 'John'] = ('Marie', 15, 15.0, Timestamp('2005-02-25'), True)
148 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when loc set value is performed')
149 | 
150 |     def test_when_loc_slice_indexing_is_performed(self):
151 |         people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";")
152 |         people = people[:1]
153 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when slice indexing is performed')
154 | 
155 |     def test_multi_loc_conditions(self):
156 |         people = People(from_csv=LOT_OF_PEOPLE_DATA_FILE, delimiter=";")
157 |         people = people.loc[(people.age < 18) & (people.name.str.startswith("M"))]
158 |         self.assertEqual(people.shape, (3, 5))
159 |         self.assertIsInstance(people, DataFrame, 'Not a custom dataframe when loc with multiple conditions is performed')
160 | 
161 |     def test_chunksize(self):
162 |         for people_chunk in People(from_csv=LOT_OF_PEOPLE_DATA_FILE, delimiter=";", chunksize=2):
163 |             self.assertIsInstance(people_chunk, DataFrame, 'Not a custom dataframe when chunksize is used')
164 | 
165 |     def setUp(self):
166 |         # Old school creation
167 |         self.old_school_df = pd.DataFrame({'name': pd.Series(dtype='O'),
168 |                                            'age': pd.Series(dtype='int'),
169 |                                            'money': pd.Series(dtype='float'),
170 |                                            'insertion_date': pd.Series(dtype='datetime64[ns]'),
171 |                                            'is_staff': pd.Series(dtype='bool')})
172 |         self.old_school_read_csv_df = pd.read_csv(PEOPLE_DATA_FILE, delimiter=';', parse_dates=['insertion_date'])
173 |         self.old_school_read_csv_df['is_staff'] = self.old_school_read_csv_df['is_staff'].map({'yes': True,
174 |                                                                                                'no': False})
175 | 
176 |         # Test variable for new
creation 177 | self.name_list = ["John", "Snow"] 178 | self.age_list = [15, 40] 179 | self.money_list = [13.6, 6.7] 180 | self.insertion_date_list = [Timestamp('2005-02-25'), Timestamp('2005-02-25')] 181 | self.is_staff_list = [True, False] 182 | 183 | self.expected_result = { 184 | 'name': { 185 | 0: 'John', 186 | 1: 'Snow' 187 | }, 188 | 'age': { 189 | 0: 15, 190 | 1: 40 191 | }, 192 | 'money': { 193 | 0: 13.6, 194 | 1: 6.7 195 | }, 196 | 'insertion_date': { 197 | 0: Timestamp('2005-02-25'), 198 | 1: Timestamp('2005-02-25') 199 | }, 200 | 'is_staff': { 201 | 0: True, 202 | 1: False 203 | }, 204 | } 205 | 206 | self.expected_merged_result = { 207 | 'name': { 208 | 0: 'John', 209 | 1: 'Snow' 210 | }, 211 | 'age': { 212 | 0: 15, 213 | 1: 40 214 | }, 215 | 'money': { 216 | 0: 13.6, 217 | 1: 6.7 218 | }, 219 | 'insertion_date': { 220 | 0: Timestamp('2005-02-25'), 221 | 1: Timestamp('2005-02-25') 222 | }, 223 | 'is_staff': { 224 | 0: True, 225 | 1: False 226 | }, 227 | 'job': { 228 | 0: 'Developer', 229 | 1: 'RH' 230 | }, 231 | } 232 | 233 | self.string_insertion_date_list = ['25-02-2005', '25-02-2005'] 234 | -------------------------------------------------------------------------------- /tests/test_db_migrations_and_sqlalchemy_behavior.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest import TestCase 3 | 4 | from sqlalchemy import Column, Integer, Text 5 | from sqlalchemy.exc import IntegrityError 6 | 7 | from src.pandas_oop import Base 8 | from src.pandas_oop import models 9 | from src.pandas_oop.fields import StringColumn, IntegerColumn, FloatColumn 10 | 11 | ABS_PATH = Path(__file__).resolve().parent.parent 12 | DB_CONNECTION = models.Connection(f'sqlite:///{ABS_PATH}/db/migrations.db') 13 | 14 | 15 | class Contact(models.Base): 16 | __tablename__ = 'T_Contacts' 17 | 18 | id = Column(Integer, primary_key=True) 19 | firstName = Column(Text) 20 | lastName = Column(Text) 21 | 22 | 23 | @models.sql(table="people_migrations", con=DB_CONNECTION) 24 | @models.Data 25 | class PeopleMigrations(models.DataFrame): 26 | name = StringColumn() 27 | 28 | 29 | @models.sql(table="people_migrations_with_pk", con=DB_CONNECTION) 30 | @models.Data 31 | class PeopleMigrationsWithPrimaryKey(models.DataFrame): 32 | name = StringColumn(unique=True) 33 | age = IntegerColumn() 34 | 35 | 36 | @models.sql(table="people_migrations_with_multiple_pk", con=DB_CONNECTION) 37 | @models.Data 38 | class PeopleMigrationsWithMultiplePrimaryKey(models.DataFrame): 39 | name = StringColumn(unique=True) 40 | age = IntegerColumn(unique=True) 41 | money = FloatColumn() 42 | 43 | 44 | class TestMigrations(TestCase): 45 | 46 | def test_custom_dataframe_is_detected_as_sqlalchemy_class(self): 47 | detected_tables = [table.fullname for table in Base.metadata.sorted_tables] 48 | self.assertIn('people_migrations', detected_tables) 49 | 50 | def test_save_without_pk_no_error(self): 51 | people = PeopleMigrations() 52 | people.name = ['John', 'Snow', 'Armin'] 53 | people.save() 54 | people.sql_engine.execute('delete from people_migrations') 55 | 56 | def test_save_with_pk(self): 57 | people = PeopleMigrationsWithPrimaryKey() 58 | people.sql_engine.execute('delete from people_migrations_with_pk') 59 | people.name = ['John', 'Snow', 'Armin'] 60 | people.age = [17, 28, 39] 61 | people.save() 62 | self.assertRaises(IntegrityError, people.save) 63 | -------------------------------------------------------------------------------- /tests/test_models_declaration.py: 
-------------------------------------------------------------------------------- 1 | import sqlite3 2 | from pathlib import Path 3 | 4 | from pandas import Timestamp 5 | from sqlalchemy.ext.declarative import declarative_base 6 | from src.pandas_oop import models 7 | from src.pandas_oop.fields import StringColumn, IntegerColumn, FloatColumn, DateColumn, BoolColumn 8 | 9 | Base = declarative_base() 10 | 11 | ABS_PATH = Path(__file__).resolve().parent.parent 12 | # DB_CONNECTION = models.Connection(':memory:') 13 | DB_CONNECTION = models.Connection(f'sqlite:///{ABS_PATH}/db/pandas_oop.db') 14 | PEOPLE_DATA_FILE = ABS_PATH / 'static/data/people.csv' 15 | PEOPLE2_DATA_FILE = ABS_PATH / 'static/data/people_jobs.csv' 16 | LOT_OF_PEOPLE_DATA_FILE = ABS_PATH / 'static/data/lot_of_people.csv' 17 | CARS_DATA_FILE = ABS_PATH / 'static/data/cars.csv' 18 | 19 | 20 | @models.Data 21 | class PeopleNoTable(models.DataFrame): 22 | name = StringColumn() 23 | age = IntegerColumn() 24 | money = FloatColumn() 25 | insertion_date = DateColumn() 26 | is_staff = BoolColumn(true='yes', false='no') 27 | 28 | 29 | @models.Data 30 | class PeopleTwoColumns(models.DataFrame): 31 | name = StringColumn() 32 | age = IntegerColumn() 33 | 34 | 35 | @models.sql(table='people', con=DB_CONNECTION) 36 | @models.Data 37 | class People(models.DataFrame): 38 | name = StringColumn() 39 | age = IntegerColumn() 40 | money = FloatColumn() 41 | insertion_date = DateColumn(format='%d-%m-%Y') 42 | is_staff = BoolColumn(true='yes', false='no') 43 | 44 | 45 | @models.Data 46 | class PeopleJobs(models.DataFrame): 47 | name = StringColumn() 48 | job = StringColumn() 49 | 50 | 51 | @models.Data 52 | class MergedPeople(models.DataFrame): 53 | name = StringColumn() 54 | age = IntegerColumn() 55 | money = FloatColumn() 56 | insertion_date = DateColumn(format='%d-%m-%Y') 57 | is_staff = BoolColumn(true='yes', false='no') 58 | job = StringColumn() 59 | 60 | 61 | @models.sql(table='people_numeric_bool', con=DB_CONNECTION) 62 | @models.Data 63 | class PeopleFromDatabase(models.DataFrame): 64 | name = StringColumn() 65 | age = IntegerColumn() 66 | money = FloatColumn() 67 | insertion_date = DateColumn() 68 | is_staff = BoolColumn(true=1, false=0) 69 | 70 | 71 | @models.sql(table='people_from_db', con=DB_CONNECTION) 72 | @models.Data 73 | class PeopleFromDatabaseWithoutBoolArgs(models.DataFrame): 74 | name = StringColumn() 75 | age = IntegerColumn() 76 | money = FloatColumn() 77 | insertion_date = DateColumn() 78 | is_staff = BoolColumn() 79 | 80 | 81 | @models.sql(table='cars', con=DB_CONNECTION) 82 | @models.Data 83 | class UniqueCars(models.DataFrame): 84 | name = StringColumn(unique=True) 85 | model = StringColumn(unique=True) 86 | random_string = StringColumn() 87 | 88 | 89 | @models.sql(table='people_from_iter', con=DB_CONNECTION) 90 | @models.Data 91 | class PeopleFromIterator(models.DataFrame): 92 | name = StringColumn() 93 | age = IntegerColumn() 94 | money = FloatColumn() 95 | insertion_date = DateColumn() 96 | is_staff = BoolColumn() 97 | 98 | 99 | @models.Data 100 | class PeopleDeclaredWithDifferentFields(models.DataFrame): 101 | name_test = StringColumn(target_name='name') 102 | age = IntegerColumn() 103 | money_test = FloatColumn(target_name='money') 104 | insertion_date_test = DateColumn(target_name="insertion_date") 105 | is_staff = BoolColumn() 106 | 107 | 108 | def retrieve_people(): 109 | for x in range(1000): 110 | yield "John", x, 50.0, Timestamp("2005-02-02"), True 111 | 
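# A quick sketch (mirroring test_populate_from_iterator) of how the retrieve_people
# generator above is consumed: each yielded tuple becomes one row, and column names
# are taken from the declared fields, in order:
#
#     people = PeopleFromIterator(from_iterator=retrieve_people)
#     people.shape   # -> (1000, 5): 1000 yielded tuples, 5 declared columns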
-------------------------------------------------------------------------------- /tests/test_sql_operations.py: -------------------------------------------------------------------------------- 1 | import string 2 | import random 3 | from unittest import TestCase 4 | from pandas import Timestamp 5 | 6 | from src.pandas_oop.custom_exceptions import MissingDecorator, MissingUniqueField 7 | from tests.test_models_declaration import PeopleNoTable, PEOPLE_DATA_FILE, People, PeopleFromDatabase, UniqueCars, \ 8 | CARS_DATA_FILE 9 | 10 | 11 | class TestSqlOperations(TestCase): 12 | 13 | def test_missing_sql_decorator_error(self): 14 | people = PeopleNoTable(from_csv=PEOPLE_DATA_FILE, delimiter=";") 15 | if people.is_valid(): 16 | self.assertRaises(MissingDecorator, people.save, if_exists='replace', index=False) 17 | 18 | def test_table_attribute(self): 19 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 20 | if people.is_valid(): 21 | people.sql_engine.execute('delete from people') 22 | people.save() 23 | people_from_db = PeopleFromDatabase(from_sql_query='select * from people') 24 | self.assertEqual(people_from_db.to_dict(), people.to_dict()) 25 | 26 | def test_insert_or_update(self): 27 | random_string = [self.get_random_string() for _ in range(3)] 28 | cars = UniqueCars(from_csv=CARS_DATA_FILE, delimiter=";") 29 | cars.random_string = random_string 30 | cars.save(if_row_exists='update') 31 | expected_result = UniqueCars(from_sql_query='select * from cars').random_string.tolist() 32 | self.assertEqual(random_string, expected_result) 33 | 34 | def test_insert_or_ignore(self): 35 | cars = UniqueCars(from_csv=CARS_DATA_FILE, delimiter=";") 36 | cars.sql_engine.execute('delete from cars') 37 | cars.head(2).save() 38 | cars.save(if_row_exists='ignore') 39 | expected_result = ['aaaa', 'bbbb', 'zzzz'] 40 | self.assertEqual(expected_result, cars.random_string.tolist()) 41 | 42 | def test_missing_unique_field(self): 43 | people = People(from_csv=PEOPLE_DATA_FILE, delimiter=";") 44 | self.assertRaises(MissingUniqueField, people.save, if_row_exists='update') 45 | 46 | def setUp(self): 47 | # Test variable for new creation 48 | self.name_list = ["John", "Snow"] 49 | self.age_list = [15, 40] 50 | self.money_list = [13.6, 6.7] 51 | self.insertion_date_list = [Timestamp('2005-02-25'), Timestamp('2005-02-25')] 52 | self.is_staff_list = [True, False] 53 | 54 | self.expected_result = { 55 | 'name': { 56 | 0: 'John', 57 | 1: 'Snow' 58 | }, 59 | 'age': { 60 | 0: 15, 61 | 1: 40 62 | }, 63 | 'money': { 64 | 0: 13.6, 65 | 1: 6.7 66 | }, 67 | 'insertion_date': { 68 | 0: Timestamp('2005-02-25'), 69 | 1: Timestamp('2005-02-25') 70 | }, 71 | 'is_staff': { 72 | 0: True, 73 | 1: False 74 | }, 75 | } 76 | 77 | self.string_insertion_date_list = ['25-02-2005', '25-02-2005'] 78 | 79 | @staticmethod 80 | def get_random_string() -> str: 81 | return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10)) 82 | --------------------------------------------------------------------------------
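As a closing illustration of the upsert semantics exercised in test_insert_or_ignore above (a sketch assuming an empty cars table):

```python
cars = UniqueCars(from_csv=CARS_DATA_FILE, delimiter=';')
cars.head(2).save()                # insert only the first two rows
cars.save(if_row_exists='ignore')  # row 3 is inserted; the existing rows stay untouched
```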