├── requirements.txt ├── readme_src ├── notebook.png ├── sql_logo.jpg ├── sql_logo2.jpg ├── sql_logo_smaller.png ├── underConstruction.jpg └── notebook_results │ ├── 1.png │ ├── 10.png │ ├── 11.png │ ├── 12.png │ ├── 2.png │ ├── 3.png │ ├── 4.png │ ├── 5.png │ ├── 6.png │ ├── 7.png │ ├── 8.png │ ├── 9.png │ ├── data_sample.png │ └── db_connected.png ├── .readthedocs.yaml ├── docs ├── source │ ├── api.rst │ ├── index.rst │ ├── usage.rst │ └── conf.py ├── publish_cmds.txt ├── Makefile └── make.bat ├── sampleData └── DataBaseBackup │ └── INX.bak ├── pyproject.toml ├── README.rst ├── LICENSE ├── setup.py ├── .gitignore ├── README.md ├── example_notebook └── SampleNoteBook_edaSQL.ipynb └── edaSQL └── src └── edaSQL.py /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | seaborn 4 | missingno -------------------------------------------------------------------------------- /readme_src/notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook.png -------------------------------------------------------------------------------- /readme_src/sql_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/sql_logo.jpg -------------------------------------------------------------------------------- /readme_src/sql_logo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/sql_logo2.jpg -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-20.04" 5 | tools: 6 | python: "3.8" 7 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. autosummary:: 5 | :toctree: generated 6 | 7 | lumache 8 | -------------------------------------------------------------------------------- /readme_src/sql_logo_smaller.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/sql_logo_smaller.png -------------------------------------------------------------------------------- /readme_src/underConstruction.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/underConstruction.jpg -------------------------------------------------------------------------------- /readme_src/notebook_results/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/1.png -------------------------------------------------------------------------------- /readme_src/notebook_results/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/10.png -------------------------------------------------------------------------------- /readme_src/notebook_results/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/11.png -------------------------------------------------------------------------------- /readme_src/notebook_results/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/12.png -------------------------------------------------------------------------------- /readme_src/notebook_results/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/2.png -------------------------------------------------------------------------------- /readme_src/notebook_results/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/3.png -------------------------------------------------------------------------------- /readme_src/notebook_results/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/4.png -------------------------------------------------------------------------------- /readme_src/notebook_results/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/5.png -------------------------------------------------------------------------------- /readme_src/notebook_results/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/6.png -------------------------------------------------------------------------------- /readme_src/notebook_results/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/7.png -------------------------------------------------------------------------------- /readme_src/notebook_results/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/8.png -------------------------------------------------------------------------------- /readme_src/notebook_results/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/9.png -------------------------------------------------------------------------------- /sampleData/DataBaseBackup/INX.bak: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/sampleData/DataBaseBackup/INX.bak -------------------------------------------------------------------------------- /readme_src/notebook_results/data_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/data_sample.png -------------------------------------------------------------------------------- /readme_src/notebook_results/db_connected.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/selva221724/edaSQL/HEAD/readme_src/notebook_results/db_connected.png -------------------------------------------------------------------------------- /docs/publish_cmds.txt: -------------------------------------------------------------------------------- 1 | python setup.py sdist bdist_wheel 2 | 3 | python -m twine upload --repository testpypi dist/* 4 | 5 | python -m twine upload dist/* -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit_core >=3.2,<4"] 3 | build-backend = "flit_core.buildapi" 4 | 5 | [project] 6 | name = "edaSQL" 7 | authors = [{name = "TamilSelvan", email = "selva221724@gmail.com"}] 8 | dynamic = ["version", "description"] 9 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Template for the Read the Docs tutorial 2 | ======================================= 3 | 4 | This GitHub template includes fictional Python library 5 | with some basic Sphinx docs. 6 | 7 | Read the tutorial here: 8 | 9 | https://docs.readthedocs.io/en/stable/tutorial/ 10 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to edaSQL's documentation! 2 | =================================== 3 | 4 | **edaSQL** is a library to link SQL to Exploratory Data Analysis and further more in the Data Engineering. This will solve many limitations in the SQL studios available in the market. Use the SQL Query language to get your Table Results. 5 | 6 | 7 | Check out the :doc:`usage` section for further information, including 8 | how to :ref:`installation` the project. 9 | 10 | .. note:: 11 | 12 | This project is under active development. 13 | 14 | Contents 15 | -------- 16 | 17 | .. toctree:: 18 | 19 | usage 20 | api 21 | 22 | edaSQL has its documentation hosted on Read the Docs. 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | .. _installation: 5 | 6 | Installation 7 | ------------ 8 | 9 | To use edaSQL, first install it using pip: 10 | 11 | .. code-block:: console 12 | 13 | (.venv) $ pip install edaSQL 14 | 15 | Importing the Package and Iniate the eda object 16 | ---------------- 17 | .. code-block:: python 18 | 19 | import edaSQL 20 | edasql = edaSQL.SQL() 21 | 22 | To retrieve a list of random ingredients, 23 | you can use the ``edaSQL.SQL()`` function: 24 | 25 | .. autofunction:: lumache.get_random_ingredients 26 | 27 | The ``kind`` parameter should be either ``"meat"``, ``"fish"``, 28 | or ``"veggies"``. Otherwise, :py:func:`lumache.get_random_ingredients` 29 | will raise an exception. 30 | 31 | .. autoexception:: lumache.InvalidKindError 32 | 33 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | 3 | # -- Project information 4 | 5 | project = 'edaSQL' 6 | copyright = '2021, TamilSelvan' 7 | author = 'TamilSelvan' 8 | 9 | release = '0.1' 10 | version = '0.1.0' 11 | 12 | # -- General configuration 13 | 14 | extensions = [ 15 | 'sphinx.ext.duration', 16 | 'sphinx.ext.doctest', 17 | 'sphinx.ext.autodoc', 18 | 'sphinx.ext.autosummary', 19 | 'sphinx.ext.intersphinx', 20 | ] 21 | 22 | intersphinx_mapping = { 23 | 'python': ('https://docs.python.org/3/', None), 24 | 'sphinx': ('https://www.sphinx-doc.org/en/master/', None), 25 | } 26 | intersphinx_disabled_domains = ['std'] 27 | 28 | templates_path = ['_templates'] 29 | 30 | # -- Options for HTML output 31 | 32 | html_theme = 'sphinx_rtd_theme' 33 | 34 | # -- Options for EPUB output 35 | epub_show_urls = 'footnote' 36 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Tamil Selvan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | # Read the requirements 7 | with open("requirements.txt",encoding="utf8") as f: 8 | requirements = f.read().splitlines() 9 | 10 | setuptools.setup( 11 | name="edaSQL", # This is the name of the package 12 | version="0.0.1.5", # The initial release version 13 | author="Tamil Selvan A V", # Full name of the author 14 | description="Exploratory Data Analytics tool for SQL", 15 | url="https://github.com/selva221724/edaSQL", 16 | license="MIT", 17 | include_package_data=True, 18 | long_description=long_description, # Long description read from the the readme file 19 | long_description_content_type="text/markdown", 20 | packages=setuptools.find_packages(), # List of all python modules to be installed 21 | classifiers=[ 22 | "Programming Language :: Python :: 3", 23 | "Programming Language :: Python :: 3.6", 24 | "Programming Language :: Python :: 3.7", 25 | "Programming Language :: Python :: 3.8", 26 | "Programming Language :: Python :: 3.9", 27 | "Programming Language :: Python :: 3.10", 28 | "License :: OSI Approved :: MIT License", 29 | "Framework :: IPython", 30 | "Operating System :: OS Independent", 31 | "Intended Audience :: Science/Research", 32 | "Intended Audience :: Developers", 33 | "Intended Audience :: Financial and Insurance Industry", 34 | "Intended Audience :: Healthcare Industry", 35 | "Topic :: Scientific/Engineering", 36 | "Environment :: Console" 37 | ], # Information to filter the project on PyPi website 38 | python_requires='>=3.6', # Minimum version requirement of the package 39 | py_modules=["edaSQL"], # Name of the python package 40 | package_dir={'': 'edaSQL/src'}, # Directory of the source code of the package 41 | install_requires=requirements # Install other dependencies if any 42 | 43 | ) 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks 3 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,jupyternotebooks 4 | 5 | ### JupyterNotebooks ### 6 | # gitignore template for Jupyter Notebooks 7 | # website: http://jupyter.org/ 8 | 9 | .ipynb_checkpoints 10 | */.ipynb_checkpoints/* 11 | 12 | # IPython 13 | profile_default/ 14 | ipython_config.py 15 | 16 | # Remove previous ipynb_checkpoints 17 | # git rm -r .ipynb_checkpoints/ 18 | 19 | ### Python ### 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | share/python-wheels/ 43 | *.egg-info/ 44 | .installed.cfg 45 | *.egg 46 | MANIFEST 47 | 48 | # PyInstaller 49 | # Usually these files are written by a python script from a template 50 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 51 | *.manifest 52 | *.spec 53 | 54 | # Installer logs 55 | pip-log.txt 56 | pip-delete-this-directory.txt 57 | 58 | # Unit test / coverage reports 59 | htmlcov/ 60 | .tox/ 61 | .nox/ 62 | .coverage 63 | .coverage.* 64 | .cache 65 | nosetests.xml 66 | coverage.xml 67 | *.cover 68 | *.py,cover 69 | .hypothesis/ 70 | .pytest_cache/ 71 | cover/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | .pybuilder/ 95 | target/ 96 | 97 | # Jupyter Notebook 98 | 99 | # IPython 100 | 101 | # pyenv 102 | # For a library or package, you might want to ignore these files since the code is 103 | # intended to run in multiple environments; otherwise, check them in: 104 | # .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # End of https://www.toptal.com/developers/gitignore/api/python,jupyternotebooks 157 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |
46 |
47 | [Read the detailed documentation in readthedocs.io](https://edasql.readthedocs.io/en/latest/) (still under the development)
48 |
49 | ## License
50 | The license for edaSQL is MIT license
51 |
52 | ## Need help?
53 | Stuck on your edaSQL code or problem? Any other questions? Don't
54 | hestitate to send me an email (selva221724@gmail.com).
55 |
56 | ## edaSQL Jupyter NoteBook Tutorial
57 |
58 | Access the sample Jupyter Notebook [here!!](https://github.com/selva221724/edaSQL/blob/main/example_notebook/SampleNoteBook_edaSQL.ipynb)
59 |
60 | Access the Sample Data Used in this Repo
61 | - [CSV](https://github.com/selva221724/edaSQL/blob/main/sampleData/CSV/INX.csv)
62 | - [DataBase Backup](https://github.com/selva221724/edaSQL/blob/main/sampleData/DataBaseBackup/INX.bak) ( you can restore the DB in SQL Studio )
63 |
64 | **edaSQL for DataFrame:** If you are using the CSV or Excel as a source , Read using the Pandas & start from the [**3. Data Overview**](#Chapter1)
65 |
66 | ### Import Packages
67 | ```python
68 | import edaSQL
69 | import pandas as pd
70 | ```
71 |
72 | ### 1. Connect to the DataBase
73 | ```python
74 | edasql = edaSQL.SQL()
75 | edasql.connectToDataBase(server='your server name',
76 | database='your database',
77 | user='username',
78 | password='password',
79 | sqlDriver='ODBC Driver 17 for SQL Server')
80 | ```
81 |
82 |
83 |
84 | ### 2. Query Data
85 | ```python
86 | sampleQuery = "select * from INX"
87 | data = pd.read_sql(sampleQuery, edasql.dbConnection)
88 | ```
89 |
90 |
91 |
92 |
93 | ### 3. Data Overview
94 | ```python
95 | insights = edaSQL.EDA(dataFrame=data,HTMLDisplay=True)
96 | dataInsights =insights.dataInsights()
97 | ```
98 |
99 |
100 |
101 | ```python
102 | deepInsights = insights.deepInsights()
103 | ```
104 |
105 |
106 | ### 4. Correlation
107 | ```python
108 | eda = edaSQL.EDA(dataFrame=data)
109 | eda.pearsonCorrelation()
110 | ```
111 |
112 |
113 | ```python
114 | eda.spearmanCorrelation()
115 | ```
116 |
117 |
118 | ```python
119 | eda.kendallCorrelation()
120 | ```
121 |
122 |
123 | ### 5. Missing Values
124 |
125 | ```python
126 | eda.missingValuesPlot(plot ='matrix')
127 | ```
128 |
129 |
130 | ```python
131 | eda.missingValuesPlot(plot ='bar')
132 | ```
133 |
134 |
135 | ```python
136 | eda.missingValuesPlot(plot ='heatmap')
137 | ```
138 |
139 |
140 | ```python
141 | eda.missingValuesPlot(plot ='dendrogram')
142 | ```
143 |
144 |
145 | ### 6. Outliers
146 |
147 | ```python
148 | eda.outliersVisualization(plot = 'box')
149 | ```
150 |
151 |
152 | ```python
153 | eda.outliersVisualization(plot = 'scatter')
154 | ```
155 |
156 |
157 | ```python
158 | outliers = eda.getOutliers()
159 | ```
160 |
161 |
162 |
163 |
--------------------------------------------------------------------------------
/example_notebook/SampleNoteBook_edaSQL.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "ad11b57b",
6 | "metadata": {},
7 | "source": [
8 | "## Import Packages"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "83529769",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import edaSQL\n",
19 | "import pandas as pd"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "id": "342b6675",
25 | "metadata": {},
26 | "source": [
27 | "
'))
96 |
97 | if self.printAll:
98 | print('========== Connected to DataBase Successfully ===========')
99 | print('Server: ', self.server)
100 | print('DataBase: ', self.database)
101 | print('User : ', self.user)
102 | print('Password : ', self.password)
103 |
104 | self.cursor = self.dbConnection.cursor()
105 |
106 |
107 | class EDA:
108 |
109 | def __init__(self, dataFrame, HTMLDisplay=True):
110 | self.dataFrame = dataFrame
111 | self.HTMLDisplay = HTMLDisplay
112 | pass
113 |
114 | def getInfo(self):
115 | pass
116 |
117 | def dataInsights(self, printAll=True) -> dict:
118 | dataOverview = {
119 | 'Dataset Overview':
120 | {
121 | 'Number of Columns': str(len(self.dataFrame.columns)),
122 | 'Number of Rows': str(len(self.dataFrame)),
123 | 'Overall Missing cells': str(self.dataFrame.isnull().sum().sum()),
124 | 'Overall Missing cells (%)': str(
125 | round((self.dataFrame.isnull().sum().sum() / self.dataFrame.notnull().sum().sum()) * 100, 2)),
126 | 'Duplicate rows': str(self.dataFrame.duplicated().sum()),
127 | 'Duplicate rows (%)': str(round(self.dataFrame.duplicated().sum() / len(self.dataFrame) * 100, 2)),
128 | },
129 | 'Types of Columns':
130 | {
131 | 'Numeric': str(len(self.dataFrame.select_dtypes("number").columns)),
132 | 'Categorical': str(len(self.dataFrame.select_dtypes("object").columns)),
133 | 'Date and Time': str(len(self.dataFrame.select_dtypes("datetime64").columns))
134 | }
135 | }
136 |
137 | if self.HTMLDisplay and printAll:
138 | displayContent = """"""
139 | for parent, child in dataOverview.items():
140 | tempDisplay = ''
141 | tempDisplay += '\n\n\n\n\n\n\n| ' + cat + ' : | ' + value + ' |
|---|
| ' + mapper[parent][0] + ' | ' + mapper[parent][1] + ' | ' 192 | for cat, value in child.items(): 193 | tempDisplay += '
|---|---|
| ' + cat + ' | ' + value + ' |
| ' + cols + ' |