├── TPOT
    └── Capture.JPG
├── H2O.ai
    └── 2020-05-31 (7).png
├── AutoSklearn
    └── diabetes.pickle
├── Auto_TS
    ├── auto_ts
    │   ├── models
    │   │   ├── ar_based
    │   │   │   ├── __init__.py
    │   │   │   ├── __pycache__
    │   │   │   │   ├── var.cpython-37.pyc
    │   │   │   │   ├── arima.cpython-37.pyc
    │   │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   │   ├── sarimax.cpython-37.pyc
    │   │   │   │   └── param_finder.cpython-37.pyc
    │   │   │   ├── var.py
    │   │   │   ├── param_finder.py
    │   │   │   ├── sarimax.py
    │   │   │   └── arima.py
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   ├── build_ml.cpython-37.pyc
    │   │   │   └── build_prophet.cpython-37.pyc
    │   │   ├── __init__.py
    │   │   ├── build_pyflux.py
    │   │   ├── build_prophet.py
    │   │   └── build_ml.py
    │   ├── __pycache__
    │   │   └── __init__.cpython-37.pyc
    │   └── utils
    │   │   ├── __pycache__
    │   │       ├── eda.cpython-37.pyc
    │   │       ├── etl.cpython-37.pyc
    │   │       ├── val.cpython-37.pyc
    │   │       ├── colors.cpython-37.pyc
    │   │       ├── __init__.cpython-37.pyc
    │   │       └── metrics.cpython-37.pyc
    │   │   ├── colors.py
    │   │   ├── __init__.py
    │   │   ├── metrics.py
    │   │   ├── etl.py
    │   │   ├── val.py
    │   │   └── eda.py
    ├── requirements.txt
    ├── setup.py
    └── example_datasets
    │   └── Sales_and_Marketing.csv
├── MLBox
    └── README.rst
├── lazy-predict
    ├── README.md
    ├── Data.csv
    └── LazyPredict-notebook.ipynb
├── AutoGluon
    ├── README.rst
    ├── AutoGluon.ipynb
    └── Patient_data.csv
├── README.md
├── Transmogrif_AI
    ├── README.rst
    └── TransmogrifAI.ipynb
├── .gitignore
├── AutoViz
    └── AutoViz.ipynb
└── Auto_ViML
    └── Patient_data.csv


/TPOT/Capture.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/TPOT/Capture.JPG


--------------------------------------------------------------------------------
/H2O.ai/2020-05-31 (7).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/H2O.ai/2020-05-31 (7).png


--------------------------------------------------------------------------------
/AutoSklearn/diabetes.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/AutoSklearn/diabetes.pickle


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/__init__.py:
--------------------------------------------------------------------------------
1 | from .arima import build_arima_model
2 | from .sarimax import build_sarimax_model
3 | from .var import build_var_model
4 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/__pycache__/eda.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/utils/__pycache__/eda.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/__pycache__/etl.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/utils/__pycache__/etl.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/__pycache__/val.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/utils/__pycache__/val.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/__pycache__/colors.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/utils/__pycache__/colors.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/models/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/__pycache__/build_ml.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/models/__pycache__/build_ml.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/utils/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/__pycache__/metrics.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/utils/__pycache__/metrics.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/__pycache__/build_prophet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/models/__pycache__/build_prophet.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/__pycache__/var.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/models/ar_based/__pycache__/var.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/__pycache__/arima.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/models/ar_based/__pycache__/arima.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/models/ar_based/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/__pycache__/sarimax.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/models/ar_based/__pycache__/sarimax.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/__pycache__/param_finder.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/piyushpathak03/Automated-Machine-Learning/HEAD/Auto_TS/auto_ts/models/ar_based/__pycache__/param_finder.cpython-37.pyc


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .ar_based import build_arima_model, build_sarimax_model, build_var_model
2 | from .build_ml import run_ensemble_model
3 | from .build_prophet import build_prophet_model
4 | from .build_pyflux import build_pyflux_model
5 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/colors.py:
--------------------------------------------------------------------------------
 1 | class colorful:
 2 |    PURPLE = '\033[95m'
 3 |    CYAN = '\033[96m'
 4 |    DARKCYAN = '\033[36m'
 5 |    BLUE = '\033[94m'
 6 |    GREEN = '\033[92m'
 7 |    YELLOW = '\033[93m'
 8 |    RED = '\033[91m'
 9 |    BOLD = '\033[1m'
10 |    UNDERLINE = '\033[4m'
11 |    END = '\033[0m'
12 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .colors import colorful
2 | from .etl import load_ts_data, convert_timeseries_dataframe_to_supervised, \
3 |                  time_series_split, find_max_min_value_in_a_dataframe
4 | from .eda import time_series_plot, top_correlation_to_name, test_stationarity
5 | from .val import cross_validation_time_series, rolling_validation_time_series, \
6 |                  ts_model_validation
7 | from .metrics import print_static_rmse, print_dynamic_rmse, print_normalized_rmse, \
8 |                      print_ts_model_stats
9 | 


--------------------------------------------------------------------------------
/Auto_TS/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Library dependencies for the python code. To set up an environment:
 2 | 
 3 | # conda create -n <env_name> python=3.6 (or 3.7)
 4 | # source activate <env_name>
 5 | # pip install -U -r requirements.txt
 6 | # python -m ipykernel install --user --name <env_name> --display-name "<Display Name>"
 7 | 
 8 | # You need to install these requirements (the pip step) before you can run this code.
 9 | 
10 | # Base libraries
11 | numpy
12 | pandas
13 | scipy
14 | 
15 | # Viz libs
16 | matplotlib
17 | seaborn
18 | 
19 | # Stats libraries
20 | scikit-learn
21 | statsmodels
22 | 
23 | # PyFlux
24 | pyflux
25 | 
26 | # Facebook Prophet
27 | fbprophet
28 | 


--------------------------------------------------------------------------------
/Auto_TS/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | with open("README.md", "r", encoding="utf-8") as fh:
 4 |     long_description = fh.read()
 5 | 
 6 | setuptools.setup(
 7 |     name="auto_ts",
 8 |     version="0.0.19",
 9 |     author="Ram Seshadri",
10 |     # author_email="author@example.com",
11 |     description="Automatically Build Multiple Time Series models fast - now with Facebook Prophet!",
12 |     long_description=long_description,
13 |     long_description_content_type="text/markdown",
14 |     license='Apache License 2.0',
15 |     url="https://github.com/AutoViML/Auto_TS",
16 |     packages=setuptools.find_packages(exclude=("tests",)),
17 |     install_requires=[
18 |         "ipython",
19 |         "jupyter",
20 |         "pyflux",
21 |         "pandas",
22 |         "matplotlib",
23 |         "seaborn",
24 |         "scikit-learn",
25 |         "fbprophet",
26 |         "statsmodels",
27 |     ],
28 |     classifiers=[
29 |         "Programming Language :: Python :: 3",
30 |         "Operating System :: OS Independent",
31 |     ],
32 | )
33 | 


--------------------------------------------------------------------------------
/MLBox/README.rst:
--------------------------------------------------------------------------------
 1 | **MLBox is a powerful Automated Machine Learning python library.** It provides the following features:
 2 | 
 3 | 
 4 | * Fast reading and distributed data preprocessing/cleaning/formatting
 5 | * Highly robust feature selection and leak detection
 6 | * Accurate hyper-parameter optimization in high-dimensional space
 7 | * State-of-the-art predictive models for classification and regression (Deep Learning, Stacking, LightGBM,...)
 8 | * Prediction with models interpretation
 9 | 
10 | ### Featured Video:
11 | <p  align="center"><img height="300" src = "https://media.giphy.com/media/ih4Cuk0i9R2W152Ljr/giphy.gif"></p>
12 | 
13 | ## About me
14 | 
15 | **Piyush Pathak**
16 | 
17 | [**PORTFOLIO**](https://anirudhrapathak3.wixsite.com/piyush)
18 | 
19 | [**GITHUB**](https://github.com/piyushpathak03)
20 | 
21 | [**BLOG**](https://medium.com/@piyushpathak03)
22 | 
23 | 
24 | # 📫 Follow me:
25 | 
26 | [![Linkedin Badge](https://img.shields.io/badge/-PiyushPathak-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/in/piyushpathak03/)](https://www.linkedin.com/in/piyushpathak03/)
27 | 
28 | 
29 | <p  align="right"><img height="100" src = "https://media.giphy.com/media/l3URDstnIjBNY7rwLB/giphy.gif"></p>
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/lazy-predict/README.md:
--------------------------------------------------------------------------------
 1 | # Auto-ML Using Lazy Predict Python Library
 2 | 
 3 | ## To view the video
 4 | * [Click here](https://youtu.be/VCm8Rr9r8ug)
 5 | * Click on the image below
 6 | 
 7 | [![Auto-ML Using Lazy Predict Python Library](http://img.youtube.com/vi/VCm8Rr9r8ug/0.jpg)](http://www.youtube.com/watch?v=VCm8Rr9r8ug)
 8 | 
 9 | ### Want to know more about me?
10 | ## Follow Me
11 | <a href="https://twitter.com/_bhaveshbhatt" target="_blank"><img class="ai-subscribed-social-icon" src="https://bhattbhavesh91.github.io/assets/images/tw.png" width="30"></a>
12 | <a href="https://www.youtube.com/bhaveshbhatt8791/" target="_blank"><img class="ai-subscribed-social-icon" src="https://bhattbhavesh91.github.io/assets/images/ytb.png" width="30"></a>
13 | <a href="https://www.youtube.com/PythonTricks/" target="_blank"><img class="ai-subscribed-social-icon" src="https://bhattbhavesh91.github.io/assets/images/python_logo.png" width="30"></a>
14 | <a href="https://github.com/bhattbhavesh91" target="_blank"><img class="ai-subscribed-social-icon" src="https://bhattbhavesh91.github.io/assets/images/gthb.png" width="30"></a>
15 | <a href="https://www.linkedin.com/in/bhattbhavesh91/" target="_blank"><img class="ai-subscribed-social-icon" src="https://bhattbhavesh91.github.io/assets/images/lnkdn.png" width="30"></a>
16 | 


--------------------------------------------------------------------------------
/Auto_TS/example_datasets/Sales_and_Marketing.csv:
--------------------------------------------------------------------------------
 1 | Time Period,Sales,Marketing Expense
 2 | 2011-01-01,397,486.64
 3 | 2011-02-01,400,501.8
 4 | 2011-03-01,498,437.09
 5 | 2011-04-01,536,565.16
 6 | 2011-05-01,596,744.15
 7 | 2011-06-01,591,548.74
 8 | 2011-07-01,651,650.21
 9 | 2011-08-01,654,777.51
10 | 2011-09-01,509,547.11
11 | 2011-10-01,437,382.81
12 | 2011-11-01,406,551.56
13 | 2011-12-01,470,401.69
14 | 2012-01-01,428,370.97
15 | 2012-02-01,423,318.39
16 | 2012-03-01,507,477.39
17 | 2012-04-01,536,418.66
18 | 2012-05-01,610,429.68
19 | 2012-06-01,609,713.24
20 | 2012-07-01,687,658.22
21 | 2012-08-01,707,800.52
22 | 2012-09-01,509,640.45
23 | 2012-10-01,452,606.49
24 | 2012-11-01,412,426.88
25 | 2012-12-01,472,513.48
26 | 2013-01-01,454,300.29
27 | 2013-02-01,455,330.84
28 | 2013-03-01,568,444.04
29 | 2013-04-01,610,628.82
30 | 2013-05-01,706,620.36
31 | 2013-06-01,661,682.6
32 | 2013-07-01,767,684.64
33 | 2013-08-01,783,748.47
34 | 2013-09-01,583,668.46
35 | 2013-10-01,513,499.31
36 | 2013-11-01,481,401.92
37 | 2013-12-01,567,605.06
38 | 2014-01-01,525,429.73
39 | 2014-02-01,520,602.86
40 | 2014-03-01,587,596.15
41 | 2014-04-01,710,619.39
42 | 2014-05-01,793,758.31
43 | 2014-06-01,749,980.16
44 | 2014-07-01,871,905.1
45 | 2014-08-01,848,784.62
46 | 2014-09-01,640,718.98
47 | 2014-10-01,581,570.3
48 | 2014-11-01,519,527.6
49 | 2014-12-01,605,559.75
50 | 


--------------------------------------------------------------------------------
/AutoGluon/README.rst:
--------------------------------------------------------------------------------
 1 | AutoGluon: AutoML Toolkit for Deep Learning
 2 | AutoGluon enables easy-to-use and easy-to-extend AutoML with a focus on deep learning and real-world applications spanning image, text, or tabular data. Intended for both ML beginners and experts, AutoGluon enables you to:
 3 | 
 4 | Quickly prototype deep learning solutions for your data with few lines of code.
 5 | 
 6 | Leverage automatic hyperparameter tuning, model selection / architecture search, and data processing.
 7 | 
 8 | Automatically utilize state-of-the-art deep learning techniques without expert knowledge.
 9 | 
10 | Easily improve existing bespoke models and data pipelines, or customize AutoGluon for your use-case.
11 | 
12 | ### Featured Video:
13 | <p  align="center"><img height="300" src = "https://media.giphy.com/media/ih4Cuk0i9R2W152Ljr/giphy.gif"></p>
14 | 
15 | ## About me
16 | 
17 | **Piyush Pathak**
18 | 
19 | [**PORTFOLIO**](https://anirudhrapathak3.wixsite.com/piyush)
20 | 
21 | [**GITHUB**](https://github.com/piyushpathak03)
22 | 
23 | [**BLOG**](https://medium.com/@piyushpathak03)
24 | 
25 | 
26 | # 📫 Follow me:
27 | 
28 | [![Linkedin Badge](https://img.shields.io/badge/-PiyushPathak-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/in/piyushpathak03/)](https://www.linkedin.com/in/piyushpathak03/)
29 | 
30 | <p  align="right"><img height="100" src = "https://media.giphy.com/media/l3URDstnIjBNY7rwLB/giphy.gif"></p>
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Automated-Machine-Learning
 2 | Machine Learning is the most popular technology in current times!!! It is currently utilized in almost every field imaginable which has pushed its importance infinitely. But what about those who don’t know Machine Learning as well? That’s where Automated machine learning or AutoML comes in!
 3 | 
 4 | Automated machine learning (AutoML) basically involves automating the end-to-end process of applying machine learning to real-world problems that are actually relevant in the industry. In recent years, it has been noticed as well as proven time and time again that ML or machine learning is the key to the future. It is understandable that this is an up and coming technology that allows for various directions of research, analysis, and implementation.
 5 | 
 6 | ### Featured Video:
 7 | <p  align="center"><img height="300" src = "https://media.giphy.com/media/ih4Cuk0i9R2W152Ljr/giphy.gif"></p>
 8 | 
 9 | Video link 
10 | https://www.youtube.com/watch?v=j2gipG0yBVM
11 | 
12 | ## About me
13 | 
14 | **Piyush Pathak**
15 | 
16 | [**PORTFOLIO**](https://anirudhrapathak3.wixsite.com/piyush)
17 | 
18 | [**GITHUB**](https://github.com/piyushpathak03)
19 | 
20 | [**BLOG**](https://medium.com/@piyushpathak03)
21 | 
22 | 
23 | # 📫 Follow me:
24 | 
25 | [![Linkedin Badge](https://img.shields.io/badge/-PiyushPathak-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/in/piyushpathak03/)](https://www.linkedin.com/in/piyushpathak03/)
26 | 
27 | <p  align="right"><img height="100" src = "https://media.giphy.com/media/l3URDstnIjBNY7rwLB/giphy.gif"></p>
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/Transmogrif_AI/README.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | TransmogrifAI
 3 | TransmogrifAI (pronounced trăns-mŏgˈrə-fī) is an AutoML library written in Scala that runs on top of Apache Spark. It was developed with a focus on accelerating machine learning developer productivity through machine learning automation, and an API that enforces compile-time type-safety, modularity, and reuse. Through automation, it achieves accuracies close to hand-tuned models with almost 100x reduction in time.
 4 | TransmogrifAI is an AutoML library written in Scala that runs on top of Apache Spark. It was developed with a focus on enhancing machine learning developer productivity through machine learning automation, and an API that enforces compile-time type-safety, modularity and reuse.
 5 | 
 6 | Use TransmogrifAI if you need a machine learning library to:
 7 | 
 8 | Rapidly train good quality machine learnt models with minimal hand tuning. Build modular, reusable, strongly typed machine learning workflows.
 9 | 
10 | Use TransmogrifAI if you need a machine learning library to:
11 | 
12 | Build production ready machine learning applications in hours, not months
13 | Build machine learning models without getting a Ph.D. in machine learning
14 | Build modular, reusable, strongly typed machine learning workflows
15 | 
16 | ## About me
17 | 
18 | **Piyush Pathak**
19 | 
20 | [**PORTFOLIO**](https://anirudhrapathak3.wixsite.com/piyush)
21 | 
22 | [**GITHUB**](https://github.com/piyushpathak03)
23 | 
24 | [**BLOG**](https://medium.com/@piyushpathak03)
25 | 
26 | 
27 | # 📫 Follow me:
28 | 
29 | [![Linkedin Badge](https://img.shields.io/badge/-PiyushPathak-blue?style=flat-square&logo=Linkedin&logoColor=white&link=https://www.linkedin.com/in/piyushpathak03/)](https://www.linkedin.com/in/piyushpathak03/)
30 | 
31 | 
32 | <p  align="right"><img height="100" src = "https://media.giphy.com/media/l3URDstnIjBNY7rwLB/giphy.gif"></p>
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from sklearn.metrics import mean_absolute_error, mean_squared_error
 4 | 
 5 | 
 6 | def print_static_rmse(actual, predicted, start_from=0,verbose=0):
 7 |     """
 8 |     this calculates the ratio of the rmse error to the standard deviation of the actuals.
 9 |     This ratio should be below 1 for a model to be considered useful.
10 |     The comparison starts from the row indicated in the "start_from" variable.
11 |     """
12 |     rmse = np.sqrt(mean_squared_error(actual[start_from:],predicted[start_from:]))
13 |     std_dev = actual[start_from:].std()
14 |     if verbose == 1:
15 |         print('    RMSE = %0.2f' %rmse)
16 |         print('    Std Deviation of Actuals = %0.2f' %(std_dev))
17 |         print('    Normalized RMSE = %0.1f%%' %(rmse*100/std_dev))
18 |     return rmse, rmse/std_dev
19 | 
20 | 
def print_dynamic_rmse(actuals, predicted, original):
    """
    Print and return the RMSE of a forecast together with its normalized form.

    The RMSE is computed element-wise from ``actuals - predicted``. It is then
    normalized by the standard deviation of ``original`` (the longer original
    array) instead of the standard deviation of ``actuals``: a forecast of only
    a couple of values has a very small standard deviation of its own, which
    would inflate the normalized figure.

    Returns a tuple ``(rmse, norm_rmse)``.
    """
    squared_errors = (actuals - predicted)**2
    rmse = np.sqrt(np.mean(squared_errors))
    original_std = original.std()
    norm_rmse = rmse/original_std
    print('    RMSE = {:,.2f}'.format(rmse))
    print('    Std Deviation of Originals = {:,.2f}'.format(original_std))
    print('    Normalized RMSE = %0.0f%%' %(100*norm_rmse))
    return rmse, norm_rmse
34 | 
35 | 
def print_normalized_rmse(actuals, predicted,start_from=0):
    """
    Print and return the RMSE between ``actuals`` and ``predicted`` together
    with the RMSE normalized by the standard deviation of the actuals.

    Both inputs are sliced from row ``start_from`` onwards before anything is
    computed, so the standard deviation is that of the sliced actuals.
    ``actuals`` must support slicing and expose a ``.std()`` method.

    Returns a tuple ``(rmse, norm_rmse)``.
    """
    actuals = actuals[start_from:]
    predicted = predicted[start_from:]
    # mean_squared_error already returns the averaged (scalar) error, so no
    # further np.mean is needed before taking the square root.
    rmse = np.sqrt(mean_squared_error(actuals, predicted))
    actuals_std = actuals.std()
    norm_rmse = rmse/actuals_std
    print('RMSE = {:,.2f}'.format(rmse))
    print('Std Deviation of Actuals = {:,.2f}'.format(actuals_std))
    print('Normalized RMSE = %0.0f%%' %(100*norm_rmse))
    return rmse, norm_rmse
49 | 
50 | 
def print_rmse(y, y_hat):
    """
    Return the Root Mean Square Error between ``y`` and ``y_hat``
    (https://en.wikipedia.org/wiki/Root-mean-square_deviation).

    Despite its name this function prints nothing; it only returns the value.
    """
    residuals = y - y_hat
    return np.sqrt(np.mean(residuals**2))
57 | 
58 | 
def print_mape(y, y_hat):
    """
    Return the Mean Absolute Percent Error of ``y_hat`` against ``y``, on a
    0-100 scale (https://en.wikipedia.org/wiki/Mean_absolute_percentage_error).

    Each error is divided by the corresponding value of ``y``; zeros in ``y``
    are not guarded against. Despite its name this function prints nothing.
    """
    return np.mean(abs(100 * (y - y_hat) / y))
65 | 
66 | 
def print_ts_model_stats(actuals, predicted, number_as_percentage=100):
    """
    Plot the forecast against the actuals and print a set of error metrics.

    A new matplotlib figure is created showing ``predicted`` ("Forecast") and
    ``actuals`` ("Actual"). The function then prints MAE and RMSE as a
    percentage of ``number_as_percentage``, MAE as a percentage of the mean
    absolute actual, MAPE, and RMSE normalized both by the min-max range and by
    the standard deviation of the actuals.

    ``actuals`` must expose ``.max()``, ``.min()`` and ``.std()`` and support
    ``abs()`` and element-wise arithmetic with ``predicted``.

    Returns a tuple ``(mae, mae_asp, rmse_asp)`` where ``mae_asp`` and
    ``rmse_asp`` are the MAE and RMSE expressed as a percentage of
    ``number_as_percentage``. RMSE and MAPE themselves are printed but not
    returned.
    """
    # This module does not import matplotlib at the top, so the ``plt`` name
    # used below would otherwise be undefined; import it where it is needed.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(15,8))
    dfplot = pd.DataFrame([predicted,actuals]).T
    dfplot.columns = ['Forecast','Actual']
    plt.plot(dfplot)
    plt.legend(['Forecast','Actual'])
    mae = mean_absolute_error(actuals, predicted)
    mae_asp = (mae/number_as_percentage)*100
    rmse_asp = (np.sqrt(mean_squared_error(actuals,predicted))/number_as_percentage)*100
    print('MAE (%% AUM) = %0.2f%%' %mae_asp)
    print('RMSE (%% AUM) = %0.2f%%' %rmse_asp)
    print('MAE (as %% Actual) = %0.2f%%' %(mae/abs(actuals).mean()*100))
    rmse = print_rmse(actuals, predicted)
    mape = print_mape(actuals, predicted)
    print("MAPE = %0.0f%%" %(mape))
    print('Normalized RMSE (MinMax) = %0.0f%%' %(100*rmse/abs(actuals.max()-actuals.min())))
    print('Normalized RMSE = %0.0f%%' %(100*rmse/actuals.std()))
    return mae, mae_asp, rmse_asp
95 | 


--------------------------------------------------------------------------------
/lazy-predict/Data.csv:
--------------------------------------------------------------------------------
  1 | TV,radio,newspaper,sales
  2 | 230.1,37.8,69.2,22.1
  3 | 44.5,39.3,45.1,10.4
  4 | 17.2,45.9,69.3,9.3
  5 | 151.5,41.3,58.5,18.5
  6 | 180.8,10.8,58.4,12.9
  7 | 8.7,48.9,75,7.2
  8 | 57.5,32.8,23.5,11.8
  9 | 120.2,19.6,11.6,13.2
 10 | 8.6,2.1,1,4.8
 11 | 199.8,2.6,21.2,10.6
 12 | 66.1,5.8,24.2,8.6
 13 | 214.7,24,4,17.4
 14 | 23.8,35.1,65.9,9.2
 15 | 97.5,7.6,7.2,9.7
 16 | 204.1,32.9,46,19
 17 | 195.4,47.7,52.9,22.4
 18 | 67.8,36.6,114,12.5
 19 | 281.4,39.6,55.8,24.4
 20 | 69.2,20.5,18.3,11.3
 21 | 147.3,23.9,19.1,14.6
 22 | 218.4,27.7,53.4,18
 23 | 237.4,5.1,23.5,12.5
 24 | 13.2,15.9,49.6,5.6
 25 | 228.3,16.9,26.2,15.5
 26 | 62.3,12.6,18.3,9.7
 27 | 262.9,3.5,19.5,12
 28 | 142.9,29.3,12.6,15
 29 | 240.1,16.7,22.9,15.9
 30 | 248.8,27.1,22.9,18.9
 31 | 70.6,16,40.8,10.5
 32 | 292.9,28.3,43.2,21.4
 33 | 112.9,17.4,38.6,11.9
 34 | 97.2,1.5,30,9.6
 35 | 265.6,20,0.3,17.4
 36 | 95.7,1.4,7.4,9.5
 37 | 290.7,4.1,8.5,12.8
 38 | 266.9,43.8,5,25.4
 39 | 74.7,49.4,45.7,14.7
 40 | 43.1,26.7,35.1,10.1
 41 | 228,37.7,32,21.5
 42 | 202.5,22.3,31.6,16.6
 43 | 177,33.4,38.7,17.1
 44 | 293.6,27.7,1.8,20.7
 45 | 206.9,8.4,26.4,12.9
 46 | 25.1,25.7,43.3,8.5
 47 | 175.1,22.5,31.5,14.9
 48 | 89.7,9.9,35.7,10.6
 49 | 239.9,41.5,18.5,23.2
 50 | 227.2,15.8,49.9,14.8
 51 | 66.9,11.7,36.8,9.7
 52 | 199.8,3.1,34.6,11.4
 53 | 100.4,9.6,3.6,10.7
 54 | 216.4,41.7,39.6,22.6
 55 | 182.6,46.2,58.7,21.2
 56 | 262.7,28.8,15.9,20.2
 57 | 198.9,49.4,60,23.7
 58 | 7.3,28.1,41.4,5.5
 59 | 136.2,19.2,16.6,13.2
 60 | 210.8,49.6,37.7,23.8
 61 | 210.7,29.5,9.3,18.4
 62 | 53.5,2,21.4,8.1
 63 | 261.3,42.7,54.7,24.2
 64 | 239.3,15.5,27.3,15.7
 65 | 102.7,29.6,8.4,14
 66 | 131.1,42.8,28.9,18
 67 | 69,9.3,0.9,9.3
 68 | 31.5,24.6,2.2,9.5
 69 | 139.3,14.5,10.2,13.4
 70 | 237.4,27.5,11,18.9
 71 | 216.8,43.9,27.2,22.3
 72 | 199.1,30.6,38.7,18.3
 73 | 109.8,14.3,31.7,12.4
 74 | 26.8,33,19.3,8.8
 75 | 129.4,5.7,31.3,11
 76 | 213.4,24.6,13.1,17
 77 | 16.9,43.7,89.4,8.7
 78 | 27.5,1.6,20.7,6.9
 79 | 120.5,28.5,14.2,14.2
 80 | 5.4,29.9,9.4,5.3
 81 | 116,7.7,23.1,11
 82 | 76.4,26.7,22.3,11.8
 83 | 239.8,4.1,36.9,12.3
 84 | 75.3,20.3,32.5,11.3
 85 | 68.4,44.5,35.6,13.6
 86 | 213.5,43,33.8,21.7
 87 | 193.2,18.4,65.7,15.2
 88 | 76.3,27.5,16,12
 89 | 110.7,40.6,63.2,16
 90 | 88.3,25.5,73.4,12.9
 91 | 109.8,47.8,51.4,16.7
 92 | 134.3,4.9,9.3,11.2
 93 | 28.6,1.5,33,7.3
 94 | 217.7,33.5,59,19.4
 95 | 250.9,36.5,72.3,22.2
 96 | 107.4,14,10.9,11.5
 97 | 163.3,31.6,52.9,16.9
 98 | 197.6,3.5,5.9,11.7
 99 | 184.9,21,22,15.5
100 | 289.7,42.3,51.2,25.4
101 | 135.2,41.7,45.9,17.2
102 | 222.4,4.3,49.8,11.7
103 | 296.4,36.3,100.9,23.8
104 | 280.2,10.1,21.4,14.8
105 | 187.9,17.2,17.9,14.7
106 | 238.2,34.3,5.3,20.7
107 | 137.9,46.4,59,19.2
108 | 25,11,29.7,7.2
109 | 90.4,0.3,23.2,8.7
110 | 13.1,0.4,25.6,5.3
111 | 255.4,26.9,5.5,19.8
112 | 225.8,8.2,56.5,13.4
113 | 241.7,38,23.2,21.8
114 | 175.7,15.4,2.4,14.1
115 | 209.6,20.6,10.7,15.9
116 | 78.2,46.8,34.5,14.6
117 | 75.1,35,52.7,12.6
118 | 139.2,14.3,25.6,12.2
119 | 76.4,0.8,14.8,9.4
120 | 125.7,36.9,79.2,15.9
121 | 19.4,16,22.3,6.6
122 | 141.3,26.8,46.2,15.5
123 | 18.8,21.7,50.4,7
124 | 224,2.4,15.6,11.6
125 | 123.1,34.6,12.4,15.2
126 | 229.5,32.3,74.2,19.7
127 | 87.2,11.8,25.9,10.6
128 | 7.8,38.9,50.6,6.6
129 | 80.2,0,9.2,8.8
130 | 220.3,49,3.2,24.7
131 | 59.6,12,43.1,9.7
132 | 0.7,39.6,8.7,1.6
133 | 265.2,2.9,43,12.7
134 | 8.4,27.2,2.1,5.7
135 | 219.8,33.5,45.1,19.6
136 | 36.9,38.6,65.6,10.8
137 | 48.3,47,8.5,11.6
138 | 25.6,39,9.3,9.5
139 | 273.7,28.9,59.7,20.8
140 | 43,25.9,20.5,9.6
141 | 184.9,43.9,1.7,20.7
142 | 73.4,17,12.9,10.9
143 | 193.7,35.4,75.6,19.2
144 | 220.5,33.2,37.9,20.1
145 | 104.6,5.7,34.4,10.4
146 | 96.2,14.8,38.9,11.4
147 | 140.3,1.9,9,10.3
148 | 240.1,7.3,8.7,13.2
149 | 243.2,49,44.3,25.4
150 | 38,40.3,11.9,10.9
151 | 44.7,25.8,20.6,10.1
152 | 280.7,13.9,37,16.1
153 | 121,8.4,48.7,11.6
154 | 197.6,23.3,14.2,16.6
155 | 171.3,39.7,37.7,19
156 | 187.8,21.1,9.5,15.6
157 | 4.1,11.6,5.7,3.2
158 | 93.9,43.5,50.5,15.3
159 | 149.8,1.3,24.3,10.1
160 | 11.7,36.9,45.2,7.3
161 | 131.7,18.4,34.6,12.9
162 | 172.5,18.1,30.7,14.4
163 | 85.7,35.8,49.3,13.3
164 | 188.4,18.1,25.6,14.9
165 | 163.5,36.8,7.4,18
166 | 117.2,14.7,5.4,11.9
167 | 234.5,3.4,84.8,11.9
168 | 17.9,37.6,21.6,8
169 | 206.8,5.2,19.4,12.2
170 | 215.4,23.6,57.6,17.1
171 | 284.3,10.6,6.4,15
172 | 50,11.6,18.4,8.4
173 | 164.5,20.9,47.4,14.5
174 | 19.6,20.1,17,7.6
175 | 168.4,7.1,12.8,11.7
176 | 222.4,3.4,13.1,11.5
177 | 276.9,48.9,41.8,27
178 | 248.4,30.2,20.3,20.2
179 | 170.2,7.8,35.2,11.7
180 | 276.7,2.3,23.7,11.8
181 | 165.6,10,17.6,12.6
182 | 156.6,2.6,8.3,10.5
183 | 218.5,5.4,27.4,12.2
184 | 56.2,5.7,29.7,8.7
185 | 287.6,43,71.8,26.2
186 | 253.8,21.3,30,17.6
187 | 205,45.1,19.6,22.6
188 | 139.5,2.1,26.6,10.3
189 | 191.1,28.7,18.2,17.3
190 | 286,13.9,3.7,15.9
191 | 18.7,12.1,23.4,6.7
192 | 39.5,41.1,5.8,10.8
193 | 75.5,10.8,6,9.9
194 | 17.2,4.1,31.6,5.9
195 | 166.8,42,3.6,19.6
196 | 149.7,35.6,6,17.3
197 | 38.2,3.7,13.8,7.6
198 | 94.2,4.9,8.1,9.7
199 | 177,9.3,6.4,12.8
200 | 283.6,42,66.2,25.5
201 | 232.1,8.6,8.7,13.4
202 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/var.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import itertools
 4 | import operator
 5 | import copy
 6 | import matplotlib.pyplot as plt
 7 | import seaborn as sns
 8 | get_ipython().magic('matplotlib inline')
 9 | sns.set(style="white", color_codes=True)
10 | # imported VARMAX from statsmodels pkg
11 | from statsmodels.tsa.statespace.varmax import VARMAX
12 | # helper functions
13 | from ...utils import print_dynamic_rmse
14 | from ...models.ar_based.param_finder import find_lowest_pq
15 | 
16 | 
def build_var_model(df, criteria, forecast_period=2, p_max=3, q_max=3, verbose=0):
    """
    Build a two-variable VARMAX model by grid search and forecast the held-out tail.

    The first column of ``df`` is treated as the target. Every other column is paired
    with the target in turn; for each pair all (p, q) orders up to ``p_max``/``q_max``
    are fitted and scored, and the pair/order with the lowest score is refitted and
    used to forecast the last ``forecast_period`` rows.

    Make sure your Time Series is "Stationary" before you send it in; otherwise the
    results may be spurious.

    Arguments:
        df: data frame with time as the index, target in the first column and at
            least one additional column.
        criteria: name of the fitted-results attribute to minimise, read with
            ``getattr`` (presumably 'aic', 'bic', ... - confirm against statsmodels).
        forecast_period: number of trailing rows held out as the test set.
        p_max, q_max: highest AR and MA orders tried (inclusive).
        verbose: 1 prints the split shapes and draws heatmap/diagnostic plots.
    Returns:
        (bestmodel, res2_df, rmse, norm_rmse): the fitted results object, the forecast
        summary frame, and the two values returned by ``print_dynamic_rmse``.
    Raises:
        ValueError: if ``df`` has fewer than two columns or no candidate model
        could be fitted.
    """
    df = df[:]
    #### n_cols is the number of columns: every column after the first is a candidate
    n_cols = df.shape[1]
    if n_cols < 2:
        raise ValueError('build_var_model needs the target column plus at least one more column')
    ###############################################################################################
    cols = df.columns.tolist()
    ts_train = df[:-forecast_period]
    ts_test = df[-forecast_period:]
    if verbose == 1:
        print('Data Set split into train %s and test %s for Cross Validation Purposes'
              % (ts_train.shape, ts_test.shape))
    # It is assumed that the first column of the dataframe is the target variable ####
    iteration = 1
    results_dict = {}
    for d_val in range(1, n_cols):
        y_train = ts_train.iloc[:, [0, d_val]]
        print('\nAdditional Variable in VAR model = %s' % cols[d_val])
        info_criteria = pd.DataFrame(index=['AR{}'.format(i) for i in range(0, p_max+1)],
                                     columns=['MA{}'.format(i) for i in range(0, q_max+1)])
        for p_val, q_val in itertools.product(range(0, p_max+1), range(0, q_max+1)):
            # Order (0, 0) is not fitted; its cell simply stays NaN.
            if p_val != 0 or q_val != 0:
                try:
                    model = VARMAX(y_train, order=(p_val, q_val), trend='c')
                    # statsmodels spells these keywords `maxiter` and `disp`
                    fitted = model.fit(maxiter=1000, disp=False)
                    info_criteria.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = getattr(fitted, criteria)
                except Exception:
                    # Best-effort grid search: a failed fit leaves the cell as NaN.
                    pass
            print(' Iteration %d completed' % iteration)
            iteration += 1
        info_criteria = info_criteria[info_criteria.columns].astype(float)
        if info_criteria.isnull().all().all():
            # Nothing could be fitted for this variable, so there is no minimum to pick.
            print('    No VAR model could be fitted with %s - skipping it' % cols[d_val])
            continue
        interim_p, interim_q, interim_bic = find_lowest_pq(info_criteria)
        if verbose == 1:
            fig, ax = plt.subplots(figsize=(20, 10))
            ax = sns.heatmap(info_criteria,
                             mask=info_criteria.isnull(),
                             ax=ax,
                             annot=True,
                             fmt='.0f'
                             )
            ax.set_title(criteria)
        results_dict[(interim_p, d_val, interim_q)] = interim_bic
    if not results_dict:
        raise ValueError('No VAR model could be fitted for any additional variable')
    # Lowest score wins; ties keep the first candidate, as dict order is insertion order.
    (best_p, best_d, best_q), best_bic = min(results_dict.items(), key=operator.itemgetter(1))
    print('Best variable selected for VAR: %s' % ts_train.columns.tolist()[best_d])
    y_train = ts_train.iloc[:, [0, best_d]]
    bestmodel = VARMAX(y_train, order=(best_p, best_q), trend='c')
    bestmodel = bestmodel.fit()
    if verbose == 1:
        bestmodel.plot_diagnostics(figsize=(16, 12))
        ax = bestmodel.impulse_responses(12, orthogonalized=True).plot(figsize=(12, 4))
        ax.set(xlabel='Time Steps', title='Impulse Response Functions')
    res2 = bestmodel.get_forecast(forecast_period)
    res2_df = res2.summary_frame()
    rmse, norm_rmse = print_dynamic_rmse(ts_test.iloc[:,0], res2_df['mean'].values, ts_train.iloc[:,0])
    return bestmodel, res2_df, rmse, norm_rmse
92 | 


--------------------------------------------------------------------------------
/Transmogrif_AI/TransmogrifAI.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# TransmogrifAI"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "#### TransmogrifAI (pronounced trăns-mŏgˈrə-fī) is an AutoML library written in Scala that runs on top of Apache Spark. It was developed with a focus on accelerating machine learning developer productivity through machine learning automation, and an API that enforces compile-time type-safety, modularity, and reuse. Through automation, it achieves accuracies close to hand-tuned models with almost 100x reduction in time.\n",
 15 |     "\n",
 16 |     "TransmogrifAI is an AutoML library written in Scala that runs on top of Apache Spark. It was developed with a focus on enhancing machine learning developer productivity through machine learning automation, and an API that enforces compile-time type-safety, modularity and reuse.\n",
 17 |     "\n",
 18 |     "Use TransmogrifAI if you need a machine learning library to:\n",
 19 |     "\n",
 20 |     "- Rapidly train good quality machine learnt models with minimal hand tuning\n",
 21 |     "- Build modular, reusable, strongly typed machine learning workflows\n",
 22 |     "\n",
 23 |     "Use TransmogrifAI if you need a machine learning library to:\n",
 24 |     "\n",
 25 |     "1. Build production ready machine learning applications in hours, not months\n",
 26 |     "2. Build machine learning models without getting a Ph.D. in machine learning\n",
 27 |     "3. Build modular, reusable, strongly typed machine learning workflows"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": []
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "# Importing Libraries"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": null,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "import com.salesforce.op._\n",
 51 |     "import com.salesforce.op.readers._\n",
 52 |     "import com.salesforce.op.features._\n",
 53 |     "import com.salesforce.op.features.types._\n",
 54 |     "import com.salesforce.op.stages.impl.classification._\n",
 55 |     "import org.apache.spark.SparkConf\n",
 56 |     "import org.apache.spark.sql.SparkSession"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": []
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "# Training"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "implicit val spark = SparkSession.builder.config(new SparkConf()).getOrCreate()\n",
 80 |     "import spark.implicits._"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "Predicting Titanic Survivors with TransmogrifAI\n",
 88 |     "\n",
 89 |     "The Titanic dataset is an often-cited dataset in the machine learning community. The goal is to build a machine learnt model that will predict survivors from the Titanic passenger manifest. Here is how you would build the model using TransmogrifAI:\n"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "metadata": {},
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "// Read Titanic data as a DataFrame\n",
 99 |     "val passengersData = DataReaders.Simple.csvCase[Passenger](path = pathToData).readDataset().toDF()\n",
100 |     "\n",
101 |     "// Extract response and predictor Features\n",
102 |     "val (survived, predictors) = FeatureBuilder.fromDataFrame[RealNN](passengersData, response = \"survived\")\n",
103 |     "\n",
104 |     "// Automated feature engineering\n",
105 |     "val featureVector = predictors.transmogrify()\n",
106 |     "\n",
107 |     "// Automated feature validation and selection\n",
108 |     "val checkedFeatures = survived.sanityCheck(featureVector, removeBadFeatures = true)\n",
109 |     "\n",
110 |     "// Automated model selection\n",
111 |     "val pred = BinaryClassificationModelSelector().setInput(survived, checkedFeatures).getOutput()\n",
112 |     "\n",
113 |     "// Setting up a TransmogrifAI workflow and training the model\n",
114 |     "val model = new OpWorkflow().setInputDataset(passengersData).setResultFeatures(pred).train()\n",
115 |     "\n",
116 |     "print(\"Model summary:\\n\" + model.summaryPretty())"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": null,
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": []
125 |   }
126 |  ],
127 |  "metadata": {
128 |   "kernelspec": {
129 |    "display_name": "Python 3",
130 |    "language": "python",
131 |    "name": "python3"
132 |   },
133 |   "language_info": {
134 |    "codemirror_mode": {
135 |     "name": "ipython",
136 |     "version": 3
137 |    },
138 |    "file_extension": ".py",
139 |    "mimetype": "text/x-python",
140 |    "name": "python",
141 |    "nbconvert_exporter": "python",
142 |    "pygments_lexer": "ipython3",
143 |    "version": "3.8.3"
144 |   }
145 |  },
146 |  "nbformat": 4,
147 |  "nbformat_minor": 4
148 | }
149 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/build_pyflux.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import itertools
  4 | import operator
  5 | # helper functions
  6 | from ..utils import print_static_rmse, print_dynamic_rmse
  7 | 
  8 | 
  9 | #########################################################
 10 | def build_pyflux_model(df, target, ar=3, ma=3,integ=1, forecast_period=2,
 11 |                        fitmethod='MLE', nsims=100, score_type='rmse', verbose=0):
 12 |     """
 13 |     Build a quick pyflux model with default parameters for AR, MA and I terms in ARIMA.
 14 |     You can build a rolling forecast using the rolling_forecast parameter.
 15 |     PyFlux is a fiendishly complicated program with very poor documentation.
 16 |     I had to dig deep into the API to figure these things out especially the
 17 |     """
 18 |     try:
 19 |         # imported pyflux pkg
 20 |         import pyflux as pf
 21 |     except:
 22 |         print('Pyflux is not installed - hence not running PyFlux model')
 23 |         return 'error','error','error','error'
 24 |     ts_df = df[:]
 25 |     ##############################################################################
 26 |     ts_train = ts_df[:-forecast_period]
 27 |     ts_test = ts_df[-forecast_period:]
 28 |     if verbose == 1:
 29 |         print('Data Set split into train %s and test %s for Cross Validation Purposes'
 30 |               % (ts_train.shape, ts_test.shape))
 31 |     #####################################################################################################
 32 |     if integ > 1:
 33 |         print('    Setting "integration"=1 since differenced predictions > 1 are difficult to interpret')
 34 |         integ = 1
 35 |     if fitmethod == 'M-H':
 36 |         print('    Assuming number of simulations = %d' % nsims)
 37 |     ####################################################################################################
 38 |     ###### define p,d,q parameters here ####################
 39 |     p = range(0, ar+1)
 40 |     q = range(0, ma+1)
 41 |     d = range(0, integ+1)  ### dont do much more than 1 differencing in PyFlux models since its hard to undo
 42 |     #### Generate all different combinations of p,d,q triplets ######
 43 |     pdq = list(itertools.product(p, d, q))
 44 |     eval_metrics = {}
 45 |     print('Cycling through various (p,d,q) parameters')
 46 |     for param in pdq:
 47 |         if verbose == 1:
 48 |             print('.', end="")
 49 |         model = pf.ARIMA(data=ts_train, ar=param[0], integ=param[1], ma=param[2], target=target)
 50 |         try:
 51 |             if fitmethod == 'MLE':
 52 |                 x = model.fit()
 53 |             elif fitmethod == 'M-H':
 54 |                 x = model.fit('M-H', nsims=nsims)
 55 |         except:
 56 |             x = model.fit('MLE')
 57 |         mu, actuals = model._model(model.latent_variables.get_z_values())
 58 |         predicted = model.link(mu)
 59 |         rmse, norm_rmse = print_static_rmse(actuals,predicted)
 60 |         if score_type == 'rmse':
 61 |             eval_metrics[param] = rmse
 62 |         else:
 63 |             eval_metrics[param] = norm_rmse
 64 |     bestpdq = min(eval_metrics.items(), key=operator.itemgetter(1))[0]
 65 |     print('\nBest Params Selected (based on %s): %s' % (score_type, bestpdq))
 66 |     bestmodel = pf.ARIMA(data=ts_train, ar=bestpdq[0], integ=bestpdq[1], ma=bestpdq[2], target=target)
 67 |     x = bestmodel.fit()
 68 |     if verbose == 1:
 69 |         bestmodel.plot_fit(figsize=(15, 5))
 70 |     #model.plot_predict_is(h=forecast_period,fit_once=False,fit_method=fitmethod)
 71 |     if verbose == 1:
 72 |         x.summary()
 73 |         n = int(0.5*len(df))
 74 |         bestmodel.plot_predict(h=forecast_period, past_values=n, intervals=True, figsize=(15, 5))
 75 |     forecast_df = bestmodel.predict(forecast_period, intervals=True)
 76 |     mu, actuals = bestmodel._model(bestmodel.latent_variables.get_z_values())
 77 |     predicted = bestmodel.link(mu)
 78 |     print('Dynamic %d-period Forecasts:' % forecast_period)
 79 |     if bestpdq[1] == 1:
 80 |         mod_target = 'Differenced ' + target
 81 |         res = restore_differenced_predictions(ts_test[target].values, forecast_df[mod_target],
 82 |                                               ts_train[target][-1:])
 83 |         rmse, norm_rmse = print_dynamic_rmse(ts_test[target].values, res, ts_train[target])
 84 |     else:
 85 |         rmse, norm_rmse = print_dynamic_rmse(ts_test[target].values,forecast_df[target].values, ts_train[target])
 86 |     return bestmodel, forecast_df, rmse, norm_rmse
 87 | 
 88 | 
 89 | def restore_differenced_predictions(actuals, predicted, start_value, func=None, periods=1, diff_yes=True):
 90 |     try:
 91 |         restored = pd.Series(index=start_value.index)
 92 |         restored.ix[start_value.ix[:periods].index] = start_value.values[:periods]
 93 |         rest = restored.ix[predicted.index]
 94 |         restored = pd.Series(np.r_[restored, rest], index=np.r_[start_value.index, rest.index])
 95 |         restored.ix[predicted.index] = predicted.values
 96 |         restored = restored[(periods-1):].cumsum()
 97 |         if func:
 98 |             restored = eval('np.' + func + '(restored)')
 99 |         return restored[periods:]
100 |     except:
101 |         restored = start_value.values+predicted
102 |         if func:
103 |             restored = eval('np.' + func + '(restored)')
104 |         return restored
105 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/etl.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import copy
  3 | import pdb
  4 | 
  5 | ##### This function loads a time series data and sets the index as a time series
  6 | def load_ts_data(filename, ts_column, sep, target):
  7 |     """
  8 |     This function loads a given filename into a pandas dataframe and sets the
  9 |     ts_column as a Time Series index. Note that filename should contain the full
 10 |     path to the file.
 11 |     """
 12 |     if isinstance(filename, str):
 13 |         codes_list = ['utf-8', 'iso-8859-1', 'cp1252', 'latin1']
 14 |         print('First loading %s and then setting %s as date time index...' % (filename, ts_column))
 15 |         for codex in codes_list:
 16 |             try:
 17 |                 df = pd.read_csv(filename, index_col=None, sep=sep, encoding=codex)
 18 |                 df.index = pd.to_datetime(df.pop(ts_column))
 19 |                 break
 20 |             except:
 21 |                 print('    Encoder %s or Date time type not working for reading this file...' % codex)
 22 |                 continue
 23 |     else:
 24 |         ### If filename is not a string, it must be a dataframe and can be loaded
 25 |         dft = copy.deepcopy(filename)
 26 |         try:
 27 |             dft.index = pd.to_datetime(dft.pop(ts_column))
 28 |             preds = [x for x in list(dft) if x not in [target]]
 29 |             df = dft[[target]+preds]
 30 |         except:
 31 |             print('Error: Could not convert Time Series column to an index. Please check your input and try again')
 32 |             return ''
 33 |     return df
 34 | 
 35 | 
 36 | def time_series_split(ts_df):
 37 |     """
 38 |     This utility splits any dataframe sent as a time series split using the sklearn function.
 39 |     """
 40 |     from sklearn.model_selection import TimeSeriesSplit
 41 |     tscv = TimeSeriesSplit(n_splits=2)
 42 |     train_index, test_index = list(tscv.split(ts_df))[1][0], list(tscv.split(ts_df))[1][1]
 43 |     ts_train, ts_test = ts_df[ts_df.index.isin(train_index)], ts_df[
 44 |                         ts_df.index.isin(test_index)]
 45 |     print(ts_train.shape, ts_test.shape)
 46 |     return ts_train, ts_test
 47 | 
 48 | 
 49 | def convert_timeseries_dataframe_to_supervised(df, namevars, target, n_in=1, n_out=0, dropT=True):
 50 |     """
 51 |     Transform a time series in dataframe format into a supervised learning dataset while
 52 |     keeping dataframe intact.
 53 |     Arguments:
 54 |         df: A timeseries dataframe that you want to convert to Supervised dataset.
 55 |         namevars: columns that you want to lag in the data frame. Other columns will be untouched.
 56 |         target: this is the target variable you intend to use in supervised learning
 57 |         n_in: Number of lag periods as input (X).
 58 |         n_out: Number of future periods (optional) as output for the taget variable (y).
 59 |         dropT: Boolean - whether or not to drop columns at time 't'.
 60 |         Returns:
 61 |         df: This is the transformed data frame with the time series columns laggged.
 62 |         Note that the original columns are dropped if you set the 'dropT' argument to True.
 63 |         If not, they are preserved.
 64 |     This Pandas DataFrame of lagged time series data is immediately available for supervised learning.
 65 |     """
 66 |     df = df[:]
 67 |     # Notice that we will create a sequence of columns from name vars with suffix (t-n,... t-1), etc.
 68 |     drops = []
 69 |     for i in range(n_in, -1, -1):
 70 |         if i == 0:
 71 |             for var in namevars:
 72 |                 addname = var + '(t)'
 73 |                 df.rename(columns={var:addname}, inplace=True)
 74 |                 drops.append(addname)
 75 |         else:
 76 |             for var in namevars:
 77 |                 addname = var + '(t-' + str(i) + ')'
 78 |                 df[addname] = df[var].shift(i)
 79 |     ## forecast sequence (t, t+1,... t+n)
 80 |     if n_out == 0:
 81 |         n_out = False
 82 |     for i in range(1, n_out):
 83 |         for var in namevars:
 84 |             addname = var + '(t+' + str(i) + ')'
 85 |             df[addname] = df[var].shift(-i)
 86 |     #	drop rows with NaN values
 87 |     df.dropna(inplace=True, axis=0)
 88 |     #	put it all together
 89 |     target = target+'(t)'
 90 |     if dropT:
 91 |         ### If dropT is true, all the "t" series of the target column (in case it is in the namevars)
 92 |         ### will be removed if you don't want the target to learn from its "t" values.
 93 |         ### Similarly, we will also drop all the "t" series of name_vars if you set dropT to Trueself.
 94 |         try:
 95 |             drops.remove(target)
 96 |         except:
 97 |             pass
 98 |         df.drop(drops, axis=1, inplace=True)
 99 |     preds = [x for x in list(df) if x not in [target]]
100 |     return df, target, preds
101 |     ############
102 | 
103 | 
def find_max_min_value_in_a_dataframe(df, max_min='min'):
    """
    Return the lowest or highest value in a df together with the row label where it is found.
    It does not return the column where it is found.

    Arguments:
        df: dataframe of numeric values.
        max_min: 'min' for the lowest value; any other value returns the highest.
    Returns:
        (value, row_label)
    """
    if max_min == 'min':
        per_row = df.min(axis=1)
        return per_row.min(), per_row.idxmin()
    # The row holding the overall maximum is the row whose own maximum is largest.
    per_row = df.max(axis=1)
    return per_row.max(), per_row.idxmax()
113 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/param_finder.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import itertools
  4 | import operator
  5 | import copy
  6 | import matplotlib.pyplot as plt
  7 | import seaborn as sns
  8 | get_ipython().magic('matplotlib inline')
  9 | sns.set(style="white", color_codes=True)
 10 | # imported SARIMAX from statsmodels pkg for find_best_pdq_or_PDQ
 11 | from statsmodels.tsa.statespace.sarimax import SARIMAX
 12 | 
 13 | 
 14 | def find_lowest_pq(df):
 15 |     """
 16 |     This is an auto-ARIMA function that iterates through parameters pdq and finds the best
 17 |     based on aan eval metric sent in as input.
 18 | 
 19 |     This finds the row and column numbers of the lowest or highest value in a dataframe. All it needs is numeric values.
 20 |     It will return the row and column together as a string, you will have to split it into two.
 21 |     It will also return the lowest value in the dataframe by default but you can change it to "max".
 22 |     """
 23 |     dicti = {}
 24 |     for ma in list(df):
 25 |         try:
 26 |             dicti[ma + ' ' + df[ma].idxmin()] = df[ma].sort_values()[0]
 27 |         except:
 28 |             pass
 29 |     lowest_bic = min(dicti.items(), key=operator.itemgetter(1))[1]
 30 |     lowest_pq = min(dicti.items(), key=operator.itemgetter(1))[0]
 31 |     ma_q = int(lowest_pq.split(' ')[0][2:])
 32 |     ar_p = int(lowest_pq.split(' ')[1][2:])
 33 |     print('    Best AR order p = %d, MA order q = %d, Interim metric = %0.3f' % (ar_p, ma_q, lowest_bic))
 34 |     return ar_p, ma_q, lowest_bic
 35 | 
 36 | 
 37 | def find_best_pdq_or_PDQ(ts_train, metric, p_max, d_max, q_max, non_seasonal_pdq,
 38 |                          seasonal_period, seasonality=False, verbose=0):
 39 |     p_min = 0
 40 |     d_min = 0
 41 |     q_min = 0
 42 |     if seasonality:
 43 |         ns_p = non_seasonal_pdq[0]
 44 |         ns_d = non_seasonal_pdq[1]
 45 |         ns_q = non_seasonal_pdq[2]
 46 |     # Initialize a DataFrame to store the results
 47 |     iteration = 0
 48 |     results_dict = {}
 49 |     for d_val in range(d_min, d_max+1):
 50 |         print('\nDifferencing = %d' % d_val)
 51 |         results_bic = pd.DataFrame(index=['AR{}'.format(i) for i in range(p_min, p_max+1)],
 52 |                                    columns=['MA{}'.format(i) for i in range(q_min, q_max+1)])
 53 |         for p_val, q_val in itertools.product(range(p_min,p_max+1), range(q_min, q_max+1)):
 54 |             if p_val == 0 and d_val == 0 and q_val == 0:
 55 |                 results_bic.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = np.nan
 56 |                 continue
 57 |             try:
 58 |                 if seasonality:
 59 |                     #### In order to get forecasts to be in the same value ranges of the
 60 |                     #### orig_endogs, you must set the simple_differencing = False and
 61 |                     #### the start_params to be the same as ARIMA.
 62 |                     #### THat is the only way to ensure that the output of this
 63 |                     #### model is comparable to other ARIMA models
 64 |                     model = SARIMAX(ts_train, order=(ns_p, ns_d, ns_q),
 65 |                                     seasonal_order=(p_val, d_val, q_val, seasonal_period),
 66 |                                     enforce_stationarity=False,
 67 |                                     enforce_invertibility=False,
 68 |                                     simple_differencing=False, trend='ct',
 69 |                                     start_params=[0, 0, 0, 1])
 70 |                 else:
 71 |                     model = SARIMAX(ts_train, order=(p_val, d_val, q_val),
 72 |                                     enforce_stationarity=False,
 73 |                                     enforce_invertibility=False,
 74 |                                     simple_differencing=False, trend='ct',
 75 |                                     start_params=[0, 0, 0,1]
 76 |                                         )
 77 |                     results = model.fit()
 78 |                     results_bic.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = eval('results.' + metric)
 79 |                     if iteration % 10 == 0:
 80 |                         print('    Iteration %d completed...' % iteration)
 81 |                         iteration += 1
 82 |                     elif iteration >= 100:
 83 |                         print('    Ending Iterations at %d' % iteration)
 84 |                         break
 85 |             except:
 86 |                 iteration += 1
 87 |                 continue
 88 |         results_bic = results_bic[results_bic. columns].astype(float)
 89 |         interim_d = d_val
 90 |         if results_bic.isnull().all().all():
 91 |             print('    D = %d results in an empty ARMA set. Setting Seasonality to False since model might overfit' %d_val)
 92 |             #### Set Seasonality to False if this empty condition happens repeatedly ####
 93 |             seasonality = False
 94 |             continue
 95 |         else:
 96 |             seasonality = True
 97 |         interim_p, interim_q, interim_bic = find_lowest_pq(results_bic)
 98 |         if verbose == 1:
 99 |             fig, ax = plt.subplots(figsize=(20, 10))
100 |             ax = sns.heatmap(results_bic, mask=results_bic.isnull(), ax=ax, annot=True, fmt='.0f')
101 |             ax.set_title(metric)
102 |         results_dict[str(interim_p)+' '+str(interim_d)+' '+str(interim_q)] = interim_bic
103 |     try:
104 |         best_bic = min(results_dict.items(), key=operator.itemgetter(1))[1]
105 |         best_pdq = min(results_dict.items(), key=operator.itemgetter(1))[0]
106 |         best_p = int(best_pdq.split(' ')[0])
107 |         best_d = int(best_pdq.split(' ')[1])
108 |         best_q = int(best_pdq.split(' ')[2])
109 |     except:
110 |         best_p = copy.deepcopy(p_val)
111 |         best_q = copy.deepcopy(q_val)
112 |         best_d = copy.deepcopy(d_val)
113 |         best_bic = 0
114 |     return best_p, best_d, best_q, best_bic, seasonality
115 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/build_prophet.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import copy
  4 | import matplotlib.pyplot as plt
  5 | # helper functions
  6 | from ..utils import print_dynamic_rmse
  7 | # imported Prophet from fbprophet pkg
  8 | from fbprophet import Prophet
  9 | #### Suppress INFO messages from FB Prophet!
 10 | import logging
 11 | logging.getLogger('fbprophet').setLevel(logging.WARNING)
 12 | import pdb
 13 | def build_prophet_model(ts_df, time_col, target, forecast_period, time_interval,
 14 |                         score_type,
 15 |                         verbose, conf_int):
 16 |     """
 17 |     Build a Time Series Model using Facebook Prophet which is a powerful model.
 18 |     """
 19 |     ts_df = copy.deepcopy(ts_df)
 20 |     #df.rename(columns={time_col:'ds',target:'y'},inplace=True)
 21 |     ##### if you are going to use matplotlib with prophet data, it gives an error unless you do this.
 22 |     pd.plotting.register_matplotlib_converters()
 23 |     #### You have to import Prophet if you are going to build a Prophet model #############
 24 |     try:
 25 |         print('Preparing Time Series data for FB Prophet: sample row before\n', ts_df[time_col].head(1))
 26 |         df = ts_df.rename(columns={time_col: 'ds', target: 'y'})
 27 |         print('Time Series data: sample row after transformation\n', df.head(1))
 28 |     except:
 29 |         #### THis happens when time_col is not found but it's actually the index. In that case, reset index
 30 |         print('Preparing Time Series data for FB Prophet: sample row before\n', ts_df.head(1))
 31 |         df = ts_df.reset_index()
 32 |         df = df.rename(columns={time_col: 'ds', target: 'y'})
 33 |         print('Time Series data: sample row after transformation\n', df.head(1))
 34 |     actual = 'y'
 35 |     timecol = 'ds'
 36 |     dft = df[[timecol, actual]]
 37 |     ##### For most Financial time series data, 80% conf interval is enough...
 38 |     print('    Fit-Predict data (shape=%s) with Confidence Interval = %0.2f...' % (dft.shape, conf_int))
 39 |     ### Make Sure you lower your desired interval width from the normal 95% to a more realistic 80%
 40 |     model = Prophet(interval_width=conf_int)
 41 |     model.fit(dft)
 42 |     # Prophet is a Little Complicated - You need 2 steps to Forecast
 43 |     ## 1. You need to create a dataframe to hold the predictions which specifies datetime
 44 |     ##    periods that you want to predict. It automatically creates one with both past
 45 |     ##    and future dates.
 46 |     ## 2. You need to ask Prophet to make predictions for the past and future dates in
 47 |     ##    that dataframe above.
 48 |     ## So if you had 2905 rows of data, and ask Prophet to predict for 365 periods,
 49 |     ##    it will give you predictions of the past (2905) and an additional 365 rows
 50 |     ##    of future (total: 3270) rows of data.
 51 |     ### This is where we take the first steps to make a forecast using Prophet:
 52 |     ##   1. Create a dataframe with datetime index of past and future dates
 53 |     print('Building Forecast dataframe. Forecast Period = %d' % forecast_period)
 54 |     # Next we ask Prophet to make predictions for those dates in the dataframe along with predn intervals
 55 |     if time_interval in ['months', 'month', 'm']:
 56 |         time_int = 'M'
 57 |     elif time_interval in ['days', 'daily', 'd']:
 58 |         time_int = 'D'
 59 |     elif time_interval in ['weeks', 'weekly', 'w']:
 60 |         time_int = 'W'
 61 |         seasonal_period = 52
 62 |     elif time_interval in ['qtr', 'quarter', 'q']:
 63 |         time_int = 'Q'
 64 |     elif time_interval in ['years', 'year', 'annual', 'y', 'a']:
 65 |         time_int = 'Y'
 66 |     elif time_interval in ['hours', 'hourly', 'h']:
 67 |         time_int = 'H'
 68 |     elif time_interval in ['minutes', 'minute', 'min', 'n']:
 69 |         time_int = 'M'
 70 |     elif time_interval in ['seconds', 'second', 'sec', 's']:
 71 |         time_interval = 'S'
 72 |     else:
 73 |         time_int = 'W'
 74 |     future = model.make_future_dataframe(periods=forecast_period, freq=time_int)
 75 |     forecast = model.predict(future)
 76 |     act_n = len(dft)
 77 |     ####  We are going to plot Prophet's forecasts differently since it is better
 78 |     dfa = plot_prophet(dft, forecast)
 79 |     # Prophet makes Incredible Predictions Charts!
 80 |     ###  There can't be anything simpler than this to make Forecasts!
 81 |     #model.plot(forecast);  # make sure to add semi-colon in the end to avoid plotting twice
 82 |     # Also their Trend, Seasonality Charts are Spot On!
 83 |     try:
 84 |         model.plot_components(forecast);
 85 |     except:
 86 |         print('Error in FB Prophet components forecast. Continuing...')
 87 |     rmse, norm_rmse = print_dynamic_rmse(dfa['y'], dfa['yhat'], dfa['y'])
 88 |     #submit = dfplot[-forecast_period:]
 89 |     #submit.drop('Actuals',axis=1,inplace=True)
 90 |     #submit.rename(columns={'yhat':target},inplace=True)
 91 |     #print('Forecast Data frame size %s ready to submit' %(submit.shape,))
 92 |     return model, forecast, rmse, norm_rmse
 93 | 
 94 | 
 95 | def plot_prophet(dft, forecastdf):
 96 |     """
 97 |     This is a different way of plotting Prophet charts as described in the following article:
 98 |     Source: https://nextjournal.com/viebel/forecasting-time-series-data-with-prophet
 99 |     Reproduced with gratitude to the author.
100 |     """
101 |     dft = copy.deepcopy(dft)
102 |     forecastdf = copy.deepcopy(forecastdf)
103 |     dft.set_index('ds', inplace=True)
104 |     forecastdf.set_index('ds', inplace=True)
105 |     dft.index = pd.to_datetime(dft.index)
106 |     connect_date = dft.index[-2]
107 |     mask = (forecastdf.index > connect_date)
108 |     predict_df = forecastdf.loc[mask]
109 |     viz_df = dft.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']],
110 |                       how='outer')
111 |     fig,ax1 = plt.subplots(figsize=(20, 10))
112 |     ax1.plot(viz_df['y'], color='red')
113 |     ax1.plot(viz_df['yhat'], color='green')
114 |     ax1.fill_between(viz_df.index, viz_df['yhat_lower'], viz_df['yhat_upper'],
115 |                      alpha=0.2, color="darkgreen")
116 |     ax1.set_title('Actuals (Red) vs Forecast (Green)')
117 |     ax1.set_ylabel('Values')
118 |     ax1.set_xlabel('Date Time')
119 |     plt.show();
120 |     return viz_df
121 | 
122 | 
123 | # def print_dynamic_rmse(actuals, predicted, original):
124 | #     """
125 | #     This utility calculates rmse between actuals and predicted. However, it does one more.
126 | #     Since in dynamic forecast, we need the longer original, it calculates Normalized RMSE
127 | #     using the original array's std deviation. That way, the forecast of 2 values does not
128 | #     result in a larger Normalized RMSE since the std deviation of 2 values will be v small.
129 | #     """
130 | #     rmse = np.sqrt(np.mean((actuals - predicted)**2))
131 | #     norm_rmse = rmse/original.std()
132 | #     print('    RMSE = {:,.2f}'.format(rmse))
133 | #     print('    Std Deviation of Originals = {:,.2f}'.format(original.std()))
134 | #     print('    Normalized RMSE = %0.0f%%' %(100*norm_rmse))
135 | #     return rmse, norm_rmse
136 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/build_ml.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | # imported ML models from scikit-learn
  4 | from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit, \
  5 |                                     TimeSeriesSplit, cross_val_score
  6 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
  7 | from sklearn.ensemble import BaggingRegressor, ExtraTreesRegressor, \
  8 |                              RandomForestClassifier, ExtraTreesClassifier, \
  9 |                              AdaBoostRegressor, AdaBoostClassifier
 10 | from sklearn.linear_model import LinearRegression, LogisticRegression, RidgeCV
 11 | from sklearn.svm import LinearSVC, SVR, LinearSVR
 12 | from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
 13 | # imported specialized tree models from scikit-garden
 14 | # from skgarden import RandomForestQuantileRegressor
 15 | # helper functions
 16 | from ..utils import print_static_rmse, print_dynamic_rmse
 17 | import pdb
 18 | 
 19 | def run_ensemble_model(X, Y, modeltype='Regression', scoring='', verbose=0):
 20 |     """
 21 |     Quickly builds and runs multiple models for a clean data set(only numerics).
 22 |     """
 23 |     seed = 99
 24 |     if len(X) <= 100000 or X.shape[1] < 50:
 25 |         NUMS = 50
 26 |         FOLDS = 3
 27 |     else:
 28 |         NUMS = 20
 29 |         FOLDS = 5
 30 |     ## create Voting models
 31 |     estimators = []
 32 |     if modeltype == 'Regression':
 33 |         if scoring == '':
 34 |             scoring = 'neg_mean_squared_error'
 35 |         scv = ShuffleSplit(n_splits=FOLDS, random_state=seed)
 36 |         model5 = LinearRegression()
 37 |         results1 = cross_val_score(model5, X, Y, cv=scv, scoring=scoring)
 38 |         estimators.append(('Linear Model', model5, np.sqrt(abs(results1.mean()))))
 39 |         model6 = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(
 40 |                                    min_samples_leaf=2, max_depth=1, random_state=seed),
 41 |                                    n_estimators=NUMS, random_state=seed)
 42 |         results2 = cross_val_score(model6, X, Y, cv=scv, scoring=scoring)
 43 |         estimators.append(('Boosting', model6, np.sqrt(abs(results2.mean()))))
 44 |         model7 = RidgeCV(alphas=np.logspace(-10, -1, 50), cv=scv)
 45 |         results3 = cross_val_score(model7, X, Y, cv=scv, scoring=scoring)
 46 |         estimators.append(('Linear Regularization', model7, np.sqrt(abs(results3.mean()))))
 47 |         ## Create an ensemble model ####
 48 |         estimators_list = [(tuples[0], tuples[1]) for tuples in estimators]
 49 |         ensemble = BaggingRegressor(DecisionTreeRegressor(random_state=seed),
 50 |                                     n_estimators=NUMS, random_state=seed)
 51 |         results4 = cross_val_score(ensemble, X, Y, cv=scv, scoring=scoring)
 52 |         estimators.append(('Bagging', ensemble, np.sqrt(abs(results4.mean()))))
 53 |         if verbose == 1:
 54 |             print('\nLinear Model = %0.4f \nBoosting = %0.4f\nRegularization = %0.4f \nBagging = %0.4f' %(
 55 |             np.sqrt(abs(results1.mean()))/Y.std(), np.sqrt(abs(results2.mean()))/Y.std(),
 56 |             np.sqrt(abs(results3.mean()))/Y.std(), np.sqrt(abs(results4.mean()))/Y.std()))
 57 |         besttype = sorted(estimators, key=lambda x: x[2], reverse=False)[0][0]
 58 |         bestmodel = sorted(estimators, key=lambda x: x[2], reverse=False)[0][1]
 59 |         bestscore = sorted(estimators, key=lambda x: x[2], reverse=False)[0][2]/Y.std()
 60 |         if verbose == 1:
 61 |             print('    Best Model = %s with %0.2f Normalized RMSE score\n' %(besttype,bestscore))
 62 |     elif modeltype == 'TimeSeries' or modeltype =='Time Series' or modeltype == 'Time_Series':
 63 |         #### This section is for Time Series Models only ####
 64 |         if scoring == '':
 65 |             scoring = 'neg_mean_squared_error'
 66 |         tscv = TimeSeriesSplit(n_splits=FOLDS)
 67 |         scoring = 'neg_mean_squared_error'
 68 |         model5 = SVR(C=0.1, kernel='rbf', degree=2)
 69 |         results1 = cross_val_score(model5, X, Y, cv=tscv, scoring=scoring)
 70 |         estimators.append(('SVR', model5, np.sqrt(abs(results1.mean()))))
 71 |         model6 = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(
 72 |                                    min_samples_leaf=2, max_depth=1, random_state=seed),
 73 |                                    n_estimators=NUMS, random_state=seed)
 74 |         results2 = cross_val_score(model6, X, Y, cv=tscv, scoring=scoring)
 75 |         estimators.append(('Extra Trees', model6,np.sqrt(abs(results2.mean()))))
 76 |         model7 = LinearSVR(random_state=seed)
 77 |         results3 = cross_val_score(model7, X, Y, cv=tscv, scoring=scoring)
 78 |         estimators.append(('LinearSVR', model7, np.sqrt(abs(results3.mean()))))
 79 |         ## Create an ensemble model ####
 80 |         estimators_list = [(tuples[0], tuples[1]) for tuples in estimators]
 81 |         ensemble = BaggingRegressor(DecisionTreeRegressor(random_state=seed),
 82 |                                     n_estimators=NUMS, random_state=seed)
 83 |         results4 = cross_val_score(ensemble, X, Y, cv=tscv, scoring=scoring)
 84 |         estimators.append(('Bagging', ensemble, np.sqrt(abs(results4.mean()))))
 85 |         print('Running multiple models...')
 86 |         if verbose == 1:
 87 |             print('    Instance Based = %0.4f \n    Boosting = %0.4f\n    Linear Model = %0.4f \n    Bagging = %0.4f' %(
 88 |             np.sqrt(abs(results1.mean()))/Y.std(), np.sqrt(abs(results2.mean()))/Y.std(),
 89 |             np.sqrt(abs(results3.mean()))/Y.std(), np.sqrt(abs(results4.mean()))/Y.std()))
 90 |         besttype = sorted(estimators, key=lambda x: x[2], reverse=False)[0][0]
 91 |         bestmodel = sorted(estimators, key=lambda x: x[2], reverse=False)[0][1]
 92 |         bestscore = sorted(estimators, key=lambda x: x[2], reverse=False)[0][2]/Y.std()
 93 |         if verbose == 1:
 94 |             print('Best Model = %s with %0.2f Normalized RMSE score\n' % (besttype, bestscore))
 95 |         print('Model Results:')
 96 |     else:
 97 |         if scoring == '':
 98 |             scoring = 'f1'
 99 |         scv = StratifiedShuffleSplit(n_splits=FOLDS, random_state=seed)
100 |         model5 = LogisticRegression(random_state=seed)
101 |         results1 = cross_val_score(model5, X, Y, cv=scv, scoring=scoring)
102 |         estimators.append(('Logistic Regression', model5, abs(results1.mean())))
103 |         model6 = LinearDiscriminantAnalysis()
104 |         results2 = cross_val_score(model6, X, Y, cv=scv, scoring=scoring)
105 |         estimators.append(('Linear Discriminant', model6, abs(results2.mean())))
106 |         model7 = ExtraTreesClassifier(n_estimators=NUMS, min_samples_leaf=2, random_state=seed)
107 |         results3 = cross_val_score(model7, X, Y, cv=scv, scoring=scoring)
108 |         estimators.append(('Bagging', model7, abs(results3.mean())))
109 |         ## Create an ensemble model ####
110 |         estimators_list = [(tuples[0], tuples[1]) for tuples in estimators]
111 |         ensemble = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(
112 |                                       random_state=seed, max_depth=1, min_samples_leaf=2),
113 |                                       n_estimators=NUMS, random_state=seed)
114 |         results4 = cross_val_score(ensemble, X, Y, cv=scv, scoring=scoring)
115 |         estimators.append(('Boosting', ensemble, abs(results4.mean())))
116 |         if verbose == 1:
117 |             print('\nLogistic Regression = %0.4f \nLinear Discriminant = %0.4f \nBagging = %0.4f \nBoosting = %0.4f' %
118 |                   (abs(results1.mean()), abs(results2.mean()), abs(results3.mean()), abs(results4.mean())))
119 |         besttype = sorted(estimators, key=lambda x: x[2], reverse=True)[0][0]
120 |         bestmodel = sorted(estimators, key=lambda x: x[2], reverse=True)[0][1]
121 |         bestscore = sorted(estimators, key=lambda x: x[2], reverse=True)[0][2]
122 |         if verbose == 1:
123 |             print('    Best Model = %s with %0.2f %s score\n' % (besttype, bestscore, scoring))
124 |     return bestmodel, bestscore, besttype
125 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/val.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import matplotlib.pyplot as plt
  4 | import seaborn as sns
  5 | get_ipython().magic('matplotlib inline')
  6 | sns.set(style="white", color_codes=True)
  7 | 
  8 | from sklearn.model_selection import TimeSeriesSplit
  9 | from sklearn.model_selection import GridSearchCV
 10 | 
 11 | #########################################################
 12 | def cross_validation_time_series(model, df, preds, target,n_times=10,verbose=0):
 13 |     """
 14 |     This splits a time series data frame "n" times as specified in the input (default=10)
 15 |     Initially it will start with a certain number of rows in train but it will gradually
 16 |     increase train size in steps (which it will calculate automatically) while the
 17 |     number of test rows will remain the same (though their content will vary).
 18 |     This utility is based on sklearn's time_series_split()
 19 |     """
 20 |     if n_times > 10:
 21 |         print('More than 10 splits is not recommended. Setting n_times to 10')
 22 |         n_times = 10
 23 |     splits = TimeSeriesSplit(n_splits=n_times)
 24 |     index = 0
 25 |     X = df[preds].values
 26 |     y = df[target].values
 27 |     non_df = {}
 28 |     rmse_list = []
 29 |     for train_index, test_index in splits.split(X):
 30 |         X_train = X[train_index]
 31 |         y_train = y[train_index]
 32 |         X_test = X[test_index]
 33 |         y_test = y[test_index]
 34 |         if verbose == 1:
 35 |             print('Iteration %d: Total Observations = %d' %(index,len(X_train)+len(X_test)))
 36 |             print('    Training Index %d Observations: %s' %(len(train_index),train_index))
 37 |             print('    Testing Index %d Observations: %s' %(len(test_index),test_index))
 38 |         model.fit(X_train, y_train)
 39 |         rmse = print_rmse(y_test, model.predict(X_test))
 40 |         rmse_list.append(rmse)
 41 |         norm_rmse = rmse/y_test.std()
 42 |         print('     Split %d: Normalized RMSE = %0.2f' %(norm_rmse))
 43 |         non_df[index] = norm_rmse
 44 |         index += 1
 45 |     non_df = pd.Series(non_df)
 46 |     non_df.plot()
 47 |     ave_norm_rmse = np.mean(rmse_list)/y.std()
 48 |     print('Normalized RMSE over  entire data after %d splits = 0.2f' %(index,ave_norm_rmse))
 49 |     return ave_norm_rmse
 50 | ##########################################################
 51 | def rolling_validation_time_series(model, df, preds, target,train_size=0,
 52 |                                                     test_size=0, verbose=0):
 53 |     """
 54 |     This utility uses a Walk Forward or Rolling Period time series cross validation method.
 55 |     Initially it will start with a minimum number of observations to train the model.
 56 |     It then gradually increases the train size in steps (which it will calculate automatically)
 57 |     while fixing the number of test rows the same (though their content will vary).
 58 |     Once the train+test series exceeds the number of rows in data set, it stops.
 59 |     It does  not use SKLearn's Time Series Split. You need to provide the initial sizes
 60 |     of train and test and it will take care of the rest.
 61 |     """
 62 |     df = df[:]
 63 |     index = 0
 64 |     X = df[preds].values
 65 |     y = df[target].values
 66 |     non_df = {}
 67 |     rmse_list = []
 68 |     if train_size == 0:
 69 |         train_size = np.int(np.ceil(len(y)/2))
 70 |     if test_size == 0:
 71 |         test_size = np.int(np.ceil(len(y)/4))
 72 |     step_size = np.int(np.ceil(test_size/10))
 73 |     n_records = len(X)
 74 |     ### This contains the start point of test size for each K-Fold in time series
 75 |     test_list = np.floor(np.linspace(train_size,n_records-1,5)).tolist()
 76 |     for i in range(4):
 77 |         train_size = np.int(test_list[i])
 78 |         test_size = np.int(test_list[i+1] - test_list[i])
 79 |         X_train, X_test = X[:train_size],X[train_size:train_size+test_size]
 80 |         y_train, y_test = y[:train_size],y[train_size:train_size+test_size]
 81 |         model.fit(X_train, y_train)
 82 |         if i == 0:
 83 |             ### Since both start and end points are included, you have to subtract 1 from index in this
 84 |             df.loc[:train_size-1,'predictions'] = y[:train_size]
 85 |             df.loc[train_size:train_size+test_size-1,'predictions'] = model.predict(X_test)
 86 |         elif i == 3:
 87 |             test_size = np.int(len(X) - train_size)
 88 |             X_train, X_test = X[:train_size],X[train_size:train_size+test_size]
 89 |             y_train, y_test = y[:train_size],y[train_size:train_size+test_size]
 90 |             df.loc[train_size:train_size+test_size,'predictions'] = model.predict(X_test)
 91 |         else:
 92 |             df.loc[train_size:train_size+test_size-1,'predictions'] = model.predict(X_test)
 93 |         if len(y_train) + len(y_test) >= df.shape[0]:
 94 |             if verbose:
 95 |                 print('Iteration %d: Observations:%d' %(index+1,len(X_train)+len(X_test)))
 96 |                 print('    Train Size=%d, Test Size=%d' %(len(y_train),len(y_test)))
 97 |             rmse = print_rmse(y_test, model.predict(X_test))
 98 |             norm_rmse = rmse/y_test.std()
 99 |             non_df[i] = rmse
100 |             if verbose:
101 |                 print('Normalized RMSE = %0.2f' %norm_rmse)
102 |             non_df = pd.Series(non_df)
103 |             weighted_ave_rmse = np.average(non_df.values,weights=non_df.index,axis=0)
104 |             print('\nWeighted Average of RMSE (%d iterations) = %0.2f\n    Normalized Wtd Aver. RMSE (using std dev) = %0.2f'
105 |                                 %(index+1, weighted_ave_rmse,weighted_ave_rmse/y[:].std()))
106 |             #############################
107 |             if verbose == 1 or verbose == 2:
108 |                 fig, ax1 = plt.subplots(nrows=1,ncols=1,figsize=(12,8))
109 |                 ax1.plot(df[target],label='In-Sample Data', linestyle='-')
110 |                 ax1.plot(df['predictions'],'g',alpha=0.6,label='Rolling Forecast')
111 |                 ax1.set_xlabel('Time')
112 |                 ax1.set_ylabel('Values')
113 |                 ax1.legend(loc='best')
114 |             return weighted_ave_rmse, weighted_ave_rmse/y[:].std(), df
115 |         else:
116 |             if verbose:
117 |                 print('Iteration %d: Observations:%d' %(index+1,len(X_train)+len(X_test)))
118 |                 print('    Train Size=%d, Test Size=%d' %(len(y_train),len(y_test)))
119 |             rmse = print_rmse(y_test, model.predict(X_test))
120 |             norm_rmse = rmse/y_test.std()
121 |             non_df[i] = rmse
122 |             if verbose:
123 |                 print('Normalized RMSE = %0.2f' %norm_rmse)
124 |             index += 1
125 | 
126 | 
127 | ###################################################
128 | # Re-run the above statistical tests, and more. To be used when selecting viable models.
129 | def ts_model_validation(model_results):
130 |     """
131 |     Once you have built a time series model, how to validate it. This utility attempts to.
132 |     This is only done on SARIMAX models from statsmodels. Don't try it on other models.
133 |     The input is model_results which is the variable assigned to the model.fit() method.
134 |     """
135 |     het_method='breakvar'
136 |     norm_method='jarquebera'
137 |     sercor_method='ljungbox'
138 |     ########################
139 |     (het_stat, het_p) = model_results.test_heteroskedasticity(het_method)[0]
140 |     norm_stat, norm_p, skew, kurtosis = model_results.test_normality(norm_method)[0]
141 |     sercor_stat, sercor_p = model_results.test_serial_correlation(method=sercor_method)[0]
142 |     sercor_stat = sercor_stat[-1] # last number for the largest lag
143 |     sercor_p = sercor_p[-1] # last number for the largest lag
144 | 
145 |     # Run Durbin-Watson test on the standardized residuals.
146 |     # The statistic is approximately equal to 2*(1-r), where r is the sample autocorrelation of the residuals.
147 |     # Thus, for r == 0, indicating no serial correlation, the test statistic equals 2.
148 |     # This statistic will always be between 0 and 4. The closer to 0 the statistic,
149 |     # the more evidence for positive serial correlation. The closer to 4,
150 |     # the more evidence for negative serial correlation.
151 |     # Essentially, below 1 or above 3 is bad.
152 |     dw = sm.stats.stattools.durbin_watson(model_results.filter_results.standardized_forecasts_error[0, model_results.loglikelihood_burn:])
153 | 
154 |     # check whether roots are outside the unit circle (we want them to be);
155 |     # will be True when AR is not used (i.e., AR order = 0)
156 |     arroots_outside_unit_circle = np.all(np.abs(model_results.arroots) > 1)
157 |     # will be True when MA is not used (i.e., MA order = 0)
158 |     maroots_outside_unit_circle = np.all(np.abs(model_results.maroots) > 1)
159 | 
160 |     print('Test heteroskedasticity of residuals ({}): stat={:.3f}, p={:.3f}'.format(het_method, het_stat, het_p));
161 |     print('\nTest normality of residuals ({}): stat={:.3f}, p={:.3f}'.format(norm_method, norm_stat, norm_p));
162 |     print('\nTest serial correlation of residuals ({}): stat={:.3f}, p={:.3f}'.format(sercor_method, sercor_stat, sercor_p));
163 |     print('\nDurbin-Watson test on residuals: d={:.2f}\n\t(NB: 2 means no serial correlation, 0=pos, 4=neg)'.format(dw))
164 |     print('\nTest for all AR roots outside unit circle (>1): {}'.format(arroots_outside_unit_circle))
165 |     print('\nTest for all MA roots outside unit circle (>1): {}'.format(maroots_outside_unit_circle))
166 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/sarimax.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import copy
  4 | import matplotlib.pyplot as plt
  5 | get_ipython().magic('matplotlib inline')
  6 | # imported SARIMAX from statsmodels pkg
  7 | from statsmodels.tsa.statespace.sarimax import SARIMAX
  8 | # helper functions
  9 | from ...utils import colorful, print_static_rmse, print_dynamic_rmse
 10 | from ...models.ar_based.param_finder import find_best_pdq_or_PDQ
 11 | 
 12 | 
 13 | def build_sarimax_model(ts_df, metric, seasonality=False, seasonal_period=None,
 14 |                         p_max=12, d_max=2, q_max=12, forecast_period=2, verbose=0):
 15 |     ############ Split the data set into train and test for Cross Validation Purposes ########
 16 |     ts_train = ts_df[:-forecast_period]
 17 |     ts_test = ts_df[-forecast_period:]
 18 |     if verbose == 1:
 19 |         print('Data Set split into train %s and test %s for Cross Validation Purposes'
 20 |                             % (ts_train.shape, ts_test.shape))
 21 |     ############# Now find the best pdq and PDQ parameters for the model #################
 22 |     if not seasonality:
 23 |         print('Building a Non Seasonal Model...')
 24 |         print('\nFinding best Non Seasonal Parameters:')
 25 |         best_p, best_d, best_q, best_bic,seasonality = find_best_pdq_or_PDQ(ts_train, metric,
 26 |                                 p_max, d_max, q_max, non_seasonal_pdq=None,
 27 |                                 seasonal_period=None, seasonality=False, verbose=verbose)
 28 |         print('\nBest model is: Non Seasonal SARIMAX(%d,%d,%d), %s = %0.3f' % (best_p, best_d,
 29 |                                                         best_q,metric, best_bic))
 30 |         #### In order to get forecasts to be in the same value ranges of the orig_endogs,
 31 |         #### you must  set the simple_differencing = False and the start_params to be the
 32 |         #### same as ARIMA.
 33 |         #### THat is the only way to ensure that the output of this model is
 34 |         #### comparable to other ARIMA models
 35 |         bestmodel = SARIMAX(ts_train, order=(best_p, best_d, best_q),
 36 |                                              enforce_stationarity=False,
 37 |                                              enforce_invertibility=False,
 38 |                                              trend='ct',
 39 |                                              start_params=[0, 0, 0, 1],
 40 |                                              simple_differencing=False)
 41 |     else:
 42 |         print(colorful.BOLD + 'Building a Seasonal Model...'+colorful.END)
 43 |         print(colorful.BOLD + '\n    Finding best Non-Seasonal pdq Parameters:' + colorful.END)
 44 |         best_p, best_d, best_q, best_bic, seasonality = find_best_pdq_or_PDQ(ts_train, metric,
 45 |                                              p_max, d_max, q_max,
 46 |                                              non_seasonal_pdq=None,
 47 |                                              seasonal_period=None,
 48 |                                              seasonality=False,verbose=verbose)
 49 |         print(colorful.BOLD + '\n    Finding best Seasonal PDQ Model Parameters:' + colorful.END)
 50 |         best_P, best_D, best_Q, best_bic, seasonality = find_best_pdq_or_PDQ(ts_train, metric,
 51 |                                              p_max, d_max, q_max,
 52 |                                              non_seasonal_pdq=(best_p, best_d, best_q),
 53 |                                              seasonal_period=seasonal_period,
 54 |                                              seasonality=True, verbose=verbose)
 55 |         if seasonality:
 56 |             print('\nBest model is a Seasonal SARIMAX(%d,%d,%d)*(%d,%d,%d,%d), %s = %0.3f' % (
 57 |                                              best_p, best_d, best_q, best_P,
 58 |                                              best_D, best_Q, seasonal_period, metric, best_bic))
 59 |             #### In order to get forecasts to be in the same value ranges of the orig_endogs,
 60 |             #### you must set the simple_differencing =False and the start_params to be
 61 |             #### the same as ARIMA.
 62 |             #### THat is the only way to ensure that the output of this model is
 63 |             #### comparable to other ARIMA models
 64 |             bestmodel = SARIMAX(ts_train, order=(best_p, best_d, best_q),
 65 |                                 seasonal_order=(best_P, best_D, best_Q, seasonal_period),
 66 |                                 enforce_stationarity=False,
 67 |                                 enforce_invertibility=False,
 68 |                                 simple_differencing=False, trend='ct',
 69 |                                 start_params=[0, 0, 0, 1])
 70 |         else:
 71 |             print('\nBest model is a Non Seasonal SARIMAX(%d,%d,%d)' % (
 72 |                                                 best_p, best_d, best_q))
 73 |             #### In order to get forecasts to be in the same value ranges of the orig_endogs,
 74 |             #### you must set the simple_differencing =False and the start_params to be
 75 |             #### the same as ARIMA.
 76 |             #### THat is the only way to ensure that the output of this model is
 77 |             #### comparable to other ARIMA models
 78 |             bestmodel = SARIMAX(ts_train, order=(best_p, best_d, best_q),
 79 |                                 enforce_stationarity=False,
 80 |                                 enforce_invertibility=False,
 81 |                                 trend='ct',
 82 |                                 start_params=[0, 0, 0, 1],
 83 |                                 simple_differencing=False)
 84 |     print(colorful.BOLD + 'Fitting best SARIMAX model for full data set'+colorful.END)
 85 |     try:
 86 |         results = bestmodel.fit()
 87 |         print('    Best %s metric = %0.1f' % (metric, eval('results.' + metric)))
 88 |     except:
 89 |         print('Error: Getting Singular Matrix. Please try using other PDQ parameters or turn off Seasonality')
 90 |         return bestmodel, None, np.inf, np.inf
 91 |     if verbose == 1:
 92 |         try:
 93 |             results.plot_diagnostics(figsize=(16, 12))
 94 |         except:
 95 |             print('Error: SARIMAX plot diagnostic. Continuing...')
 96 |     ### this is needed for static forecasts ####################
 97 |     y_truth = ts_train[:]
 98 |     y_forecasted = results.predict(dynamic=False)
 99 |     concatenated = pd.concat([y_truth, y_forecasted], axis=1, keys=['original', 'predicted'])
100 |     ### for SARIMAX, you don't have to restore differences since it predicts like actuals.###
101 |     if verbose == 1:
102 |         print('Static Forecasts:')
103 |         print_static_rmse(concatenated['original'].values[best_d:],
104 |                           concatenated['predicted'].values[best_d:],
105 |                           verbose=verbose)
106 |     ########### Dynamic One Step Ahead Forecast ###########################
107 |     ### Dynamic Forecasts are a better representation of true predictive power
108 |     ## since they only use information from the time series up to a certain point,
109 |     ## and after that, forecasts are generated using values from previous forecasted
110 |     ## time points.
111 |     #################################################################################
112 |     # Now do dynamic forecast plotting for the last X steps of the data set ######
113 |     if verbose == 1:
114 |         ax = concatenated[['original', 'predicted']][best_d:].plot(figsize=(16, 12))
115 |         startdate = ts_df.index[-forecast_period-1]
116 |         pred_dynamic = results.get_prediction(start=startdate, dynamic=True, full_results=True)
117 |         pred_dynamic_ci = pred_dynamic.conf_int()
118 |         pred_dynamic.predicted_mean.plot(label='Dynamic Forecast', ax=ax)
119 |         try:
120 |             ax.fill_between(pred_dynamic_ci.index, pred_dynamic_ci.iloc[:, 0],
121 |                             pred_dynamic_ci.iloc[:, 1], color='k', alpha=.25)
122 |             ax.fill_betweenx(ax.get_ylim(), startdate, ts_train.index[-1], alpha=.1, zorder=-1)
123 |         except:
124 |             pass
125 |         ax.set_xlabel('Date')
126 |         ax.set_ylabel('Levels')
127 |         plt.legend()
128 |         plt.show()
129 |     # Extract the dynamic predicted and true values of our time series
130 |     y_forecasted = results.forecast(forecast_period)
131 |     if verbose == 1:
132 |         print(results.summary())
133 |     print('Dynamic %d-Period Forecast:' % (forecast_period,))
134 |     rmse, norm_rmse = print_dynamic_rmse(ts_test, y_forecasted, ts_train)
135 |     return bestmodel, results.get_forecast(forecast_period, full_results=False).summary_frame(), rmse, norm_rmse
136 | 
137 | 
138 | # def predicted_diffs_restored_SARIMAX(original, predicted, periods=1):
139 | #     """
140 | #     THIS UTILITY IS NEEDED ONLY WHEN WE HAVE SIMPLE DIFFERENCING SET TO TRUE IN SARIMAX!
141 | #     The number of periods is equal to the differencing order (d) in the SARIMAX mode.
142 | #     SARIMAX returns a "differenced" prediction only when simple_differencing=True.
143 | #     """
144 | #     restored = original.loc[~predicted.isnull()]
145 | #     predicted = predicted.loc[~predicted.isnull()]
146 | #     restored.iloc[periods:] = predicted[periods:]
147 | #     restored = restored.cumsum()
148 | #     res = pd.concat([original, predicted, restored], axis=1)
149 | #     res.columns = ['original', 'pred_as_diffs', 'predicted']
150 | #     res[['original', 'predicted']].plot()
151 | #     print_static_rmse(concatenated['original'], concatenated['predicted'])
152 | #     return res[['original', 'predicted']]
153 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/models/ar_based/arima.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import itertools
  4 | import operator
  5 | import copy
  6 | import matplotlib.pyplot as plt
  7 | import seaborn as sns
  8 | get_ipython().magic('matplotlib inline')
  9 | sns.set(style="white", color_codes=True)
 10 | # imported ARIMA from statsmodels pkg
 11 | from statsmodels.tsa.arima_model import ARIMA
 12 | # helper functions
 13 | from ...utils import print_static_rmse, print_dynamic_rmse
 14 | from ...models.ar_based.param_finder import find_lowest_pq
 15 | import pdb
 16 | 
 17 | def build_arima_model(ts_df, metric='aic', p_max=3, d_max=1, q_max=3,
 18 |                       forecast_period=2, method='mle', verbose=0):
 19 |     """
 20 |     This builds a Non Seasonal ARIMA model given a Univariate time series dataframe with time
 21 |     as the Index, ts_df can be a dataframe with one column only or a single array. Dont send
 22 |     Multiple Columns!!! Include only that variable that is a Time Series. DO NOT include
 23 |     Non-Stationary data. Make sure your Time Series is "Stationary"!! If not, this
 24 |     will give spurious results, since it automatically builds a Non-Seasonal model,
 25 |     you need not give it a Seasonal True/False flag.
 26 |     "metric": You can give it any of the following metrics as criteria: AIC, BIC, Deviance,
 27 |     Log-likelihood. Optionally, you can give it a fit method as one of the following:
 28 |     {'css-mle','mle','css'}
 29 |     """
 30 |     p_min = 0
 31 |     d_min = 0
 32 |     q_min = 0
 33 |     # Initialize a DataFrame to store the results
 34 |     iteration = 0
 35 |     results_dict = {}
 36 |     ################################################################################
 37 |     ####### YOU MUST Absolutely set this parameter correctly as "levels". If not,
 38 |     ####  YOU WILL GET DIFFERENCED PREDICTIONS WHICH ARE FIENDISHLY DIFFICULT TO UNDO.
 39 |     #### If you set this to levels, then you can do any order of differencing and
 40 |     ####  ARIMA will give you predictions in the same level as orignal values.
 41 |     ################################################################################
 42 |     pred_type = 'levels'
 43 |     #########################################################################
 44 |     ts_train = ts_df[:-forecast_period]
 45 |     ts_test = ts_df[-forecast_period:]
 46 |     if verbose == 1:
 47 |         print('Data Set split into train %s and test %s for Cross Validation Purposes'
 48 |               % (ts_train.shape, ts_test.shape))
 49 |     #########################################################################
 50 |     if ts_train.dtype == 'int64':
 51 |         ts_train = ts_train.astype(float)
 52 |     for d_val in range(d_min, d_max+1):
 53 |         print('\nDifferencing = %d' % d_val)
 54 |         results_bic = pd.DataFrame(index=['AR{}'.format(i) for i in range(p_min, p_max+1)],
 55 |                                    columns=['MA{}'.format(i) for i in range(q_min, q_max+1)])
 56 |         for p_val, q_val in itertools.product(range(p_min, p_max+1), range(q_min, q_max+1)):
 57 |             if p_val == 0 and d_val == 0 and q_val == 0:
 58 |                 results_bic.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = np.nan
 59 |                 continue
 60 |             else:
 61 |                 try:
 62 |                     model = ARIMA(ts_train, order=(p_val, d_val, q_val))
 63 |                     results = model.fit(transparams=False, method=method)
 64 |                     results_bic.loc['AR{}'.format(p_val), 'MA{}'.format(q_val)] = eval('results.' + metric)
 65 |                     if iteration % 10 == 0:
 66 |                         print(' Iteration %d completed...' % iteration)
 67 |                     iteration += 1
 68 |                     if iteration >= 100:
 69 |                         print('    Ending Iterations at %d' % iteration)
 70 |                         break
 71 |                 except:
 72 |                     iteration += 1
 73 |                     continue
 74 |         results_bic = results_bic[results_bic.columns].astype(float)
 75 |         interim_d = copy.deepcopy(d_val)
 76 |         interim_p, interim_q, interim_bic = find_lowest_pq(results_bic)
 77 |         if verbose == 1:
 78 |             fig, ax = plt.subplots(figsize=(20, 10))
 79 |             ax = sns.heatmap(results_bic,
 80 |                              mask=results_bic.isnull(),
 81 |                              ax=ax,
 82 |                              annot=True,
 83 |                              fmt='.0f')
 84 |             ax.set_title(metric)
 85 |         results_dict[str(interim_p) + ' ' + str(interim_d) + ' ' + str(interim_q)] = interim_bic
 86 |     best_bic = min(results_dict.items(), key=operator.itemgetter(1))[1]
 87 |     best_pdq = min(results_dict.items(), key=operator.itemgetter(1))[0]
 88 |     best_p = int(best_pdq.split(' ')[0])
 89 |     best_d = int(best_pdq.split(' ')[1])
 90 |     best_q = int(best_pdq.split(' ')[2])
 91 |     print('\nBest model is: Non Seasonal ARIMA(%d,%d,%d), %s = %0.3f' % (best_p, best_d, best_q,metric, best_bic))
 92 |     bestmodel = ARIMA(ts_train, order=(best_p, best_d, best_q))
 93 |     print('####    Fitting best model for full data set now. Will take time... ######')
 94 |     try:
 95 |         results = bestmodel.fit(transparams=True, method=method)
 96 |     except:
 97 |         results = bestmodel.fit(transparams=False, method=method)
 98 |     ### this is needed for static forecasts ####################
 99 |     y_truth = ts_train[:]
100 |     y_forecasted = results.predict(typ='levels')
101 |     concatenated = pd.concat([y_truth, y_forecasted], axis=1, keys=['original', 'predicted'])
102 |     if best_d == 0:
103 |         #### Do this for ARIMA only ######
104 |         ###  If there is no differencing DO NOT use predict_type since it will give an error = do not use "linear".
105 |         print('Static Forecasts:')
106 |         print_static_rmse(concatenated['original'].values, concatenated['predicted'].values, best_d)
107 |         start_date = ts_df.index[-forecast_period]
108 |         end_date = ts_df.index[-1]
109 |         pred_dynamic = results.predict(start=start_date, end=end_date, dynamic=True)
110 |         if verbose == 1:
111 |             ax = concatenated[['original', 'predicted']][best_d:].plot()
112 |             pred_dynamic.plot(label='Dynamic Forecast', ax=ax, figsize=(15, 5))
113 |             print('Dynamic %d-period Forecasts:' % (forecast_period,))
114 |             plt.legend()
115 |             plt.show()
116 |     else:
117 |         #### Do this for ARIMA only ######
118 |         ####  If there is differencing, you must use "levels" as the predict type to get original levels as actuals
119 |         pred_type = 'levels'
120 |         print('Static Forecasts:')
121 |         print_static_rmse(y_truth[best_d:], y_forecasted)
122 |         ########### Dynamic One Step Ahead Forecast ###########################
123 |         ### Dynamic Forecasts are a better representation of true predictive power
124 |         ## since they only use information from the time series up to a certain point,
125 |         ## and after that, forecasts are generated using values from previous forecasted
126 |         ## time points.
127 |         #################################################################################
128 |         start_date = ts_df.index[-forecast_period]
129 |         end_date = ts_df.index[-1]
130 |         pred_dynamic = results.predict(typ=pred_type, start=start_date, end=end_date, dynamic=True)
131 |         try:
132 |             pred_dynamic[pd.to_datetime((pred_dynamic.index-best_d).values[0])] = \
133 |                                      y_truth[pd.to_datetime((pred_dynamic.index-best_d).values[0])]
134 |         except:
135 |             print('Dynamic predictions erroring but continuing...')
136 |         pred_dynamic.sort_index(inplace=True)
137 |         print('\nDynamic %d-period Forecasts:' % forecast_period)
138 |         if verbose == 1:
139 |             ax = concatenated.plot()
140 |             pred_dynamic.plot(label='Dynamic Forecast', ax=ax, figsize=(15, 5))
141 |             ax.set_xlabel('Date')
142 |             ax.set_ylabel('Values')
143 |             plt.legend()
144 |             plt.show()
145 |     if verbose == 1:
146 |         try:
147 |             results.plot_diagnostics(figsize=(16, 12))
148 |         except:
149 |             pass
150 |     print(results.summary())
151 |     res_frame = pd.DataFrame([results.forecast(forecast_period)[0], results.forecast(forecast_period)[1],
152 |                                                results.forecast(forecast_period)[2]],
153 |                                                index=['mean','mean_se','mean_ci'],
154 |                                                columns=['Forecast_' + str(x) for x
155 |                                                in range(1, forecast_period+1)]).T
156 |     res_frame['mean_ci_lower'] = res_frame['mean_ci'].map(lambda x: x[0])
157 |     res_frame['mean_ci_upper'] = res_frame['mean_ci'].map(lambda x: x[1])
158 |     res_frame.drop('mean_ci', axis=1, inplace=True)
159 |     if verbose == 1:
160 |         print('Model Forecast(s):\n', res_frame)
161 |     rmse, norm_rmse = print_dynamic_rmse(ts_test, pred_dynamic, ts_train)
162 |     return bestmodel, res_frame, rmse, norm_rmse
163 | 
164 | 
def predicted_diffs_restored_ARIMA(actuals, predicted, periods=1):
    """
    Rebuild level values from predictions that were returned as differences.

    This is only needed when ARIMA predictions were not requested with
    typ="levels"; in the normal flow it is not used.

    With ``periods == 0`` the predictions are kept as they are (sorted by index)
    and only the first entry is replaced by the first actual value. Otherwise the
    actuals are kept for the first ``periods`` rows, the remaining rows are
    replaced by the predictions, and a cumulative sum is taken from row
    ``periods - 1`` onward.

    Prints the static RMSE via ``print_static_rmse`` and returns a frame with
    the columns 'original' and 'predicted'.
    """
    if periods != 0:
        rebuilt = actuals.copy()
        rebuilt.iloc[periods:] = predicted[periods:]
        rebuilt = rebuilt[(periods-1):].cumsum()
    else:
        rebuilt = predicted.copy()
        rebuilt.sort_index(inplace=True)
        rebuilt[0] = actuals[0]
    combined = pd.concat([actuals, predicted, rebuilt], axis=1)
    combined.columns = ['original', 'pred_as_diffs', 'predicted']
    original_values = combined['original'].values
    restored_values = combined['predicted'].values
    print_static_rmse(original_values, restored_values, periods-1)
    return combined[['original', 'predicted']]
182 | 


--------------------------------------------------------------------------------
/Auto_TS/auto_ts/utils/eda.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import matplotlib.dates as mdates
  4 | from itertools import cycle
  5 | import matplotlib.pyplot as plt
  6 | import seaborn as sns
  7 | get_ipython().magic('matplotlib inline')
  8 | sns.set(style="white", color_codes=True)
  9 | # TSA from Statsmodels
 10 | import statsmodels.tsa.api as smt
 11 | 
 12 | 
 13 | def time_series_plot(y, lags=31, title='Original Time Series', chart_type='line',
 14 |                      chart_time='years'):
 15 |     """
 16 |     Plot a Time Series along with how it will look after differencing and what its
 17 |     AR/MA lags will be by viewing the ACF and PACF, along with its histogram.
 18 |     You just need to provide the time series (y) as a Series. Index is assumed
 19 |     to be Pandas datetime. It assumes that you want to see default lags of 31.
 20 |     But you can modify it to suit.
 21 |     """
 22 |     colors = cycle('byrcmgkbyrcmgkbyrcmgkbyrcmgkbyr')
 23 |     fig = plt.figure(figsize=(20, 20))
 24 |     grid = plt.GridSpec(3, 2, wspace=0.5, hspace=0.5)
 25 |     fig.subplots_adjust(hspace=1)
 26 |     ########## Use the gridspec function ##############
 27 |     ts_ax = plt.subplot(grid[0, 0:])
 28 |     diff_ax = plt.subplot(grid[1, 0])
 29 |     hist_ax = plt.subplot(grid[1, 1])
 30 |     acf_ax = plt.subplot(grid[2, 0])
 31 |     pacf_ax = plt.subplot(grid[2, 1])
 32 |     ### Draw multiple kinds of graphs here to each subplot axis ###
 33 |     if chart_type == 'line':
 34 |         y.plot(ax=ts_ax, color=next(colors))
 35 |     else:
 36 |         if chart_time == 'years':
 37 |             majors = mdates.YearLocator()  # every year
 38 |             minors = mdates.MonthLocator()  # every month
 39 |             majorsFmt = mdates.DateFormatter('%Y')
 40 |         elif chart_time == 'months':
 41 |             majors = mdates.YearLocator()  # every year
 42 |             minors = mdates.MonthLocator()  # every month
 43 |             majorsFmt = mdates.DateFormatter('\n\n\n%b\n%Y')
 44 |         elif chart_time == 'weeks':
 45 |             majors = mdates.MonthLocator()
 46 |             minors = mdates.WeekdayLocator(byweekday=(1), interval=1)
 47 |             majorsFmt = mdates.DateFormatter('\n\n\n%b\n%Y')
 48 |         elif chart_time == 'days':
 49 |             majors = mdates.DayLocator(bymonthday=None, interval=1, tz=None)
 50 |             minors = mdates.HourLocator(byhour=None, interval=1, tz=None)
 51 |             majorsFmt = mdates.DateFormatter('\n\n\n%d\n%b')
 52 |         else:
 53 |             majors = mdates.YearLocator()  # every year
 54 |             minors = mdates.MonthLocator()  # every month
 55 |             majorsFmt = mdates.DateFormatter('\n\n\n%b\n%Y')
 56 |         try:
 57 |             #### this works in most cases but in some cases, it gives an error
 58 |             ts_ax.bar(y.index, height=y, width=20, color=list((y>0).astype(int).map({1:'g',0:'r'}).values))
 59 |         except:
 60 |             #### In some cases where y is a dataframe, this might work.
 61 |             yindex = y.index
 62 |             yvalues = y.values.ravel()
 63 |             ts_ax.bar(yindex, height=yvalues, width=20, color=list(using_where((yvalues>0).astype(int)).ravel()))
 64 |         ts_ax.xaxis.set_major_locator(majors)
 65 |         ts_ax.xaxis.set_major_formatter(majorsFmt)
 66 |         ts_ax.xaxis.set_minor_locator(minors)
 67 |         ts_ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
 68 |         ts_ax.grid(True)
 69 |     ts_ax.set_title(title)
 70 |     y.diff(1).plot(ax=diff_ax, color=next(colors))
 71 |     diff_ax.set_title('After Differencing = 1')
 72 |     y.plot(ax=hist_ax, kind='hist', bins=25, color=next(colors))
 73 |     hist_ax.set_title('Histogram for Original Series')
 74 |     try:
 75 |         smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)
 76 |         acf_ax.set_title('ACF for Original Series')
 77 |     except:
 78 |         acf_ax.set_title('Data Error: Could not draw ACF for Original Series')
 79 |     try:
 80 |         smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)
 81 |         pacf_ax.set_title('PACF for Original Series')
 82 |     except:
 83 |         pacf_ax.set_title('Data Error: Could not draw PACF for Original Series')
 84 |     [ax.set_xlim(0) for ax in [acf_ax, pacf_ax]]
 85 |     plt.show()
 86 | 
 87 | 
 88 | def using_where(x):
 89 |     return np.where(x == 1, 'g', 'r')
 90 | 
 91 | 
 92 | def top_correlation_to_name(stocks, column_name, searchstring, top=5):
 93 |     """
 94 |     ####################################################################################
 95 |     This function draws a correlation chart of the top "x" rows of a data frame that are highly
 96 |     correlated to a selected row in the dataframe. You can think of the rows of the input
 97 |     dataframe as containing stock prices or fund flows or product sales and the columns should
 98 |     contain time series data of prices or flows or sales over multiple time periods.
 99 |     Now this program will allow you to select the top 5 or 10 rows that are highly correlated
100 |     to a given row selected by the column: column_name and using a search string "searchstring".
101 |     The program will search for the search string in that column column_name and return a list
102 |     of 5 or 10 rows that are the most correlated to that selected row. If you give "top" as
103 |     a float ratio then it will use the ratio as the cut off point in the correlation
104 |     coefficient to select rows.
105 |     ####################################################################################
106 |     """
107 |     #### First increment top by 1 since you are asking for top X names in addition to the one you have, top += 1
108 |     incl = [x for x in list(stocks) if x not in column_name]
109 |     ### First drop all NA rows since they will mess up your correlations, stocks.dropna(inplace=True)
110 |     if stocks.empty:
111 |         print('After dropping NaNs, the data frame has become empty.')
112 |         return
113 |     ### Now find the highest correlated rows to the selected row ###
114 |     try:
115 |         index_val = search_string(stocks, column_name,searchstring).index[0]
116 |     except:
117 |         print('Not able to find the search string in the column.')
118 |         return
119 |     ### Bring that selected Row to the top of the Data Frame
120 |     df = stocks[:]
121 |     df["new"] = range(l, len(df)+l)
122 |     df.loc[index_val,"new"] = 0
123 |     stocks = df.sort_values("new").drop("new",axis=1)
124 |     stocks.reset_index(inplace=True,drop=True)
125 |     ##### Now calculate the correlation coefficients of other rows with the Top row
126 |     try:
127 |         cordf = pd.DataFrame(stocks[incl].T.corr().sort_values(0, ascending=False))
128 |     except:
129 |         print('Cannot calculate Correlations since Dataframe contains string values or objects.')
130 |         return
131 |     try:
132 |         cordf = stocks[column_name].join(cordf)
133 |     except:
134 |         cordf = pd.concat((stocks[column_name], cordf), axis=1)
135 |     #### Visualizing the top 5 or 10 or whatever cut-off they have given for Corr Coeff
136 |     if top >= 1:
137 |         top10index = cordf.sort_values(0, ascending=False).iloc[:top, :3].index
138 |         top10names = cordf.sort_values(0, ascending=False).iloc[:top, :3][column_name]
139 |         top10values = cordf.sort_values(0, ascending=False)[0].values[:top]
140 |     else:
141 |         top10index = cordf.sort_values(0, ascending=False)[
142 |                      cordf.sort_values(0, ascending=False)[0].values >= top].index
143 |         top10names = cordf.sort_values(0, ascending=False)[cordf.sort_values(
144 |                                        0, ascending=False)[0].values >= top][column_name]
145 |         top10alues = cordf.sort_values(0, ascending=False)[cordf.sort_values(
146 |                                        0, ascending=False)[0].values >= top][0]
147 |     print(top10names, top10values)
148 |     #### Now plot the top rows that are highly correlated based on condition above
149 |     stocksloc = stocks.iloc[top10index]
150 |     #### Visualizing using Matplotlib ###
151 |     stocksloc = stocksloc.T
152 |     stocksloc = stocksloc.reset_index(drop=True)
153 |     stocksloc.columns = stocksloc.iloc[0].values.tolist()
154 |     stocksloc.drop(0).plot(subplots=True, figsize=(15, 10), legend=False,
155 |                            title="Top %s Correlations to %s" % (top, searchstring))
156 |     [ax.legend(loc=1) for ax in plt.gcf().axes]
157 |     plt.tight_layout()
158 |     plt.show()
159 | 
160 | 
def test_stationarity(timeseries, maxlag=2, regression='c', autolag=None,
                      window=None, plot=False, verbose=False):
    """
    Check unit root stationarity of a time series with the Augmented
    Dickey-Fuller test.

    Null hypothesis: the series is non-stationary.
    If p >= alpha (alpha is fixed at 0.05), the series is reported as non-stationary.
    If p < alpha, the null hypothesis is rejected and the series is reported as
    stationary.

    timeseries: a Series, an array, or a DataFrame. A DataFrame is flattened with
        ``.values.ravel()`` before testing.
    maxlag, regression, autolag: passed through to ``adfuller``; a ``regression``
        of None is replaced by 'c'.
    window: window size of the rolling mean / std plot; only used when ``plot``
        is True. Default=4.
    plot: when True, plots the series with its rolling mean and rolling std.
    verbose: when True, prints the test parameters and the full test output.

    Returns a pandas Series with the test statistic, p-value, lags used, number
    of observations and the critical values, or None if the test raised an error.

    Original source: http://www.analyticsvidhya.com/blog/2016/02/time-series-forecasting-codes-python/
    Function: http://statsmodels.sourceforge.net/devel/generated/statsmodels.tsa.stattools.adfuller.html
    """
    if isinstance(timeseries, pd.DataFrame):
        print('modifying time series dataframe into an array to test')
        timeseries = timeseries.values.ravel()
    if regression is None:
        regression = 'c'
    if verbose:
        print('Running Augmented Dickey-Fuller test with paramters:')
        print('maxlag: {}'.format(maxlag))
        print('regression: {}'.format(regression))
        print('autolag: {}'.format(autolag))
    alpha = 0.05
    if plot:
        if window is None:
            window = 4
        # Arrays and lists have no .rolling(); wrap them so the rolling
        # statistics also work for flattened input.
        if isinstance(timeseries, pd.Series):
            rolling_source = timeseries
        else:
            rolling_source = pd.Series(np.asarray(timeseries).ravel())
        rolmean = rolling_source.rolling(window=window, center=False).mean()
        rolstd = rolling_source.rolling(window=window, center=False).std()
        # Plot rolling statistics:
        plt.plot(timeseries, color='blue', label='Original')
        plt.plot(rolmean, color='red', label='Rolling Mean ({})'.format(window))
        plt.plot(rolstd, color='black', label='Rolling Std ({})'.format(window))
        plt.legend(loc='best')
        plt.title('Rolling Mean & Standard Deviation')
        plt.show(block=False)
    # Perform Augmented Dickey-Fuller test:
    try:
        dftest = smt.adfuller(timeseries, maxlag=maxlag, regression=regression, autolag=autolag)
        dfoutput = pd.Series(dftest[0:4], index=['Test Statistic',
                                                 'p-value',
                                                 '#Lags Used',
                                                 'Number of Observations Used',
                                                 ])
        # Element 4 of the result holds the critical values keyed by level.
        for key, value in dftest[4].items():
            dfoutput['Critical Value (%s)' % key] = value
        if verbose:
            print('Results of Augmented Dickey-Fuller Test:')
            print(dfoutput)
        if dftest[1] >= alpha:
            print(' this series is non-stationary')
        else:
            print(' this series is stationary')
        return dfoutput
    except Exception as err:
        print('Augment Dickey-Fuller test gives an error')
        if verbose:
            # Surface the reason instead of hiding it completely.
            print('    reason: {}'.format(err))
        return
219 | 


--------------------------------------------------------------------------------
/AutoViz/AutoViz.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Untitled0.ipynb",
  7 |       "provenance": []
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     }
 13 |   },
 14 |   "cells": [
 15 |     {
 16 |       "cell_type": "markdown",
 17 |       "metadata": {
 18 |         "id": "9Jos5hZrX2vY",
 19 |         "colab_type": "text"
 20 |       },
 21 |       "source": [
 22 |         "# AutoViz:-\n",
 23 |         "AutoViz is a one-click visualization engine: It creates powerful charts that anyone from a beginner to an expert can use.\n",
 24 |         "\n",
 25 |         "AutoViz knows creating charts from any data manually is hard: It's even harder when you don't know what's in it. AutoViz starts by first analyzing your data to know if it is a Classification, Regression, Unsupervised or Time Series problem. It then chooses the best charts to maximize your insights...\n",
 26 |         "\n",
 27 |         "AutoViz can create charts from any flat file format: CSV, Excel or TXT. Just upload your data and AutoViz will send you the right charts that help you derive insights within minutes... It's that easy!\n",
 28 |         "\n",
 29 |         "Don't believe us? Try it for free. There is no limit to use... And we will never store your data nor sell your email to third party marketers. That's our Promise!"
 30 |       ]
 31 |     },
 32 |     {
 33 |       "cell_type": "markdown",
 34 |       "metadata": {
 35 |         "id": "OGPQF1R7fK_I",
 36 |         "colab_type": "text"
 37 |       },
 38 |       "source": [
 39 |         "AutoViz addresses many of the aforementioned challenges that can arise when performing data visualization work. The tool can be called using a single line of code by feeding it either a pandas dataframe object or a raw csv file to import.\n",
 40 |         "If the number of observations is large, AutoViz will take a random sample; likewise if the number of variables is large (which you can decide) AutoViz can find the most important features and plot impactful visualizations only using those automatically selected features. The user can set the sample number of rows and the maximum number of features to visualize by simply passing a parameter to AutoViz. AutoViz is capable of adapting to any number of different data contexts such as regression, classification, or even time-series data. It also delivers output incredibly quickly.\n",
 41 |         "\n",
 42 |         "AutoViz can be implemented in 4 simple steps:\n",
 43 |         "\n",
 44 |         "Install using \"pip install autoviz\"\n",
 45 |         "Import with “from autoviz.AutoViz_Class import AutoViz_Class”\n",
 46 |         "Instantiate a class “AV = AutoViz_Class()”\n",
 47 |         "Run an experiment in the following line with our data set"
 48 |       ]
 49 |     },
 50 |     {
 51 |       "cell_type": "markdown",
 52 |       "metadata": {
 53 |         "id": "0GQjN1kRYhrU",
 54 |         "colab_type": "text"
 55 |       },
 56 |       "source": [
 57 |         "## Install and import the required libraries"
 58 |       ]
 59 |     },
 60 |     {
 61 |       "cell_type": "code",
 62 |       "metadata": {
 63 |         "id": "PMwuRT2wYjQZ",
 64 |         "colab_type": "code",
 65 |         "colab": {
 66 |           "base_uri": "https://localhost:8080/",
 67 |           "height": 972
 68 |         },
 69 |         "outputId": "98a80171-66ae-4a75-d487-db6bd9c2cac9"
 70 |       },
 71 |       "source": [
 72 |         "!pip install autoviml\n",
 73 |         "!pip install shap\n",
 74 |         "!pip install autoviz\n"
 75 |       ],
 76 |       "execution_count": 7,
 77 |       "outputs": [
 78 |         {
 79 |           "output_type": "stream",
 80 |           "text": [
 81 |             "Collecting autoviz\n",
 82 |             "  Downloading https://files.pythonhosted.org/packages/06/2d/8301752598960dbf92e73c01779cc5d56e2ef6d47cb8d23ff139c0144c93/autoviz-0.0.68-py3-none-any.whl\n",
 83 |             "Requirement already satisfied: jupyter in /usr/local/lib/python3.6/dist-packages (from autoviz) (1.0.0)\n",
 84 |             "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from autoviz) (3.2.1)\n",
 85 |             "Requirement already satisfied: ipython in /usr/local/lib/python3.6/dist-packages (from autoviz) (5.5.0)\n",
 86 |             "Requirement already satisfied: seaborn in /usr/local/lib/python3.6/dist-packages (from autoviz) (0.10.1)\n",
 87 |             "Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from autoviz) (1.0.3)\n",
 88 |             "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from autoviz) (0.22.2.post1)\n",
 89 |             "Requirement already satisfied: ipywidgets in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (7.5.1)\n",
 90 |             "Requirement already satisfied: qtconsole in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (4.7.4)\n",
 91 |             "Requirement already satisfied: notebook in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (5.2.2)\n",
 92 |             "Requirement already satisfied: nbconvert in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (5.6.1)\n",
 93 |             "Requirement already satisfied: ipykernel in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (4.10.1)\n",
 94 |             "Requirement already satisfied: jupyter-console in /usr/local/lib/python3.6/dist-packages (from jupyter->autoviz) (5.2.0)\n",
 95 |             "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autoviz) (2.4.7)\n",
 96 |             "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autoviz) (1.2.0)\n",
 97 |             "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autoviz) (0.10.0)\n",
 98 |             "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autoviz) (1.18.4)\n",
 99 |             "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autoviz) (2.8.1)\n",
100 |             "Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (4.4.2)\n",
101 |             "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (46.4.0)\n",
102 |             "Requirement already satisfied: pexpect; sys_platform != \"win32\" in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (4.8.0)\n",
103 |             "Requirement already satisfied: pygments in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (2.1.3)\n",
104 |             "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (4.3.3)\n",
105 |             "Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (1.0.18)\n",
106 |             "Requirement already satisfied: pickleshare in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (0.7.5)\n",
107 |             "Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.6/dist-packages (from ipython->autoviz) (0.8.1)\n",
108 |             "Requirement already satisfied: scipy>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from seaborn->autoviz) (1.4.1)\n",
109 |             "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->autoviz) (2018.9)\n",
110 |             "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->autoviz) (0.15.1)\n",
111 |             "Requirement already satisfied: widgetsnbextension~=3.5.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets->jupyter->autoviz) (3.5.1)\n",
112 |             "Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.6/dist-packages (from ipywidgets->jupyter->autoviz) (5.0.6)\n",
113 |             "Requirement already satisfied: jupyter-client>=4.1 in /usr/local/lib/python3.6/dist-packages (from qtconsole->jupyter->autoviz) (5.3.4)\n",
114 |             "Requirement already satisfied: qtpy in /usr/local/lib/python3.6/dist-packages (from qtconsole->jupyter->autoviz) (1.9.0)\n",
115 |             "Requirement already satisfied: pyzmq>=17.1 in /usr/local/lib/python3.6/dist-packages (from qtconsole->jupyter->autoviz) (19.0.1)\n",
116 |             "Requirement already satisfied: jupyter-core in /usr/local/lib/python3.6/dist-packages (from qtconsole->jupyter->autoviz) (4.6.3)\n",
117 |             "Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.6/dist-packages (from qtconsole->jupyter->autoviz) (0.2.0)\n",
118 |             "Requirement already satisfied: jinja2 in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (2.11.2)\n",
119 |             "Requirement already satisfied: tornado>=4 in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (4.5.3)\n",
120 |             "Requirement already satisfied: terminado>=0.3.3; sys_platform != \"win32\" in /usr/local/lib/python3.6/dist-packages (from notebook->jupyter->autoviz) (0.8.3)\n",
121 |             "Requirement already satisfied: testpath in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (0.4.4)\n",
122 |             "Requirement already satisfied: bleach in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (3.1.5)\n",
123 |             "Requirement already satisfied: defusedxml in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (0.6.0)\n",
124 |             "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (1.4.2)\n",
125 |             "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (0.3)\n",
126 |             "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from nbconvert->jupyter->autoviz) (0.8.4)\n",
127 |             "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from cycler>=0.10->matplotlib->autoviz) (1.12.0)\n",
128 |             "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.6/dist-packages (from pexpect; sys_platform != \"win32\"->ipython->autoviz) (0.6.0)\n",
129 |             "Requirement already satisfied: wcwidth in /usr/local/lib/python3.6/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython->autoviz) (0.1.9)\n",
130 |             "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /usr/local/lib/python3.6/dist-packages (from nbformat>=4.2.0->ipywidgets->jupyter->autoviz) (2.6.0)\n",
131 |             "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.6/dist-packages (from jinja2->notebook->jupyter->autoviz) (1.1.1)\n",
132 |             "Requirement already satisfied: webencodings in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->jupyter->autoviz) (0.5.1)\n",
133 |             "Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from bleach->nbconvert->jupyter->autoviz) (20.4)\n",
134 |             "Installing collected packages: autoviz\n",
135 |             "Successfully installed autoviz-0.0.68\n"
136 |           ],
137 |           "name": "stdout"
138 |         }
139 |       ]
140 |     },
141 |     {
142 |       "cell_type": "code",
143 |       "metadata": {
144 |         "id": "IMdxDkhjXoWC",
145 |         "colab_type": "code",
146 |         "colab": {}
147 |       },
148 |       "source": [
149 |         "from __future__ import print_function\n",
150 |         "import sys,tempfile,urllib,os\n",
151 |         "import pandas as pd\n",
152 |         "import numpy as np\n"
153 |       ],
154 |       "execution_count": 0,
155 |       "outputs": []
156 |     },
157 |     {
158 |       "cell_type": "markdown",
159 |       "metadata": {
160 |         "id": "nVgNy-D4ZkpA",
161 |         "colab_type": "text"
162 |       },
163 |       "source": [
164 |         "## Load Data"
165 |       ]
166 |     },
167 |     {
168 |       "cell_type": "code",
169 |       "metadata": {
170 |         "id": "ioNPMOZwYr0s",
171 |         "colab_type": "code",
172 |         "colab": {
173 |           "base_uri": "https://localhost:8080/",
174 |           "height": 204
175 |         },
176 |         "outputId": "661a1a14-2471-4f3d-dc42-412459a2d4a8"
177 |       },
178 |       "source": [
179 |         "data = pd.read_csv('diabetes.csv')\n",
180 |         "data.head()"
181 |       ],
182 |       "execution_count": 63,
183 |       "outputs": [
184 |         {
185 |           "output_type": "execute_result",
186 |           "data": {
187 |             "text/html": [
188 |               "<div>\n",
189 |               "<style scoped>\n",
190 |               "    .dataframe tbody tr th:only-of-type {\n",
191 |               "        vertical-align: middle;\n",
192 |               "    }\n",
193 |               "\n",
194 |               "    .dataframe tbody tr th {\n",
195 |               "        vertical-align: top;\n",
196 |               "    }\n",
197 |               "\n",
198 |               "    .dataframe thead th {\n",
199 |               "        text-align: right;\n",
200 |               "    }\n",
201 |               "</style>\n",
202 |               "<table border=\"1\" class=\"dataframe\">\n",
203 |               "  <thead>\n",
204 |               "    <tr style=\"text-align: right;\">\n",
205 |               "      <th></th>\n",
206 |               "      <th>Pregnancies</th>\n",
207 |               "      <th>Glucose</th>\n",
208 |               "      <th>BloodPressure</th>\n",
209 |               "      <th>SkinThickness</th>\n",
210 |               "      <th>Insulin</th>\n",
211 |               "      <th>BMI</th>\n",
212 |               "      <th>DiabetesPedigreeFunction</th>\n",
213 |               "      <th>Age</th>\n",
214 |               "      <th>Outcome</th>\n",
215 |               "    </tr>\n",
216 |               "  </thead>\n",
217 |               "  <tbody>\n",
218 |               "    <tr>\n",
219 |               "      <th>0</th>\n",
220 |               "      <td>6</td>\n",
221 |               "      <td>148</td>\n",
222 |               "      <td>72</td>\n",
223 |               "      <td>35</td>\n",
224 |               "      <td>0</td>\n",
225 |               "      <td>33.6</td>\n",
226 |               "      <td>0.627</td>\n",
227 |               "      <td>50</td>\n",
228 |               "      <td>1</td>\n",
229 |               "    </tr>\n",
230 |               "    <tr>\n",
231 |               "      <th>1</th>\n",
232 |               "      <td>1</td>\n",
233 |               "      <td>85</td>\n",
234 |               "      <td>66</td>\n",
235 |               "      <td>29</td>\n",
236 |               "      <td>0</td>\n",
237 |               "      <td>26.6</td>\n",
238 |               "      <td>0.351</td>\n",
239 |               "      <td>31</td>\n",
240 |               "      <td>0</td>\n",
241 |               "    </tr>\n",
242 |               "    <tr>\n",
243 |               "      <th>2</th>\n",
244 |               "      <td>8</td>\n",
245 |               "      <td>183</td>\n",
246 |               "      <td>64</td>\n",
247 |               "      <td>0</td>\n",
248 |               "      <td>0</td>\n",
249 |               "      <td>23.3</td>\n",
250 |               "      <td>0.672</td>\n",
251 |               "      <td>32</td>\n",
252 |               "      <td>1</td>\n",
253 |               "    </tr>\n",
254 |               "    <tr>\n",
255 |               "      <th>3</th>\n",
256 |               "      <td>1</td>\n",
257 |               "      <td>89</td>\n",
258 |               "      <td>66</td>\n",
259 |               "      <td>23</td>\n",
260 |               "      <td>94</td>\n",
261 |               "      <td>28.1</td>\n",
262 |               "      <td>0.167</td>\n",
263 |               "      <td>21</td>\n",
264 |               "      <td>0</td>\n",
265 |               "    </tr>\n",
266 |               "    <tr>\n",
267 |               "      <th>4</th>\n",
268 |               "      <td>0</td>\n",
269 |               "      <td>137</td>\n",
270 |               "      <td>40</td>\n",
271 |               "      <td>35</td>\n",
272 |               "      <td>168</td>\n",
273 |               "      <td>43.1</td>\n",
274 |               "      <td>2.288</td>\n",
275 |               "      <td>33</td>\n",
276 |               "      <td>1</td>\n",
277 |               "    </tr>\n",
278 |               "  </tbody>\n",
279 |               "</table>\n",
280 |               "</div>"
281 |             ],
282 |             "text/plain": [
283 |               "   Pregnancies  Glucose  BloodPressure  ...  DiabetesPedigreeFunction  Age  Outcome\n",
284 |               "0            6      148             72  ...                     0.627   50        1\n",
285 |               "1            1       85             66  ...                     0.351   31        0\n",
286 |               "2            8      183             64  ...                     0.672   32        1\n",
287 |               "3            1       89             66  ...                     0.167   21        0\n",
288 |               "4            0      137             40  ...                     2.288   33        1\n",
289 |               "\n",
290 |               "[5 rows x 9 columns]"
291 |             ]
292 |           },
293 |           "metadata": {
294 |             "tags": []
295 |           },
296 |           "execution_count": 63
297 |         }
298 |       ]
299 |     },
300 |     {
301 |       "cell_type": "markdown",
302 |       "metadata": {
303 |         "id": "cib6qKMAZomi",
304 |         "colab_type": "text"
305 |       },
306 |       "source": [
307 |         "## Checking missing values for more robust visualization"
308 |       ]
309 |     },
310 |     {
311 |       "cell_type": "code",
312 |       "metadata": {
313 |         "id": "j6Ob3X9ZYsIh",
314 |         "colab_type": "code",
315 |         "colab": {
316 |           "base_uri": "https://localhost:8080/",
317 |           "height": 187
318 |         },
319 |         "outputId": "49cb2f73-f66f-4f65-939e-87ef8801b55f"
320 |       },
321 |       "source": [
322 |         "data.isna().sum()"
323 |       ],
324 |       "execution_count": 64,
325 |       "outputs": [
326 |         {
327 |           "output_type": "execute_result",
328 |           "data": {
329 |             "text/plain": [
330 |               "Pregnancies                 0\n",
331 |               "Glucose                     0\n",
332 |               "BloodPressure               0\n",
333 |               "SkinThickness               0\n",
334 |               "Insulin                     0\n",
335 |               "BMI                         0\n",
336 |               "DiabetesPedigreeFunction    0\n",
337 |               "Age                         0\n",
338 |               "Outcome                     0\n",
339 |               "dtype: int64"
340 |             ]
341 |           },
342 |           "metadata": {
343 |             "tags": []
344 |           },
345 |           "execution_count": 64
346 |         }
347 |       ]
348 |     },
349 |     {
350 |       "cell_type": "markdown",
351 |       "metadata": {
352 |         "id": "4kA6w-h5bYSm",
353 |         "colab_type": "text"
354 |       },
355 |       "source": [
356 |         "### Import the AutoViz library and instantiate it after data preprocessing"
357 |       ]
358 |     },
359 |     {
360 |       "cell_type": "code",
361 |       "metadata": {
362 |         "id": "t_QahUqkaYVj",
363 |         "colab_type": "code",
364 |         "colab": {}
365 |       },
366 |       "source": [
367 |         "from autoviz.AutoViz_Class import AutoViz_Class\n",
368 |         "\n",
369 |         "Av = AutoViz_Class()"
370 |       ],
371 |       "execution_count": 0,
372 |       "outputs": []
373 |     },
374 |     {
375 |       "cell_type": "code",
376 |       "metadata": {
377 |         "id": "vkx81yI7a_Fc",
378 |         "colab_type": "code",
379 |         "colab": {
380 |           "base_uri": "https://localhost:8080/",
381 |           "height": 119
382 |         },
383 |         "outputId": "0cf84419-1e45-45a9-b93e-881a3afe9a3b"
384 |       },
385 |       "source": [
386 |         "dft= Av.AutoViz('data.csv', sep=',', depVar='Outcome', dfte=None, header=0, verbose=0, lowess=False, chart_format='svg', max_rows_analyzed=7500, max_cols_analyzed=50)"
387 |       ],
388 |       "execution_count": 68,
389 |       "outputs": [
390 |         {
391 |           "output_type": "stream",
392 |           "text": [
393 |             "File encoding decoder utf-8 does not work for this file\n",
394 |             "File encoding decoder iso-8859-11 does not work for this file\n",
395 |             "File encoding decoder cpl252 does not work for this file\n",
396 |             "File encoding decoder latin1 does not work for this file\n",
397 |             "None of the decoders work...\n",
398 |             "Not able to read or load file. Please check your inputs and try again...\n"
399 |           ],
400 |           "name": "stdout"
401 |         }
402 |       ]
403 |     },
404 |     {
405 |       "cell_type": "code",
406 |       "metadata": {
407 |         "id": "kZ5ivEJbfVju",
408 |         "colab_type": "code",
409 |         "colab": {}
410 |       },
411 |       "source": [
412 |         ""
413 |       ],
414 |       "execution_count": 0,
415 |       "outputs": []
416 |     }
417 |   ]
418 | }


--------------------------------------------------------------------------------
/AutoGluon/AutoGluon.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "AutoGluon.ipynb",
  7 |       "provenance": [],
  8 |       "toc_visible": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     }
 14 |   },
 15 |   "cells": [
 16 |     {
 17 |       "cell_type": "markdown",
 18 |       "metadata": {
 19 |         "id": "RK9pKgXjlCqD",
 20 |         "colab_type": "text"
 21 |       },
 22 |       "source": [
 23 |         "# AutoGluon: AutoML Toolkit for Deep Learning\n",
 24 |         "AutoGluon enables easy-to-use and easy-to-extend AutoML with a focus on deep learning and real-world applications spanning image, text, or tabular data. Intended for both ML beginners and experts, AutoGluon enables you to:\n",
 25 |         "\n",
 26 |         "- Quickly prototype deep learning solutions for your data with few lines of code.\n",
 27 |         "\n",
 28 |         "- Leverage automatic hyperparameter tuning, model selection / architecture search, and data processing.\n",
 29 |         "\n",
 30 |         "- Automatically utilize state-of-the-art deep learning techniques without expert knowledge.\n",
 31 |         "\n",
 32 |         "- Easily improve existing bespoke models and data pipelines, or customize AutoGluon for your use-case."
 33 |       ]
 34 |     },
 35 |     {
 36 |       "cell_type": "markdown",
 37 |       "metadata": {
 38 |         "id": "Dzp7nl6Rlbuw",
 39 |         "colab_type": "text"
 40 |       },
 41 |       "source": [
 42 |         "## Install Libraries"
 43 |       ]
 44 |     },
 45 |     {
 46 |       "cell_type": "code",
 47 |       "metadata": {
 48 |         "id": "fRD9UM82lBTy",
 49 |         "colab_type": "code",
 50 |         "colab": {
 51 |           "base_uri": "https://localhost:8080/",
 52 |           "height": 1000
 53 |         },
 54 |         "outputId": "2641aed7-f100-46d3-ee7d-0608c02737a0"
 55 |       },
 56 |       "source": [
 57 |         "!pip install --upgrade mxnet\n",
 58 |         "!pip install autogluon"
 59 |       ],
 60 |       "execution_count": 8,
 61 |       "outputs": [
 62 |         {
 63 |           "output_type": "stream",
 64 |           "text": [
 65 |             "Requirement already up-to-date: mxnet in /usr/local/lib/python3.6/dist-packages (1.6.0)\n",
 66 |             "Requirement already satisfied, skipping upgrade: graphviz<0.9.0,>=0.8.1 in /usr/local/lib/python3.6/dist-packages (from mxnet) (0.8.4)\n",
 67 |             "Requirement already satisfied, skipping upgrade: requests<3,>=2.20.0 in /usr/local/lib/python3.6/dist-packages (from mxnet) (2.23.0)\n",
 68 |             "Requirement already satisfied, skipping upgrade: numpy<2.0.0,>1.16.0 in /usr/local/lib/python3.6/dist-packages (from mxnet) (1.18.4)\n",
 69 |             "Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.20.0->mxnet) (3.0.4)\n",
 70 |             "Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.20.0->mxnet) (1.24.3)\n",
 71 |             "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.20.0->mxnet) (2020.4.5.1)\n",
 72 |             "Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.20.0->mxnet) (2.9)\n",
 73 |             "Requirement already satisfied: autogluon in /usr/local/lib/python3.6/dist-packages (0.0.9)\n",
 74 |             "Requirement already satisfied: pytest in /usr/local/lib/python3.6/dist-packages (from autogluon) (3.6.4)\n",
 75 |             "Requirement already satisfied: ConfigSpace<=0.4.10 in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.4.10)\n",
 76 |             "Requirement already satisfied: scikit-learn<0.23,>=0.20.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.22.2.post1)\n",
 77 |             "Requirement already satisfied: networkx<3.0,>=2.3 in /usr/local/lib/python3.6/dist-packages (from autogluon) (2.4)\n",
 78 |             "Requirement already satisfied: Pillow<=6.2.1 in /usr/local/lib/python3.6/dist-packages (from autogluon) (6.2.1)\n",
 79 |             "Requirement already satisfied: graphviz in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.8.4)\n",
 80 |             "Requirement already satisfied: distributed>=2.6.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (2.17.0)\n",
 81 |             "Requirement already satisfied: dask>=2.6.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (2.12.0)\n",
 82 |             "Requirement already satisfied: catboost<0.24 in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.23.2)\n",
 83 |             "Requirement already satisfied: boto3 in /usr/local/lib/python3.6/dist-packages (from autogluon) (1.13.13)\n",
 84 |             "Requirement already satisfied: numpy>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (1.18.4)\n",
 85 |             "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from autogluon) (3.2.1)\n",
 86 |             "Requirement already satisfied: paramiko>=2.4 in /usr/local/lib/python3.6/dist-packages (from autogluon) (2.7.1)\n",
 87 |             "Requirement already satisfied: lightgbm<3.0,>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (2.3.1)\n",
 88 |             "Requirement already satisfied: cython in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.29.18)\n",
 89 |             "Requirement already satisfied: gluoncv>=0.5.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.7.0)\n",
 90 |             "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from autogluon) (2.23.0)\n",
 91 |             "Requirement already satisfied: scikit-optimize in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.7.4)\n",
 92 |             "Requirement already satisfied: tornado>=5.0.1 in /usr/local/lib/python3.6/dist-packages (from autogluon) (6.0.4)\n",
 93 |             "Requirement already satisfied: cryptography>=2.8 in /usr/local/lib/python3.6/dist-packages (from autogluon) (2.9.2)\n",
 94 |             "Requirement already satisfied: pandas<1.0,>=0.24.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.25.3)\n",
 95 |             "Requirement already satisfied: tqdm>=4.38.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (4.41.1)\n",
 96 |             "Requirement already satisfied: gluonnlp==0.8.1 in /usr/local/lib/python3.6/dist-packages (from autogluon) (0.8.1)\n",
 97 |             "Requirement already satisfied: scipy>=1.3.3 in /usr/local/lib/python3.6/dist-packages (from autogluon) (1.4.1)\n",
 98 |             "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.6/dist-packages (from autogluon) (5.4.8)\n",
 99 |             "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.6/dist-packages (from pytest->autogluon) (19.3.0)\n",
100 |             "Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.6/dist-packages (from pytest->autogluon) (8.3.0)\n",
101 |             "Requirement already satisfied: atomicwrites>=1.0 in /usr/local/lib/python3.6/dist-packages (from pytest->autogluon) (1.4.0)\n",
102 |             "Requirement already satisfied: pluggy<0.8,>=0.5 in /usr/local/lib/python3.6/dist-packages (from pytest->autogluon) (0.7.1)\n",
103 |             "Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from pytest->autogluon) (1.12.0)\n",
104 |             "Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.6/dist-packages (from pytest->autogluon) (1.8.1)\n",
105 |             "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from pytest->autogluon) (46.4.0)\n",
106 |             "Requirement already satisfied: typing in /usr/local/lib/python3.6/dist-packages (from ConfigSpace<=0.4.10->autogluon) (3.6.6)\n",
107 |             "Requirement already satisfied: pyparsing in /usr/local/lib/python3.6/dist-packages (from ConfigSpace<=0.4.10->autogluon) (2.4.7)\n",
108 |             "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn<0.23,>=0.20.0->autogluon) (0.15.1)\n",
109 |             "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.6/dist-packages (from networkx<3.0,>=2.3->autogluon) (4.4.2)\n",
110 |             "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (2.1.0)\n",
111 |             "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (0.10.0)\n",
112 |             "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (2.0.0)\n",
113 |             "Requirement already satisfied: cloudpickle>=1.3.0 in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (1.3.0)\n",
114 |             "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (1.6.0)\n",
115 |             "Requirement already satisfied: contextvars; python_version < \"3.7\" in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (2.4)\n",
116 |             "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (1.0.0)\n",
117 |             "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (3.13)\n",
118 |             "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.6/dist-packages (from distributed>=2.6.0->autogluon) (7.1.2)\n",
119 |             "Requirement already satisfied: plotly in /usr/local/lib/python3.6/dist-packages (from catboost<0.24->autogluon) (4.4.1)\n",
120 |             "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /usr/local/lib/python3.6/dist-packages (from boto3->autogluon) (0.10.0)\n",
121 |             "Requirement already satisfied: botocore<1.17.0,>=1.16.13 in /usr/local/lib/python3.6/dist-packages (from boto3->autogluon) (1.16.13)\n",
122 |             "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /usr/local/lib/python3.6/dist-packages (from boto3->autogluon) (0.3.3)\n",
123 |             "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autogluon) (2.8.1)\n",
124 |             "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autogluon) (0.10.0)\n",
125 |             "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->autogluon) (1.2.0)\n",
126 |             "Requirement already satisfied: pynacl>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from paramiko>=2.4->autogluon) (1.4.0)\n",
127 |             "Requirement already satisfied: bcrypt>=3.1.3 in /usr/local/lib/python3.6/dist-packages (from paramiko>=2.4->autogluon) (3.1.7)\n",
128 |             "Requirement already satisfied: portalocker in /usr/local/lib/python3.6/dist-packages (from gluoncv>=0.5.0->autogluon) (1.7.0)\n",
129 |             "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->autogluon) (3.0.4)\n",
130 |             "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->autogluon) (2.9)\n",
131 |             "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->autogluon) (2020.4.5.1)\n",
132 |             "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->autogluon) (1.24.3)\n",
133 |             "Requirement already satisfied: pyaml>=16.9 in /usr/local/lib/python3.6/dist-packages (from scikit-optimize->autogluon) (20.4.0)\n",
134 |             "Requirement already satisfied: cffi!=1.11.3,>=1.8 in /usr/local/lib/python3.6/dist-packages (from cryptography>=2.8->autogluon) (1.14.0)\n",
135 |             "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas<1.0,>=0.24.0->autogluon) (2018.9)\n",
136 |             "Requirement already satisfied: heapdict in /usr/local/lib/python3.6/dist-packages (from zict>=0.1.3->distributed>=2.6.0->autogluon) (1.0.1)\n",
137 |             "Requirement already satisfied: immutables>=0.9 in /usr/local/lib/python3.6/dist-packages (from contextvars; python_version < \"3.7\"->distributed>=2.6.0->autogluon) (0.14)\n",
138 |             "Requirement already satisfied: retrying>=1.3.3 in /usr/local/lib/python3.6/dist-packages (from plotly->catboost<0.24->autogluon) (1.3.3)\n",
139 |             "Requirement already satisfied: docutils<0.16,>=0.10 in /usr/local/lib/python3.6/dist-packages (from botocore<1.17.0,>=1.16.13->boto3->autogluon) (0.15.2)\n",
140 |             "Requirement already satisfied: pycparser in /usr/local/lib/python3.6/dist-packages (from cffi!=1.11.3,>=1.8->cryptography>=2.8->autogluon) (2.20)\n"
141 |           ],
142 |           "name": "stdout"
143 |         }
144 |       ]
145 |     },
146 |     {
147 |       "cell_type": "markdown",
148 |       "metadata": {
149 |         "id": "52KQA5nwlh_9",
150 |         "colab_type": "text"
151 |       },
152 |       "source": [
153 |         "## Import Libraries"
154 |       ]
155 |     },
156 |     {
157 |       "cell_type": "code",
158 |       "metadata": {
159 |         "id": "BTkhMtZnlLAe",
160 |         "colab_type": "code",
161 |         "colab": {}
162 |       },
163 |       "source": [
164 |         "import pandas as pd\n",
165 |         "import numpy as np\n",
166 |         "import autogluon as ag"
167 |       ],
168 |       "execution_count": 0,
169 |       "outputs": []
170 |     },
171 |     {
172 |       "cell_type": "code",
173 |       "metadata": {
174 |         "id": "wNvddlGQlXn9",
175 |         "colab_type": "code",
176 |         "colab": {}
177 |       },
178 |       "source": [
179 |         "from autogluon import TabularPrediction as task"
180 |       ],
181 |       "execution_count": 0,
182 |       "outputs": []
183 |     },
184 |     {
185 |       "cell_type": "markdown",
186 |       "metadata": {
187 |         "id": "NiATln3hrUMw",
188 |         "colab_type": "text"
189 |       },
190 |       "source": [
191 |         "## Load data"
192 |       ]
193 |     },
194 |     {
195 |       "cell_type": "code",
196 |       "metadata": {
197 |         "id": "X-co46I2nGOC",
198 |         "colab_type": "code",
199 |         "colab": {}
200 |       },
201 |       "source": [
202 |         "data =pd.read_csv('Patient_data.csv')"
203 |       ],
204 |       "execution_count": 0,
205 |       "outputs": []
206 |     },
207 |     {
208 |       "cell_type": "code",
209 |       "metadata": {
210 |         "id": "HpqidXYmnUJU",
211 |         "colab_type": "code",
212 |         "colab": {}
213 |       },
214 |       "source": [
215 |         "data"
216 |       ],
217 |       "execution_count": 0,
218 |       "outputs": []
219 |     },
220 |     {
221 |       "cell_type": "markdown",
222 |       "metadata": {
223 |         "id": "sh6fm_-2rWWg",
224 |         "colab_type": "text"
225 |       },
226 |       "source": [
227 |         "## Train-Test Split"
228 |       ]
229 |     },
230 |     {
231 |       "cell_type": "code",
232 |       "metadata": {
233 |         "id": "i3RqnifxnZlh",
234 |         "colab_type": "code",
235 |         "colab": {}
236 |       },
237 |       "source": [
238 |         "size = int(0.7*data.shape[0])\n",
239 |         "train_df = data[:size]\n",
240 |         "test_df = data[size:]"
241 |       ],
242 |       "execution_count": 0,
243 |       "outputs": []
244 |     },
245 |     {
246 |       "cell_type": "code",
247 |       "metadata": {
248 |         "id": "T-uCrjtRoKjc",
249 |         "colab_type": "code",
250 |         "colab": {
251 |           "base_uri": "https://localhost:8080/",
252 |           "height": 34
253 |         },
254 |         "outputId": "bd2bd4f5-c7f8-4960-b7c0-1ce03cdc6fb5"
255 |       },
256 |       "source": [
257 |         "test_df.shape"
258 |       ],
259 |       "execution_count": 19,
260 |       "outputs": [
261 |         {
262 |           "output_type": "execute_result",
263 |           "data": {
264 |             "text/plain": [
265 |               "(93, 7)"
266 |             ]
267 |           },
268 |           "metadata": {
269 |             "tags": []
270 |           },
271 |           "execution_count": 19
272 |         }
273 |       ]
274 |     },
275 |     {
276 |       "cell_type": "code",
277 |       "metadata": {
278 |         "id": "Mri8cwMdoTZ8",
279 |         "colab_type": "code",
280 |         "colab": {}
281 |       },
282 |       "source": [
283 |         "train_data=task.Dataset(df=train_df)\n",
284 |         "test_data=task.Dataset(df=test_df)"
285 |       ],
286 |       "execution_count": 0,
287 |       "outputs": []
288 |     },
289 |     {
290 |       "cell_type": "code",
291 |       "metadata": {
292 |         "id": "QPX2_E_CokRb",
293 |         "colab_type": "code",
294 |         "colab": {}
295 |       },
296 |       "source": [
297 |         "label_column= 'class'\n",
298 |         "train_data[label_column].describe()"
299 |       ],
300 |       "execution_count": 0,
301 |       "outputs": []
302 |     },
303 |     {
304 |       "cell_type": "markdown",
305 |       "metadata": {
306 |         "id": "cNRCFy3trfFt",
307 |         "colab_type": "text"
308 |       },
309 |       "source": [
310 |         "## Build Model"
311 |       ]
312 |     },
313 |     {
314 |       "cell_type": "code",
315 |       "metadata": {
316 |         "id": "bdDw7CZUo7QL",
317 |         "colab_type": "code",
318 |         "colab": {}
319 |       },
320 |       "source": [
321 |         "pred= task.fit(train_data=train_data,label=label_column, eval_metric ='accuracy')"
322 |       ],
323 |       "execution_count": 0,
324 |       "outputs": []
325 |     },
326 |     {
327 |       "cell_type": "code",
328 |       "metadata": {
329 |         "id": "3bOIHHL9pLOF",
330 |         "colab_type": "code",
331 |         "colab": {}
332 |       },
333 |       "source": [
334 |         "y_test= test_data[label_column]\n",
335 |         "\n",
336 |         "test_data_n= test_data.drop(labels=[label_column],axis=1)\n",
337 |         "test_data_n"
338 |       ],
339 |       "execution_count": 0,
340 |       "outputs": []
341 |     },
342 |     {
343 |       "cell_type": "code",
344 |       "metadata": {
345 |         "id": "v-AJbhGFpo_U",
346 |         "colab_type": "code",
347 |         "colab": {}
348 |       },
349 |       "source": [
350 |         "y_pred=pred.predict(test_data_n)\n",
351 |         "y_pred"
352 |       ],
353 |       "execution_count": 0,
354 |       "outputs": []
355 |     },
356 |     {
357 |       "cell_type": "code",
358 |       "metadata": {
359 |         "id": "RNNyqG0wp61l",
360 |         "colab_type": "code",
361 |         "colab": {}
362 |       },
363 |       "source": [
364 |         "pred_f= pred.evaluate_predictions(y_true=y_test, y_pred =y_pred ,auxiliary_metrics =True)"
365 |       ],
366 |       "execution_count": 0,
367 |       "outputs": []
368 |     },
369 |     {
370 |       "cell_type": "code",
371 |       "metadata": {
372 |         "id": "CmA8SYf_qP_6",
373 |         "colab_type": "code",
374 |         "colab": {}
375 |       },
376 |       "source": [
377 |         "pred.problem_type"
378 |       ],
379 |       "execution_count": 0,
380 |       "outputs": []
381 |     },
382 |     {
383 |       "cell_type": "code",
384 |       "metadata": {
385 |         "id": "4iPT6uj1qYas",
386 |         "colab_type": "code",
387 |         "colab": {}
388 |       },
389 |       "source": [
390 |         "pred.feature_types"
391 |       ],
392 |       "execution_count": 0,
393 |       "outputs": []
394 |     },
395 |     {
396 |       "cell_type": "code",
397 |       "metadata": {
398 |         "id": "MLFdFpn6qeXd",
399 |         "colab_type": "code",
400 |         "colab": {}
401 |       },
402 |       "source": [
403 |         "pred.predict_proba(test_data_n)"
404 |       ],
405 |       "execution_count": 0,
406 |       "outputs": []
407 |     },
408 |     {
409 |       "cell_type": "markdown",
410 |       "metadata": {
411 |         "id": "6m_qo9tIrjRL",
412 |         "colab_type": "text"
413 |       },
414 |       "source": [
415 |         "## Check Leaderboard"
416 |       ]
417 |     },
418 |     {
419 |       "cell_type": "code",
420 |       "metadata": {
421 |         "id": "eq2D9oV7qelY",
422 |         "colab_type": "code",
423 |         "colab": {}
424 |       },
425 |       "source": [
426 |         "pred.leaderboard()"
427 |       ],
428 |       "execution_count": 0,
429 |       "outputs": []
430 |     },
431 |     {
432 |       "cell_type": "code",
433 |       "metadata": {
434 |         "id": "1x2GSoRrqexI",
435 |         "colab_type": "code",
436 |         "colab": {}
437 |       },
438 |       "source": [
439 |         "pred.fit_summary()"
440 |       ],
441 |       "execution_count": 0,
442 |       "outputs": []
443 |     },
444 |     {
445 |       "cell_type": "code",
446 |       "metadata": {
447 |         "id": "rrbQr-nprABQ",
448 |         "colab_type": "code",
449 |         "colab": {}
450 |       },
451 |       "source": [
452 |         ""
453 |       ],
454 |       "execution_count": 0,
455 |       "outputs": []
456 |     },
457 |     {
458 |       "cell_type": "markdown",
459 |       "metadata": {
460 |         "id": "b6lo2Wv-rAou",
461 |         "colab_type": "text"
462 |       },
463 |       "source": [
464 |         "There is an issue on my system, so I have just created a template for this workflow."
465 |       ]
466 |     },
467 |     {
468 |       "cell_type": "code",
469 |       "metadata": {
470 |         "id": "I-F99fFkrPy8",
471 |         "colab_type": "code",
472 |         "colab": {}
473 |       },
474 |       "source": [
475 |         ""
476 |       ],
477 |       "execution_count": 0,
478 |       "outputs": []
479 |     }
480 |   ]
481 | }


--------------------------------------------------------------------------------
/lazy-predict/LazyPredict-notebook.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "LazyPredict.ipynb",
  7 |       "provenance": []
  8 |     },
  9 |     "kernelspec": {
 10 |       "name": "python3",
 11 |       "display_name": "Python 3"
 12 |     }
 13 |   },
 14 |   "cells": [
 15 |     {
 16 |       "cell_type": "code",
 17 |       "metadata": {
 18 |         "colab": {
 19 |           "base_uri": "https://localhost:8080/"
 20 |         },
 21 |         "id": "IkDJlg7k-Uvk",
 22 |         "outputId": "3e66f683-1cbf-4152-d443-1b3e876611ea"
 23 |       },
 24 |       "source": [
 25 |         "!pip install lazypredict"
 26 |       ],
 27 |       "execution_count": 2,
 28 |       "outputs": [
 29 |         {
 30 |           "output_type": "stream",
 31 |           "text": [
 32 |             "Collecting lazypredict\n",
 33 |             "  Downloading https://files.pythonhosted.org/packages/5f/db/1566dca1050ea74e9474dca0f1e7bbffcb0c3e694cf92e7e6e7ef9fca3af/lazypredict-0.2.7-py2.py3-none-any.whl\n",
 34 |             "Requirement already satisfied: Click>=7.0 in /usr/local/lib/python3.6/dist-packages (from lazypredict) (7.1.2)\n",
 35 |             "Installing collected packages: lazypredict\n",
 36 |             "Successfully installed lazypredict-0.2.7\n"
 37 |           ],
 38 |           "name": "stdout"
 39 |         }
 40 |       ]
 41 |     },
 42 |     {
 43 |       "cell_type": "code",
 44 |       "metadata": {
 45 |         "colab": {
 46 |           "base_uri": "https://localhost:8080/"
 47 |         },
 48 |         "id": "M2sO02rc-YMO",
 49 |         "outputId": "c97bfb60-9bf6-4dbf-8a97-d6816d82129a"
 50 |       },
 51 |       "source": [
 52 |         "from lazypredict.Supervised import LazyRegressor"
 53 |       ],
 54 |       "execution_count": 3,
 55 |       "outputs": [
 56 |         {
 57 |           "output_type": "stream",
 58 |           "text": [
 59 |             "/usr/local/lib/python3.6/dist-packages/sklearn/utils/deprecation.py:144: FutureWarning: The sklearn.utils.testing module is  deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. Anything that cannot be imported from sklearn.utils is now part of the private API.\n",
 60 |             "  warnings.warn(message, FutureWarning)\n"
 61 |           ],
 62 |           "name": "stderr"
 63 |         }
 64 |       ]
 65 |     },
 66 |     {
 67 |       "cell_type": "code",
 68 |       "metadata": {
 69 |         "id": "SUPi1STO-PAC"
 70 |       },
 71 |       "source": [
 72 |         "import numpy as np\r\n",
 73 |         "import pandas as pd\r\n",
 74 |         "from sklearn.model_selection import train_test_split"
 75 |       ],
 76 |       "execution_count": 4,
 77 |       "outputs": []
 78 |     },
 79 |     {
 80 |       "cell_type": "code",
 81 |       "metadata": {
 82 |         "id": "Vs-eb4gY-RLA"
 83 |       },
 84 |       "source": [
 85 |         "df = pd.read_csv(\"Data.csv\")"
 86 |       ],
 87 |       "execution_count": 5,
 88 |       "outputs": []
 89 |     },
 90 |     {
 91 |       "cell_type": "code",
 92 |       "metadata": {
 93 |         "colab": {
 94 |           "base_uri": "https://localhost:8080/",
 95 |           "height": 141
 96 |         },
 97 |         "id": "HkkVoDSO-c4v",
 98 |         "outputId": "93b14064-840b-4495-98fb-fcc51041fcdc"
 99 |       },
100 |       "source": [
101 |         "df.head(3)"
102 |       ],
103 |       "execution_count": 6,
104 |       "outputs": [
105 |         {
106 |           "output_type": "execute_result",
107 |           "data": {
108 |             "text/html": [
109 |               "<div>\n",
110 |               "<style scoped>\n",
111 |               "    .dataframe tbody tr th:only-of-type {\n",
112 |               "        vertical-align: middle;\n",
113 |               "    }\n",
114 |               "\n",
115 |               "    .dataframe tbody tr th {\n",
116 |               "        vertical-align: top;\n",
117 |               "    }\n",
118 |               "\n",
119 |               "    .dataframe thead th {\n",
120 |               "        text-align: right;\n",
121 |               "    }\n",
122 |               "</style>\n",
123 |               "<table border=\"1\" class=\"dataframe\">\n",
124 |               "  <thead>\n",
125 |               "    <tr style=\"text-align: right;\">\n",
126 |               "      <th></th>\n",
127 |               "      <th>TV</th>\n",
128 |               "      <th>radio</th>\n",
129 |               "      <th>newspaper</th>\n",
130 |               "      <th>sales</th>\n",
131 |               "    </tr>\n",
132 |               "  </thead>\n",
133 |               "  <tbody>\n",
134 |               "    <tr>\n",
135 |               "      <th>0</th>\n",
136 |               "      <td>230.10</td>\n",
137 |               "      <td>37.80</td>\n",
138 |               "      <td>69.20</td>\n",
139 |               "      <td>22.10</td>\n",
140 |               "    </tr>\n",
141 |               "    <tr>\n",
142 |               "      <th>1</th>\n",
143 |               "      <td>44.50</td>\n",
144 |               "      <td>39.30</td>\n",
145 |               "      <td>45.10</td>\n",
146 |               "      <td>10.40</td>\n",
147 |               "    </tr>\n",
148 |               "    <tr>\n",
149 |               "      <th>2</th>\n",
150 |               "      <td>17.20</td>\n",
151 |               "      <td>45.90</td>\n",
152 |               "      <td>69.30</td>\n",
153 |               "      <td>9.30</td>\n",
154 |               "    </tr>\n",
155 |               "  </tbody>\n",
156 |               "</table>\n",
157 |               "</div>"
158 |             ],
159 |             "text/plain": [
160 |               "      TV  radio  newspaper  sales\n",
161 |               "0 230.10  37.80      69.20  22.10\n",
162 |               "1  44.50  39.30      45.10  10.40\n",
163 |               "2  17.20  45.90      69.30   9.30"
164 |             ]
165 |           },
166 |           "metadata": {
167 |             "tags": []
168 |           },
169 |           "execution_count": 6
170 |         }
171 |       ]
172 |     },
173 |     {
174 |       "cell_type": "code",
175 |       "metadata": {
176 |         "id": "Aeq2oSnr-d9r"
177 |       },
178 |       "source": [
179 |         "target = \"sales\"\r\n",
180 |         "X = df.loc[:, df.columns != target]\r\n",
181 |         "y = df.loc[:, df.columns == target]"
182 |       ],
183 |       "execution_count": 7,
184 |       "outputs": []
185 |     },
186 |     {
187 |       "cell_type": "code",
188 |       "metadata": {
189 |         "id": "xc0MpM7N-tbf"
190 |       },
191 |       "source": [
192 |         "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, \r\n",
193 |         "                                                    random_state=42)"
194 |       ],
195 |       "execution_count": 9,
196 |       "outputs": []
197 |     },
198 |     {
199 |       "cell_type": "code",
200 |       "metadata": {
201 |         "id": "bqI9lK6F-xhD"
202 |       },
203 |       "source": [
204 |         "reg = LazyRegressor(verbose=0,ignore_warnings=False, custom_metric=None )"
205 |       ],
206 |       "execution_count": 10,
207 |       "outputs": []
208 |     },
209 |     {
210 |       "cell_type": "code",
211 |       "metadata": {
212 |         "colab": {
213 |           "base_uri": "https://localhost:8080/"
214 |         },
215 |         "id": "tNpgY52l-zzh",
216 |         "outputId": "b6d54ac9-2204-423a-e01b-f9b8551b3a3f"
217 |       },
218 |       "source": [
219 |         "models,predictions = reg.fit(X_train, X_test, y_train, y_test)"
220 |       ],
221 |       "execution_count": 11,
222 |       "outputs": [
223 |         {
224 |           "output_type": "stream",
225 |           "text": [
226 |             " 97%|█████████▋| 38/39 [00:01<00:00, 19.40it/s]"
227 |           ],
228 |           "name": "stderr"
229 |         },
230 |         {
231 |           "output_type": "stream",
232 |           "text": [
233 |             "StackingRegressor model failed to execute\n",
234 |             "__init__() missing 1 required positional argument: 'estimators'\n",
235 |             "[16:29:47] WARNING: /workspace/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
236 |           ],
237 |           "name": "stdout"
238 |         },
239 |         {
240 |           "output_type": "stream",
241 |           "text": [
242 |             "\r100%|██████████| 39/39 [00:01<00:00, 20.91it/s]\n"
243 |           ],
244 |           "name": "stderr"
245 |         }
246 |       ]
247 |     },
248 |     {
249 |       "cell_type": "code",
250 |       "metadata": {
251 |         "colab": {
252 |           "base_uri": "https://localhost:8080/",
253 |           "height": 1000
254 |         },
255 |         "id": "Gta3dVaT-2r3",
256 |         "outputId": "b1e4bd95-bd9f-47cf-842b-21e033c2d1ff"
257 |       },
258 |       "source": [
259 |         "models"
260 |       ],
261 |       "execution_count": 12,
262 |       "outputs": [
263 |         {
264 |           "output_type": "execute_result",
265 |           "data": {
266 |             "text/html": [
267 |               "<div>\n",
268 |               "<style scoped>\n",
269 |               "    .dataframe tbody tr th:only-of-type {\n",
270 |               "        vertical-align: middle;\n",
271 |               "    }\n",
272 |               "\n",
273 |               "    .dataframe tbody tr th {\n",
274 |               "        vertical-align: top;\n",
275 |               "    }\n",
276 |               "\n",
277 |               "    .dataframe thead th {\n",
278 |               "        text-align: right;\n",
279 |               "    }\n",
280 |               "</style>\n",
281 |               "<table border=\"1\" class=\"dataframe\">\n",
282 |               "  <thead>\n",
283 |               "    <tr style=\"text-align: right;\">\n",
284 |               "      <th></th>\n",
285 |               "      <th>R-Squared</th>\n",
286 |               "      <th>RMSE</th>\n",
287 |               "      <th>Time Taken</th>\n",
288 |               "    </tr>\n",
289 |               "    <tr>\n",
290 |               "      <th>Model</th>\n",
291 |               "      <th></th>\n",
292 |               "      <th></th>\n",
293 |               "      <th></th>\n",
294 |               "    </tr>\n",
295 |               "  </thead>\n",
296 |               "  <tbody>\n",
297 |               "    <tr>\n",
298 |               "      <th>ExtraTreesRegressor</th>\n",
299 |               "      <td>0.99</td>\n",
300 |               "      <td>0.61</td>\n",
301 |               "      <td>0.12</td>\n",
302 |               "    </tr>\n",
303 |               "    <tr>\n",
304 |               "      <th>GradientBoostingRegressor</th>\n",
305 |               "      <td>0.98</td>\n",
306 |               "      <td>0.73</td>\n",
307 |               "      <td>0.06</td>\n",
308 |               "    </tr>\n",
309 |               "    <tr>\n",
310 |               "      <th>XGBRegressor</th>\n",
311 |               "      <td>0.98</td>\n",
312 |               "      <td>0.75</td>\n",
313 |               "      <td>0.16</td>\n",
314 |               "    </tr>\n",
315 |               "    <tr>\n",
316 |               "      <th>RandomForestRegressor</th>\n",
317 |               "      <td>0.98</td>\n",
318 |               "      <td>0.77</td>\n",
319 |               "      <td>0.20</td>\n",
320 |               "    </tr>\n",
321 |               "    <tr>\n",
322 |               "      <th>BaggingRegressor</th>\n",
323 |               "      <td>0.98</td>\n",
324 |               "      <td>0.87</td>\n",
325 |               "      <td>0.03</td>\n",
326 |               "    </tr>\n",
327 |               "    <tr>\n",
328 |               "      <th>LGBMRegressor</th>\n",
329 |               "      <td>0.97</td>\n",
330 |               "      <td>0.89</td>\n",
331 |               "      <td>0.05</td>\n",
332 |               "    </tr>\n",
333 |               "    <tr>\n",
334 |               "      <th>HistGradientBoostingRegressor</th>\n",
335 |               "      <td>0.97</td>\n",
336 |               "      <td>0.97</td>\n",
337 |               "      <td>0.11</td>\n",
338 |               "    </tr>\n",
339 |               "    <tr>\n",
340 |               "      <th>AdaBoostRegressor</th>\n",
341 |               "      <td>0.96</td>\n",
342 |               "      <td>1.16</td>\n",
343 |               "      <td>0.12</td>\n",
344 |               "    </tr>\n",
345 |               "    <tr>\n",
346 |               "      <th>ExtraTreeRegressor</th>\n",
347 |               "      <td>0.94</td>\n",
348 |               "      <td>1.37</td>\n",
349 |               "      <td>0.01</td>\n",
350 |               "    </tr>\n",
351 |               "    <tr>\n",
352 |               "      <th>DecisionTreeRegressor</th>\n",
353 |               "      <td>0.93</td>\n",
354 |               "      <td>1.48</td>\n",
355 |               "      <td>0.01</td>\n",
356 |               "    </tr>\n",
357 |               "    <tr>\n",
358 |               "      <th>KNeighborsRegressor</th>\n",
359 |               "      <td>0.93</td>\n",
360 |               "      <td>1.49</td>\n",
361 |               "      <td>0.01</td>\n",
362 |               "    </tr>\n",
363 |               "    <tr>\n",
364 |               "      <th>SVR</th>\n",
365 |               "      <td>0.91</td>\n",
366 |               "      <td>1.69</td>\n",
367 |               "      <td>0.01</td>\n",
368 |               "    </tr>\n",
369 |               "    <tr>\n",
370 |               "      <th>NuSVR</th>\n",
371 |               "      <td>0.91</td>\n",
372 |               "      <td>1.72</td>\n",
373 |               "      <td>0.03</td>\n",
374 |               "    </tr>\n",
375 |               "    <tr>\n",
376 |               "      <th>OrthogonalMatchingPursuitCV</th>\n",
377 |               "      <td>0.90</td>\n",
378 |               "      <td>1.77</td>\n",
379 |               "      <td>0.03</td>\n",
380 |               "    </tr>\n",
381 |               "    <tr>\n",
382 |               "      <th>TransformedTargetRegressor</th>\n",
383 |               "      <td>0.90</td>\n",
384 |               "      <td>1.78</td>\n",
385 |               "      <td>0.01</td>\n",
386 |               "    </tr>\n",
387 |               "    <tr>\n",
388 |               "      <th>Lars</th>\n",
389 |               "      <td>0.90</td>\n",
390 |               "      <td>1.78</td>\n",
391 |               "      <td>0.03</td>\n",
392 |               "    </tr>\n",
393 |               "    <tr>\n",
394 |               "      <th>LinearRegression</th>\n",
395 |               "      <td>0.90</td>\n",
396 |               "      <td>1.78</td>\n",
397 |               "      <td>0.01</td>\n",
398 |               "    </tr>\n",
399 |               "    <tr>\n",
400 |               "      <th>RidgeCV</th>\n",
401 |               "      <td>0.90</td>\n",
402 |               "      <td>1.78</td>\n",
403 |               "      <td>0.01</td>\n",
404 |               "    </tr>\n",
405 |               "    <tr>\n",
406 |               "      <th>BayesianRidge</th>\n",
407 |               "      <td>0.90</td>\n",
408 |               "      <td>1.78</td>\n",
409 |               "      <td>0.05</td>\n",
410 |               "    </tr>\n",
411 |               "    <tr>\n",
412 |               "      <th>SGDRegressor</th>\n",
413 |               "      <td>0.90</td>\n",
414 |               "      <td>1.78</td>\n",
415 |               "      <td>0.01</td>\n",
416 |               "    </tr>\n",
417 |               "    <tr>\n",
418 |               "      <th>LassoCV</th>\n",
419 |               "      <td>0.90</td>\n",
420 |               "      <td>1.78</td>\n",
421 |               "      <td>0.07</td>\n",
422 |               "    </tr>\n",
423 |               "    <tr>\n",
424 |               "      <th>LarsCV</th>\n",
425 |               "      <td>0.90</td>\n",
426 |               "      <td>1.78</td>\n",
427 |               "      <td>0.06</td>\n",
428 |               "    </tr>\n",
429 |               "    <tr>\n",
430 |               "      <th>LassoLarsCV</th>\n",
431 |               "      <td>0.90</td>\n",
432 |               "      <td>1.78</td>\n",
433 |               "      <td>0.02</td>\n",
434 |               "    </tr>\n",
435 |               "    <tr>\n",
436 |               "      <th>LassoLarsIC</th>\n",
437 |               "      <td>0.90</td>\n",
438 |               "      <td>1.79</td>\n",
439 |               "      <td>0.01</td>\n",
440 |               "    </tr>\n",
441 |               "    <tr>\n",
442 |               "      <th>Ridge</th>\n",
443 |               "      <td>0.90</td>\n",
444 |               "      <td>1.79</td>\n",
445 |               "      <td>0.01</td>\n",
446 |               "    </tr>\n",
447 |               "    <tr>\n",
448 |               "      <th>ElasticNetCV</th>\n",
449 |               "      <td>0.90</td>\n",
450 |               "      <td>1.79</td>\n",
451 |               "      <td>0.05</td>\n",
452 |               "    </tr>\n",
453 |               "    <tr>\n",
454 |               "      <th>LinearSVR</th>\n",
455 |               "      <td>0.89</td>\n",
456 |               "      <td>1.85</td>\n",
457 |               "      <td>0.01</td>\n",
458 |               "    </tr>\n",
459 |               "    <tr>\n",
460 |               "      <th>HuberRegressor</th>\n",
461 |               "      <td>0.89</td>\n",
462 |               "      <td>1.89</td>\n",
463 |               "      <td>0.02</td>\n",
464 |               "    </tr>\n",
465 |               "    <tr>\n",
466 |               "      <th>PassiveAggressiveRegressor</th>\n",
467 |               "      <td>0.85</td>\n",
468 |               "      <td>2.15</td>\n",
469 |               "      <td>0.02</td>\n",
470 |               "    </tr>\n",
471 |               "    <tr>\n",
472 |               "      <th>RANSACRegressor</th>\n",
473 |               "      <td>0.85</td>\n",
474 |               "      <td>2.17</td>\n",
475 |               "      <td>0.02</td>\n",
476 |               "    </tr>\n",
477 |               "    <tr>\n",
478 |               "      <th>Lasso</th>\n",
479 |               "      <td>0.82</td>\n",
480 |               "      <td>2.40</td>\n",
481 |               "      <td>0.02</td>\n",
482 |               "    </tr>\n",
483 |               "    <tr>\n",
484 |               "      <th>ElasticNet</th>\n",
485 |               "      <td>0.73</td>\n",
486 |               "      <td>2.94</td>\n",
487 |               "      <td>0.02</td>\n",
488 |               "    </tr>\n",
489 |               "    <tr>\n",
490 |               "      <th>OrthogonalMatchingPursuit</th>\n",
491 |               "      <td>0.68</td>\n",
492 |               "      <td>3.19</td>\n",
493 |               "      <td>0.02</td>\n",
494 |               "    </tr>\n",
495 |               "    <tr>\n",
496 |               "      <th>DummyRegressor</th>\n",
497 |               "      <td>-0.00</td>\n",
498 |               "      <td>5.63</td>\n",
499 |               "      <td>0.01</td>\n",
500 |               "    </tr>\n",
501 |               "    <tr>\n",
502 |               "      <th>LassoLars</th>\n",
503 |               "      <td>-0.00</td>\n",
504 |               "      <td>5.63</td>\n",
505 |               "      <td>0.01</td>\n",
506 |               "    </tr>\n",
507 |               "    <tr>\n",
508 |               "      <th>MLPRegressor</th>\n",
509 |               "      <td>-0.20</td>\n",
510 |               "      <td>6.17</td>\n",
511 |               "      <td>0.34</td>\n",
512 |               "    </tr>\n",
513 |               "    <tr>\n",
514 |               "      <th>GaussianProcessRegressor</th>\n",
515 |               "      <td>-1.03</td>\n",
516 |               "      <td>8.01</td>\n",
517 |               "      <td>0.02</td>\n",
518 |               "    </tr>\n",
519 |               "    <tr>\n",
520 |               "      <th>KernelRidge</th>\n",
521 |               "      <td>-5.31</td>\n",
522 |               "      <td>14.11</td>\n",
523 |               "      <td>0.02</td>\n",
524 |               "    </tr>\n",
525 |               "  </tbody>\n",
526 |               "</table>\n",
527 |               "</div>"
528 |             ],
529 |             "text/plain": [
530 |               "                               R-Squared  RMSE  Time Taken\n",
531 |               "Model                                                     \n",
532 |               "ExtraTreesRegressor                 0.99  0.61        0.12\n",
533 |               "GradientBoostingRegressor           0.98  0.73        0.06\n",
534 |               "XGBRegressor                        0.98  0.75        0.16\n",
535 |               "RandomForestRegressor               0.98  0.77        0.20\n",
536 |               "BaggingRegressor                    0.98  0.87        0.03\n",
537 |               "LGBMRegressor                       0.97  0.89        0.05\n",
538 |               "HistGradientBoostingRegressor       0.97  0.97        0.11\n",
539 |               "AdaBoostRegressor                   0.96  1.16        0.12\n",
540 |               "ExtraTreeRegressor                  0.94  1.37        0.01\n",
541 |               "DecisionTreeRegressor               0.93  1.48        0.01\n",
542 |               "KNeighborsRegressor                 0.93  1.49        0.01\n",
543 |               "SVR                                 0.91  1.69        0.01\n",
544 |               "NuSVR                               0.91  1.72        0.03\n",
545 |               "OrthogonalMatchingPursuitCV         0.90  1.77        0.03\n",
546 |               "TransformedTargetRegressor          0.90  1.78        0.01\n",
547 |               "Lars                                0.90  1.78        0.03\n",
548 |               "LinearRegression                    0.90  1.78        0.01\n",
549 |               "RidgeCV                             0.90  1.78        0.01\n",
550 |               "BayesianRidge                       0.90  1.78        0.05\n",
551 |               "SGDRegressor                        0.90  1.78        0.01\n",
552 |               "LassoCV                             0.90  1.78        0.07\n",
553 |               "LarsCV                              0.90  1.78        0.06\n",
554 |               "LassoLarsCV                         0.90  1.78        0.02\n",
555 |               "LassoLarsIC                         0.90  1.79        0.01\n",
556 |               "Ridge                               0.90  1.79        0.01\n",
557 |               "ElasticNetCV                        0.90  1.79        0.05\n",
558 |               "LinearSVR                           0.89  1.85        0.01\n",
559 |               "HuberRegressor                      0.89  1.89        0.02\n",
560 |               "PassiveAggressiveRegressor          0.85  2.15        0.02\n",
561 |               "RANSACRegressor                     0.85  2.17        0.02\n",
562 |               "Lasso                               0.82  2.40        0.02\n",
563 |               "ElasticNet                          0.73  2.94        0.02\n",
564 |               "OrthogonalMatchingPursuit           0.68  3.19        0.02\n",
565 |               "DummyRegressor                     -0.00  5.63        0.01\n",
566 |               "LassoLars                          -0.00  5.63        0.01\n",
567 |               "MLPRegressor                       -0.20  6.17        0.34\n",
568 |               "GaussianProcessRegressor           -1.03  8.01        0.02\n",
569 |               "KernelRidge                        -5.31 14.11        0.02"
570 |             ]
571 |           },
572 |           "metadata": {
573 |             "tags": []
574 |           },
575 |           "execution_count": 12
576 |         }
577 |       ]
578 |     }
579 |   ]
580 | }


--------------------------------------------------------------------------------
/AutoGluon/Patient_data.csv:
--------------------------------------------------------------------------------
  1 | pelvic_incidence,pelvic_tilt numeric,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,class
  2 | 63.0278175,22.55258597,39.60911701,40.47523153,98.67291675,-0.254399986,Abnormal
  3 | 39.05695098,10.06099147,25.01537822,28.99595951,114.4054254,4.564258645,Abnormal
  4 | 68.83202098,22.21848205,50.09219357,46.61353893,105.9851355,-3.530317314,Abnormal
  5 | 69.29700807,24.65287791,44.31123813,44.64413017,101.8684951,11.21152344,Abnormal
  6 | 49.71285934,9.652074879,28.317406,40.06078446,108.1687249,7.918500615,Abnormal
  7 | 40.25019968,13.92190658,25.1249496,26.32829311,130.3278713,2.230651729,Abnormal
  8 | 53.43292815,15.86433612,37.16593387,37.56859203,120.5675233,5.988550702,Abnormal
  9 | 45.36675362,10.75561143,29.03834896,34.61114218,117.2700675,-10.67587083,Abnormal
 10 | 43.79019026,13.5337531,42.69081398,30.25643716,125.0028927,13.28901817,Abnormal
 11 | 36.68635286,5.010884121,41.9487509,31.67546874,84.24141517,0.664437117,Abnormal
 12 | 49.70660953,13.04097405,31.33450009,36.66563548,108.6482654,-7.825985755,Abnormal
 13 | 31.23238734,17.71581923,15.5,13.51656811,120.0553988,0.499751446,Abnormal
 14 | 48.91555137,19.96455616,40.26379358,28.95099521,119.321358,8.028894629,Abnormal
 15 | 53.5721702,20.46082824,33.1,33.11134196,110.9666978,7.044802938,Abnormal
 16 | 57.30022656,24.1888846,46.99999999,33.11134196,116.8065868,5.766946943,Abnormal
 17 | 44.31890674,12.53799164,36.098763,31.78091509,124.1158358,5.415825143,Abnormal
 18 | 63.83498162,20.36250706,54.55243367,43.47247456,112.3094915,-0.622526643,Abnormal
 19 | 31.27601184,3.14466948,32.56299592,28.13134236,129.0114183,3.623020073,Abnormal
 20 | 38.69791243,13.44474904,31,25.25316339,123.1592507,1.429185758,Abnormal
 21 | 41.72996308,12.25407408,30.12258646,29.475889,116.5857056,-1.244402488,Abnormal
 22 | 43.92283983,14.17795853,37.8325467,29.7448813,134.4610156,6.451647637,Abnormal
 23 | 54.91944259,21.06233245,42.19999999,33.85711014,125.2127163,2.432561437,Abnormal
 24 | 63.07361096,24.41380271,53.99999999,38.65980825,106.4243295,15.77969683,Abnormal
 25 | 45.54078988,13.06959759,30.29832059,32.47119229,117.9808303,-4.987129618,Abnormal
 26 | 36.12568347,22.75875277,29,13.3669307,115.5771163,-3.237562489,Abnormal
 27 | 54.12492019,26.65048856,35.32974693,27.47443163,121.447011,1.571204816,Abnormal
 28 | 26.14792141,10.75945357,14,15.38846783,125.2032956,-10.09310817,Abnormal
 29 | 43.58096394,16.5088837,46.99999999,27.07208024,109.271634,8.992815727,Abnormal
 30 | 44.5510115,21.93114655,26.78591597,22.61986495,111.0729197,2.652320636,Abnormal
 31 | 66.87921138,24.89199889,49.27859673,41.9872125,113.4770183,-2.005891748,Abnormal
 32 | 50.81926781,15.40221253,42.52893886,35.41705528,112.192804,10.86956554,Abnormal
 33 | 46.39026008,11.07904664,32.13655345,35.31121344,98.77454633,6.386831648,Abnormal
 34 | 44.93667457,17.44383762,27.78057555,27.49283695,117.9803245,5.569619587,Abnormal
 35 | 38.66325708,12.98644139,39.99999999,25.67681568,124.914118,2.703008052,Abnormal
 36 | 59.59554032,31.99824445,46.56025198,27.59729587,119.3303537,1.474285836,Abnormal
 37 | 31.48421834,7.82622134,24.28481815,23.657997,113.8331446,4.393080498,Abnormal
 38 | 32.09098679,6.989378081,35.99819848,25.10160871,132.264735,6.413427708,Abnormal
 39 | 35.70345781,19.44325311,20.7,16.26020471,137.5406125,-0.263489651,Abnormal
 40 | 55.84328595,28.84744756,47.69054322,26.99583839,123.3118449,2.812426855,Abnormal
 41 | 52.41938511,19.01156052,35.87265953,33.40782459,116.5597709,1.694705102,Abnormal
 42 | 35.49244617,11.7016723,15.59036345,23.79077387,106.9388517,-3.460357991,Abnormal
 43 | 46.44207842,8.39503589,29.0372302,38.04704253,115.4814047,2.045475795,Abnormal
 44 | 53.85479842,19.23064334,32.77905978,34.62415508,121.6709148,5.329843204,Abnormal
 45 | 66.28539377,26.32784484,47.49999999,39.95754893,121.2196839,-0.799624469,Abnormal
 46 | 56.03021778,16.2979149,62.27527456,39.73230287,114.0231172,-2.325683841,Abnormal
 47 | 50.91244034,23.01516931,46.99999999,27.89727103,117.4222591,-2.526701511,Abnormal
 48 | 48.332638,22.22778399,36.18199318,26.10485401,117.3846251,6.481709096,Abnormal
 49 | 41.35250407,16.57736351,30.70619135,24.77514057,113.2666746,-4.497957556,Abnormal
 50 | 40.55735663,17.97778407,34,22.57957256,121.0462458,-1.537383074,Abnormal
 51 | 41.76773173,17.89940172,20.0308863,23.86833001,118.3633889,2.062962549,Abnormal
 52 | 55.28585178,20.44011836,34,34.84573342,115.8770174,3.558372358,Abnormal
 53 | 74.43359316,41.55733141,27.7,32.87626175,107.9493045,5.000088788,Abnormal
 54 | 50.20966979,29.76012218,36.10400731,20.44954761,128.2925148,5.740614083,Abnormal
 55 | 30.14993632,11.91744524,34,18.23249108,112.6841408,11.46322327,Abnormal
 56 | 41.17167989,17.32120599,33.46940277,23.85047391,116.3778894,-9.569249858,Abnormal
 57 | 47.65772963,13.27738491,36.67998541,34.38034472,98.24978071,6.273012173,Abnormal
 58 | 43.34960621,7.467468964,28.06548279,35.88213725,112.7761866,5.753277458,Abnormal
 59 | 46.85578065,15.35151393,38,31.50426672,116.2509174,1.662705589,Abnormal
 60 | 43.20318499,19.66314572,35,23.54003927,124.8461088,-2.919075955,Abnormal
 61 | 48.10923638,14.93072472,35.56468278,33.17851166,124.0564518,7.947904861,Abnormal
 62 | 74.37767772,32.05310438,78.77201304,42.32457334,143.5606905,56.12590603,Abnormal
 63 | 89.68056731,32.70443487,83.13073216,56.97613244,129.9554764,92.02727682,Abnormal
 64 | 44.529051,9.433234213,51.99999999,35.09581679,134.7117723,29.10657504,Abnormal
 65 | 77.69057712,21.38064464,64.42944191,56.30993248,114.818751,26.93184095,Abnormal
 66 | 76.1472121,21.93618556,82.96150249,54.21102654,123.9320096,10.43197194,Abnormal
 67 | 83.93300857,41.28630543,61.99999999,42.64670314,115.012334,26.58810016,Abnormal
 68 | 78.49173027,22.1817978,59.99999999,56.30993248,118.5303266,27.38321314,Abnormal
 69 | 75.64973136,19.33979889,64.14868477,56.30993248,95.9036288,69.55130292,Abnormal
 70 | 72.07627839,18.94617604,50.99999999,53.13010236,114.2130126,1.01004051,Abnormal
 71 | 58.59952852,-0.261499046,51.49999999,58.86102756,102.0428116,28.05969711,Abnormal
 72 | 72.56070163,17.38519079,51.99999999,55.17551084,119.1937238,32.10853735,Abnormal
 73 | 86.90079431,32.9281677,47.79434664,53.97262661,135.0753635,101.7190919,Abnormal
 74 | 84.97413208,33.02117462,60.85987263,51.95295747,125.6595336,74.33340864,Abnormal
 75 | 55.512212,20.09515673,43.99999999,35.41705528,122.648753,34.55294641,Abnormal
 76 | 72.2223343,23.07771056,90.99999999,49.14462374,137.7366546,56.80409277,Abnormal
 77 | 70.22145219,39.82272448,68.11840309,30.39872771,148.5255624,145.3781432,Abnormal
 78 | 86.75360946,36.04301632,69.22104479,50.71059314,139.414504,110.8607824,Abnormal
 79 | 58.78254775,7.667044186,53.33894082,51.11550357,98.50115697,51.58412476,Abnormal
 80 | 67.41253785,17.44279712,60.14464036,49.96974073,111.12397,33.15764573,Abnormal
 81 | 47.74467877,12.08935067,38.99999999,35.6553281,117.5120039,21.68240136,Abnormal
 82 | 77.10657122,30.46999418,69.48062839,46.63657704,112.1516,70.75908308,Abnormal
 83 | 74.00554124,21.12240192,57.37950226,52.88313932,120.2059626,74.55516588,Abnormal
 84 | 88.62390839,29.08945331,47.56426247,59.53445508,121.7647796,51.80589921,Abnormal
 85 | 81.10410039,24.79416792,77.88702048,56.30993247,151.8398566,65.21461611,Abnormal
 86 | 76.32600187,42.39620445,57.19999999,33.92979742,124.267007,50.12745689,Abnormal
 87 | 45.44374959,9.906071798,44.99999999,35.53767779,163.0710405,20.31531532,Abnormal
 88 | 59.78526526,17.87932332,59.20646143,41.90594194,119.3191109,22.12386874,Abnormal
 89 | 44.91414916,10.21899563,44.63091389,34.69515353,130.0756599,37.36453993,Abnormal
 90 | 56.60577127,16.80020017,41.99999999,39.80557109,127.2945222,24.0185747,Abnormal
 91 | 71.18681115,23.89620111,43.6966651,47.29061004,119.8649383,27.28398451,Abnormal
 92 | 81.65603206,28.74886935,58.23282055,52.9071627,114.7698556,30.60914842,Abnormal
 93 | 70.95272771,20.15993121,62.85910914,50.7927965,116.1779325,32.522331,Abnormal
 94 | 85.35231529,15.84491006,71.66865979,69.50740523,124.4197875,76.0206034,Abnormal
 95 | 58.10193455,14.83763914,79.64983825,43.26429541,113.5876551,50.23787808,Abnormal
 96 | 94.17482232,15.38076983,67.70572132,78.79405249,114.8901128,53.25522004,Abnormal
 97 | 57.52235608,33.64707522,50.90985841,23.87528085,140.9817119,148.7537109,Abnormal
 98 | 96.65731511,19.46158117,90.21149828,77.19573393,120.6730408,64.08099841,Abnormal
 99 | 74.72074622,19.75694203,82.73535954,54.96380419,109.3565941,33.30606685,Abnormal
100 | 77.65511874,22.4329501,93.89277881,55.22216863,123.0557067,61.2111866,Abnormal
101 | 58.52162283,13.92228609,41.46785522,44.59933674,115.514798,30.3879839,Abnormal
102 | 84.5856071,30.36168482,65.47948563,54.22392228,108.0102185,25.11847846,Abnormal
103 | 79.93857026,18.7740711,63.31183486,61.16449915,114.787107,38.53874133,Abnormal
104 | 70.39930842,13.46998624,61.19999999,56.92932218,102.3375244,25.53842852,Abnormal
105 | 49.78212054,6.46680486,52.99999999,43.31531568,110.8647831,25.33564729,Abnormal
106 | 77.40933294,29.39654543,63.23230243,48.0127875,118.4507311,93.56373734,Abnormal
107 | 65.00796426,27.60260762,50.94751899,37.40535663,116.5811088,7.015977884,Abnormal
108 | 65.01377322,9.838262375,57.73583722,55.17551084,94.73852542,49.69695462,Abnormal
109 | 78.42595126,33.42595126,76.27743927,45,138.5541111,77.15517241,Abnormal
110 | 63.17298709,6.330910974,62.99999999,56.84207612,110.6440206,42.60807567,Abnormal
111 | 68.61300092,15.0822353,63.01469619,53.53076561,123.4311742,39.49798659,Abnormal
112 | 63.90063261,13.7062037,62.12433389,50.19442891,114.1292425,41.42282844,Abnormal
113 | 84.99895554,29.61009772,83.35219438,55.38885782,126.9129899,71.32117542,Abnormal
114 | 42.02138603,-6.554948347,67.89999999,48.57633437,111.5857819,27.33867086,Abnormal
115 | 69.75666532,19.27929659,48.49999999,50.47736873,96.49136982,51.1696403,Abnormal
116 | 80.98807441,36.84317181,86.96060151,44.1449026,141.0881494,85.87215224,Abnormal
117 | 129.8340406,8.404475005,48.38405705,121.4295656,107.690466,418.5430821,Abnormal
118 | 70.48410444,12.48948765,62.41714208,57.99461679,114.1900488,56.90244779,Abnormal
119 | 86.04127982,38.75066978,47.87140494,47.29061004,122.0929536,61.98827709,Abnormal
120 | 65.53600255,24.15748726,45.77516991,41.3785153,136.4403015,16.37808564,Abnormal
121 | 60.7538935,15.7538935,43.19915768,45,113.0533309,31.69354839,Abnormal
122 | 54.74177518,12.09507205,40.99999999,42.64670314,117.6432188,40.3823266,Abnormal
123 | 83.87994081,23.07742686,87.14151223,60.80251395,124.6460723,80.55560527,Abnormal
124 | 80.07491418,48.06953097,52.40343873,32.00538321,110.7099121,67.72731595,Abnormal
125 | 65.66534698,10.54067533,56.48913545,55.12467166,109.1627768,53.93202006,Abnormal
126 | 74.71722805,14.32167879,32.5,60.39554926,107.1822176,37.01708012,Abnormal
127 | 48.06062649,5.687032126,57.05716117,42.37359436,95.44375749,32.83587702,Abnormal
128 | 70.67689818,21.70440224,59.18116082,48.97249594,103.0083545,27.8101478,Abnormal
129 | 80.43342782,16.998479,66.53601753,63.43494882,116.4389807,57.78125,Abnormal
130 | 90.51396072,28.27250132,69.8139423,62.2414594,100.8921596,58.82364821,Abnormal
131 | 77.23689752,16.73762214,49.77553438,60.49927538,110.6903772,39.7871542,Abnormal
132 | 50.06678595,9.120340183,32.16846267,40.94644577,99.71245318,26.76669655,Abnormal
133 | 69.78100617,13.77746531,57.99999999,56.00354085,118.9306656,17.91456046,Abnormal
134 | 69.62628302,21.12275138,52.76659472,48.50353164,116.8030913,54.81686729,Abnormal
135 | 81.75441933,20.12346562,70.56044038,61.63095371,119.4250857,55.50688907,Abnormal
136 | 52.20469309,17.21267289,78.09496877,34.9920202,136.9725168,54.93913416,Abnormal
137 | 77.12134424,30.3498745,77.48108264,46.77146974,110.6111484,82.09360704,Abnormal
138 | 88.0244989,39.84466878,81.77447308,48.17983012,116.6015376,56.76608323,Abnormal
139 | 83.39660609,34.31098931,78.42329287,49.08561678,110.4665164,49.67209559,Abnormal
140 | 72.05403412,24.70073725,79.87401586,47.35329687,107.1723576,56.42615873,Abnormal
141 | 85.09550254,21.06989651,91.73479193,64.02560604,109.062312,38.03283108,Abnormal
142 | 69.56348614,15.4011391,74.43849743,54.16234705,105.0673556,29.70121083,Abnormal
143 | 89.5049473,48.90365265,72.0034229,40.60129465,134.6342912,118.3533701,Abnormal
144 | 85.29017283,18.27888963,100.7442198,67.0112832,110.6607005,58.88494802,Abnormal
145 | 60.62621697,20.5959577,64.53526221,40.03025927,117.2255542,104.8592474,Abnormal
146 | 60.04417717,14.30965614,58.03886519,45.73452103,105.1316639,30.40913315,Abnormal
147 | 85.64378664,42.68919513,78.7506635,42.95459151,105.1440758,42.88742577,Abnormal
148 | 85.58171024,30.45703858,78.23137949,55.12467166,114.8660487,68.37612182,Abnormal
149 | 55.08076562,-3.759929872,55.99999999,58.84069549,109.9153669,31.77358318,Abnormal
150 | 65.75567895,9.832874231,50.82289501,55.92280472,104.3949585,39.30721246,Abnormal
151 | 79.24967118,23.94482471,40.79669829,55.30484647,98.62251165,36.7063954,Abnormal
152 | 81.11260488,20.69044356,60.68700588,60.42216132,94.01878339,40.51098228,Abnormal
153 | 48.0306238,3.969814743,58.34451924,44.06080905,125.3509625,35.00007784,Abnormal
154 | 63.40448058,14.11532726,48.13680562,49.28915333,111.9160075,31.78449499,Abnormal
155 | 57.28694488,15.1493501,63.99999999,42.13759477,116.7353868,30.34120327,Abnormal
156 | 41.18776972,5.792973871,42.86739151,35.39479584,103.3488802,27.66027669,Abnormal
157 | 66.80479632,14.55160171,72.08491177,52.25319461,82.45603817,41.6854736,Abnormal
158 | 79.4769781,26.73226755,70.65098189,52.74471055,118.5886691,61.70059824,Abnormal
159 | 44.21646446,1.507074501,46.11033909,42.70938996,108.6295666,42.81048066,Abnormal
160 | 57.03509717,0.34572799,49.19800263,56.68936918,103.0486975,52.16514503,Abnormal
161 | 64.27481758,12.50864276,68.70237672,51.76617482,95.25245421,39.40982612,Abnormal
162 | 92.02630795,35.39267395,77.41696348,56.633634,115.72353,58.05754155,Abnormal
163 | 67.26314926,7.194661096,51.69688681,60.06848816,97.8010854,42.13694325,Abnormal
164 | 118.1446548,38.44950127,50.83851954,79.69515353,81.0245406,74.04376736,Abnormal
165 | 115.9232606,37.51543601,76.79999999,78.40782459,104.6986033,81.19892712,Abnormal
166 | 53.94165809,9.306594428,43.10049819,44.63506366,124.3978211,25.0821266,Abnormal
167 | 83.7031774,20.26822858,77.1105979,63.43494882,125.4801739,69.279571,Abnormal
168 | 56.99140382,6.87408897,57.00900516,50.11731485,109.978045,36.81011057,Abnormal
169 | 72.34359434,16.42078962,59.86901238,55.92280472,70.08257486,12.07264427,Abnormal
170 | 95.38259648,24.82263131,95.15763273,70.55996517,89.3075466,57.66084135,Abnormal
171 | 44.25347645,1.101086714,38,43.15238973,98.27410705,23.9106354,Abnormal
172 | 64.80954139,15.17407796,58.83999352,49.63546343,111.679961,21.40719845,Abnormal
173 | 78.40125389,14.04225971,79.69426258,64.35899418,104.7312342,12.39285327,Abnormal
174 | 56.66829282,13.45820343,43.76970978,43.21008939,93.69220863,21.10812135,Abnormal
175 | 50.82502875,9.064729049,56.29999999,41.7602997,78.99945411,23.04152435,Abnormal
176 | 61.41173702,25.38436364,39.09686927,36.02737339,103.4045971,21.84340688,Abnormal
177 | 56.56382381,8.961261611,52.57784639,47.6025622,98.77711506,50.70187326,Abnormal
178 | 67.02766447,13.28150221,66.15040334,53.74616226,100.7154129,33.98913551,Abnormal
179 | 80.81777144,19.23898066,61.64245116,61.57879078,89.47183446,44.167602,Abnormal
180 | 80.65431956,26.34437939,60.89811835,54.30994017,120.1034928,52.46755185,Abnormal
181 | 68.72190982,49.4318636,68.0560124,19.29004622,125.0185168,54.69128928,Abnormal
182 | 37.90391014,4.47909896,24.71027447,33.42481118,157.848799,33.60702661,Abnormal
183 | 64.62400798,15.22530262,67.63216653,49.39870535,90.298468,31.32641123,Abnormal
184 | 75.43774787,31.53945399,89.59999999,43.89829388,106.8295898,54.96578902,Abnormal
185 | 71.00194076,37.51577195,84.53709256,33.48616882,125.1642324,67.77118983,Abnormal
186 | 81.05661087,20.80149217,91.78449512,60.2551187,125.430176,38.18178176,Abnormal
187 | 91.46874146,24.50817744,84.62027202,66.96056402,117.3078968,52.62304673,Abnormal
188 | 81.08232025,21.25584028,78.76675639,59.82647997,90.07187999,49.159426,Abnormal
189 | 60.419932,5.265665422,59.8142356,55.15426658,109.0330745,30.26578534,Abnormal
190 | 85.68094951,38.65003527,82.68097744,47.03091424,120.8407069,61.95903428,Abnormal
191 | 82.4065243,29.27642195,77.05456489,53.13010235,117.0422439,62.76534831,Abnormal
192 | 43.7182623,9.811985315,51.99999999,33.90627699,88.43424213,40.88092253,Abnormal
193 | 86.472905,40.30376567,61.14101155,46.16913933,97.4041888,55.75222146,Abnormal
194 | 74.46908181,33.28315665,66.94210105,41.18592517,146.4660009,124.9844057,Abnormal
195 | 70.25043628,10.34012252,76.37007032,59.91031376,119.2370072,32.66650243,Abnormal
196 | 72.64385013,18.92911726,67.99999999,53.71473287,116.9634162,25.38424676,Abnormal
197 | 71.24176388,5.268270454,85.99958417,65.97349342,110.703107,38.2598637,Abnormal
198 | 63.7723908,12.76338484,65.36052425,51.00900596,89.82274067,55.99545386,Abnormal
199 | 58.82837872,37.57787321,125.7423855,21.25050551,135.6294176,117.3146829,Abnormal
200 | 74.85448008,13.90908417,62.69325884,60.9453959,115.2087008,33.17225512,Abnormal
201 | 75.29847847,16.67148361,61.29620362,58.62699486,118.8833881,31.57582292,Abnormal
202 | 63.36433898,20.02462134,67.49870507,43.33971763,130.9992576,37.55670552,Abnormal
203 | 67.51305267,33.2755899,96.28306169,34.23746278,145.6010328,88.30148594,Abnormal
204 | 76.31402766,41.93368293,93.2848628,34.38034472,132.2672855,101.2187828,Abnormal
205 | 73.63596236,9.711317947,62.99999999,63.92464442,98.72792982,26.97578722,Abnormal
206 | 56.53505139,14.37718927,44.99154663,42.15786212,101.7233343,25.77317356,Abnormal
207 | 80.11157156,33.94243223,85.10160773,46.16913933,125.5936237,100.2921068,Abnormal
208 | 95.48022873,46.55005318,58.99999999,48.93017555,96.68390337,77.28307195,Abnormal
209 | 74.09473084,18.82372712,76.03215571,55.27100372,128.4057314,73.38821617,Abnormal
210 | 87.67908663,20.36561331,93.82241589,67.31347333,120.9448288,76.73062904,Abnormal
211 | 48.25991962,16.41746236,36.32913708,31.84245726,94.88233607,28.34379914,Abnormal
212 | 38.50527283,16.96429691,35.11281407,21.54097592,127.6328747,7.986683227,Normal
213 | 54.92085752,18.96842952,51.60145541,35.952428,125.8466462,2.001642472,Normal
214 | 44.36249017,8.945434892,46.90209626,35.41705528,129.220682,4.994195288,Normal
215 | 48.3189305,17.45212105,47.99999999,30.86680945,128.9803079,-0.910940567,Normal
216 | 45.70178875,10.65985935,42.5778464,35.0419294,130.1783144,-3.38890999,Normal
217 | 30.74193812,13.35496594,35.90352597,17.38697218,142.4101072,-2.005372903,Normal
218 | 50.91310144,6.6769999,30.89652243,44.23610154,118.151531,-1.057985526,Normal
219 | 38.12658854,6.557617408,50.44507473,31.56897113,132.114805,6.338199339,Normal
220 | 51.62467183,15.96934373,35,35.6553281,129.385308,1.00922834,Normal
221 | 64.31186727,26.32836901,50.95896417,37.98349826,106.1777511,3.118221289,Normal
222 | 44.48927476,21.78643263,31.47415392,22.70284212,113.7784936,-0.284129366,Normal
223 | 54.9509702,5.865353416,52.99999999,49.08561678,126.9703283,-0.631602951,Normal
224 | 56.10377352,13.10630665,62.63701952,42.99746687,116.2285032,31.17276727,Normal
225 | 69.3988184,18.89840693,75.96636144,50.50041147,103.5825398,-0.44366081,Normal
226 | 89.83467631,22.63921678,90.56346144,67.19545953,100.5011917,3.040973261,Normal
227 | 59.72614016,7.724872599,55.34348527,52.00126756,125.1742214,3.235159224,Normal
228 | 63.95952166,16.06094486,63.12373633,47.8985768,142.3601245,6.298970934,Normal
229 | 61.54059876,19.67695713,52.89222856,41.86364163,118.6862678,4.815031084,Normal
230 | 38.04655072,8.30166942,26.23683004,29.7448813,123.8034132,3.885773488,Normal
231 | 43.43645061,10.09574326,36.03222439,33.34070735,137.4396942,-3.114450861,Normal
232 | 65.61180231,23.13791922,62.58217893,42.47388309,124.1280012,-4.083298414,Normal
233 | 53.91105429,12.93931796,38.99999999,40.97173633,118.1930354,5.074353176,Normal
234 | 43.11795103,13.81574355,40.34738779,29.30220748,128.5177217,0.970926407,Normal
235 | 40.6832291,9.148437195,31.02159252,31.53479191,139.1184721,-2.511618596,Normal
236 | 37.7319919,9.386298276,41.99999999,28.34569362,135.740926,13.68304672,Normal
237 | 63.92947003,19.97109671,40.17704963,43.95837332,113.0659387,-11.05817866,Normal
238 | 61.82162717,13.59710457,63.99999999,48.22452261,121.779803,1.296191194,Normal
239 | 62.14080535,13.96097523,57.99999999,48.17983012,133.2818339,4.955105669,Normal
240 | 69.00491277,13.29178975,55.5701429,55.71312302,126.6116215,10.83201105,Normal
241 | 56.44702568,19.44449915,43.5778464,37.00252653,139.1896903,-1.859688529,Normal
242 | 41.6469159,8.835549101,36.03197484,32.8113668,116.5551679,-6.054537956,Normal
243 | 51.52935759,13.51784732,35,38.01151027,126.7185156,13.92833085,Normal
244 | 39.08726449,5.536602477,26.93203835,33.55066201,131.5844199,-0.75946135,Normal
245 | 34.64992241,7.514782784,42.99999999,27.13513962,123.9877408,-4.082937601,Normal
246 | 63.02630005,27.33624023,51.60501665,35.69005983,114.5066078,7.439869802,Normal
247 | 47.80555887,10.68869819,53.99999999,37.11686068,125.3911378,-0.402523218,Normal
248 | 46.63786363,15.85371711,39.99999999,30.78414653,119.3776026,9.06458168,Normal
249 | 49.82813487,16.73643493,28,33.09169994,121.4355585,1.91330704,Normal
250 | 47.31964755,8.573680295,35.56025198,38.74596726,120.5769719,1.630663508,Normal
251 | 50.75329025,20.23505957,37,30.51823068,122.343516,2.288487746,Normal
252 | 36.15782981,-0.810514093,33.62731353,36.96834391,135.9369096,-2.092506504,Normal
253 | 40.74699612,1.835524271,49.99999999,38.91147185,139.2471502,0.668556793,Normal
254 | 42.91804052,-5.845994341,57.99999999,48.76403486,121.6068586,-3.362044654,Normal
255 | 63.79242525,21.34532339,65.99999999,42.44710185,119.5503909,12.38260373,Normal
256 | 72.95564397,19.57697146,61.00707117,53.37867251,111.2340468,0.813491154,Normal
257 | 67.53818154,14.65504222,58.00142908,52.88313932,123.6322597,25.9702063,Normal
258 | 54.75251965,9.752519649,47.99999999,45,123.0379985,8.235294118,Normal
259 | 50.16007802,-2.970024337,41.99999999,53.13010235,131.8024914,-8.290203373,Normal
260 | 40.34929637,10.19474845,37.96774659,30.15454792,128.0099272,0.458901373,Normal
261 | 63.61919213,16.93450781,49.34926218,46.68468432,117.0897469,-0.357811974,Normal
262 | 54.14240778,11.93511014,42.99999999,42.20729763,122.2090834,0.153549242,Normal
263 | 74.97602148,14.92170492,53.73007172,60.05431656,105.6453997,1.594747729,Normal
264 | 42.51727249,14.37567126,25.32356538,28.14160123,128.9056892,0.75702014,Normal
265 | 33.78884314,3.675109986,25.5,30.11373315,128.3253556,-1.776111234,Normal
266 | 54.5036853,6.819910138,46.99999999,47.68377516,111.7911722,-4.406769011,Normal
267 | 48.17074627,9.594216702,39.71092029,38.57652956,135.6233101,5.360050572,Normal
268 | 46.37408781,10.21590237,42.69999999,36.15818544,121.2476572,-0.54202201,Normal
269 | 52.86221391,9.410371613,46.98805181,43.4518423,123.0912395,1.856659161,Normal
270 | 57.1458515,16.48909145,42.84214764,40.65676005,113.8061775,5.0151857,Normal
271 | 37.14014978,16.48123972,24,20.65891006,125.0143609,7.366425398,Normal
272 | 51.31177106,8.875541276,56.99999999,42.43622979,126.4722584,-2.144043911,Normal
273 | 42.51561014,16.54121618,41.99999999,25.97439396,120.631941,7.876730692,Normal
274 | 39.35870531,7.011261806,37,32.3474435,117.8187599,1.904048199,Normal
275 | 35.8775708,1.112373561,43.45725694,34.76519724,126.9239062,-1.632238263,Normal
276 | 43.1919153,9.976663803,28.93814927,33.21525149,123.4674001,1.741017579,Normal
277 | 67.28971201,16.7175142,50.99999999,50.5721978,137.5917777,4.960343813,Normal
278 | 51.32546366,13.63122319,33.25857782,37.69424047,131.3061224,1.78886965,Normal
279 | 65.7563482,13.20692644,43.99999999,52.54942177,129.3935728,-1.982120038,Normal
280 | 40.41336566,-1.329412398,30.98276809,41.74277806,119.3356546,-6.173674823,Normal
281 | 48.80190855,18.01776202,51.99999999,30.78414653,139.1504066,10.44286169,Normal
282 | 50.08615264,13.43004422,34.45754051,36.65610842,119.1346221,3.089484465,Normal
283 | 64.26150724,14.49786554,43.90250363,49.76364169,115.3882683,5.951454368,Normal
284 | 53.68337998,13.44702168,41.58429713,40.23635831,113.9137026,2.737035292,Normal
285 | 48.99595771,13.11382047,51.87351997,35.88213725,126.3981876,0.535471617,Normal
286 | 59.16761171,14.56274875,43.19915768,44.60486296,121.0356423,2.830504124,Normal
287 | 67.80469442,16.55066167,43.25680184,51.25403274,119.6856451,4.867539941,Normal
288 | 61.73487533,17.11431203,46.89999999,44.6205633,120.9201997,3.087725997,Normal
289 | 33.04168754,-0.324678459,19.0710746,33.366366,120.3886112,9.354364925,Normal
290 | 74.56501543,15.72431994,58.61858244,58.84069549,105.417304,0.599247113,Normal
291 | 44.43070103,14.17426387,32.2434952,30.25643716,131.7176127,-3.604255336,Normal
292 | 36.42248549,13.87942449,20.24256187,22.543061,126.0768612,0.179717077,Normal
293 | 51.07983294,14.20993529,35.95122893,36.86989765,115.8037111,6.905089963,Normal
294 | 34.75673809,2.631739646,29.50438112,32.12499844,127.1398495,-0.460894198,Normal
295 | 48.90290434,5.587588658,55.49999999,43.31531568,137.1082886,19.85475919,Normal
296 | 46.23639915,10.0627701,37,36.17362905,128.0636203,-5.100053328,Normal
297 | 46.42636614,6.620795049,48.09999999,39.80557109,130.3500956,2.449382401,Normal
298 | 39.65690201,16.20883944,36.67485694,23.44806258,131.922009,-4.968979881,Normal
299 | 45.57548229,18.75913544,33.77414297,26.81634684,116.7970069,3.131909921,Normal
300 | 66.50717865,20.89767207,31.72747138,45.60950658,128.9029049,1.517203356,Normal
301 | 82.90535054,29.89411893,58.25054221,53.01123161,110.7089577,6.079337831,Normal
302 | 50.67667667,6.461501271,35,44.2151754,116.5879699,-0.214710615,Normal
303 | 89.01487529,26.07598143,69.02125897,62.93889386,111.4810746,6.061508401,Normal
304 | 54.60031622,21.48897426,29.36021618,33.11134196,118.3433212,-1.471067262,Normal
305 | 34.38229939,2.062682882,32.39081996,32.31961651,128.3001991,-3.365515555,Normal
306 | 45.07545026,12.30695118,44.58317718,32.76849908,147.8946372,-8.941709421,Normal
307 | 47.90356517,13.61668819,36,34.28687698,117.4490622,-4.245395422,Normal
308 | 53.93674778,20.72149628,29.22053381,33.21525149,114.365845,-0.421010392,Normal
309 | 61.44659663,22.6949683,46.17034732,38.75162833,125.6707246,-2.707879517,Normal
310 | 45.25279209,8.693157364,41.5831264,36.55963472,118.5458418,0.214750167,Normal
311 | 33.84164075,5.073991409,36.64123294,28.76764934,123.9452436,-0.199249089,Normal
312 | 


--------------------------------------------------------------------------------
/Auto_ViML/Patient_data.csv:
--------------------------------------------------------------------------------
  1 | pelvic_incidence,pelvic_tilt numeric,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,class
  2 | 63.0278175,22.55258597,39.60911701,40.47523153,98.67291675,-0.254399986,Abnormal
  3 | 39.05695098,10.06099147,25.01537822,28.99595951,114.4054254,4.564258645,Abnormal
  4 | 68.83202098,22.21848205,50.09219357,46.61353893,105.9851355,-3.530317314,Abnormal
  5 | 69.29700807,24.65287791,44.31123813,44.64413017,101.8684951,11.21152344,Abnormal
  6 | 49.71285934,9.652074879,28.317406,40.06078446,108.1687249,7.918500615,Abnormal
  7 | 40.25019968,13.92190658,25.1249496,26.32829311,130.3278713,2.230651729,Abnormal
  8 | 53.43292815,15.86433612,37.16593387,37.56859203,120.5675233,5.988550702,Abnormal
  9 | 45.36675362,10.75561143,29.03834896,34.61114218,117.2700675,-10.67587083,Abnormal
 10 | 43.79019026,13.5337531,42.69081398,30.25643716,125.0028927,13.28901817,Abnormal
 11 | 36.68635286,5.010884121,41.9487509,31.67546874,84.24141517,0.664437117,Abnormal
 12 | 49.70660953,13.04097405,31.33450009,36.66563548,108.6482654,-7.825985755,Abnormal
 13 | 31.23238734,17.71581923,15.5,13.51656811,120.0553988,0.499751446,Abnormal
 14 | 48.91555137,19.96455616,40.26379358,28.95099521,119.321358,8.028894629,Abnormal
 15 | 53.5721702,20.46082824,33.1,33.11134196,110.9666978,7.044802938,Abnormal
 16 | 57.30022656,24.1888846,46.99999999,33.11134196,116.8065868,5.766946943,Abnormal
 17 | 44.31890674,12.53799164,36.098763,31.78091509,124.1158358,5.415825143,Abnormal
 18 | 63.83498162,20.36250706,54.55243367,43.47247456,112.3094915,-0.622526643,Abnormal
 19 | 31.27601184,3.14466948,32.56299592,28.13134236,129.0114183,3.623020073,Abnormal
 20 | 38.69791243,13.44474904,31,25.25316339,123.1592507,1.429185758,Abnormal
 21 | 41.72996308,12.25407408,30.12258646,29.475889,116.5857056,-1.244402488,Abnormal
 22 | 43.92283983,14.17795853,37.8325467,29.7448813,134.4610156,6.451647637,Abnormal
 23 | 54.91944259,21.06233245,42.19999999,33.85711014,125.2127163,2.432561437,Abnormal
 24 | 63.07361096,24.41380271,53.99999999,38.65980825,106.4243295,15.77969683,Abnormal
 25 | 45.54078988,13.06959759,30.29832059,32.47119229,117.9808303,-4.987129618,Abnormal
 26 | 36.12568347,22.75875277,29,13.3669307,115.5771163,-3.237562489,Abnormal
 27 | 54.12492019,26.65048856,35.32974693,27.47443163,121.447011,1.571204816,Abnormal
 28 | 26.14792141,10.75945357,14,15.38846783,125.2032956,-10.09310817,Abnormal
 29 | 43.58096394,16.5088837,46.99999999,27.07208024,109.271634,8.992815727,Abnormal
 30 | 44.5510115,21.93114655,26.78591597,22.61986495,111.0729197,2.652320636,Abnormal
 31 | 66.87921138,24.89199889,49.27859673,41.9872125,113.4770183,-2.005891748,Abnormal
 32 | 50.81926781,15.40221253,42.52893886,35.41705528,112.192804,10.86956554,Abnormal
 33 | 46.39026008,11.07904664,32.13655345,35.31121344,98.77454633,6.386831648,Abnormal
 34 | 44.93667457,17.44383762,27.78057555,27.49283695,117.9803245,5.569619587,Abnormal
 35 | 38.66325708,12.98644139,39.99999999,25.67681568,124.914118,2.703008052,Abnormal
 36 | 59.59554032,31.99824445,46.56025198,27.59729587,119.3303537,1.474285836,Abnormal
 37 | 31.48421834,7.82622134,24.28481815,23.657997,113.8331446,4.393080498,Abnormal
 38 | 32.09098679,6.989378081,35.99819848,25.10160871,132.264735,6.413427708,Abnormal
 39 | 35.70345781,19.44325311,20.7,16.26020471,137.5406125,-0.263489651,Abnormal
 40 | 55.84328595,28.84744756,47.69054322,26.99583839,123.3118449,2.812426855,Abnormal
 41 | 52.41938511,19.01156052,35.87265953,33.40782459,116.5597709,1.694705102,Abnormal
 42 | 35.49244617,11.7016723,15.59036345,23.79077387,106.9388517,-3.460357991,Abnormal
 43 | 46.44207842,8.39503589,29.0372302,38.04704253,115.4814047,2.045475795,Abnormal
 44 | 53.85479842,19.23064334,32.77905978,34.62415508,121.6709148,5.329843204,Abnormal
 45 | 66.28539377,26.32784484,47.49999999,39.95754893,121.2196839,-0.799624469,Abnormal
 46 | 56.03021778,16.2979149,62.27527456,39.73230287,114.0231172,-2.325683841,Abnormal
 47 | 50.91244034,23.01516931,46.99999999,27.89727103,117.4222591,-2.526701511,Abnormal
 48 | 48.332638,22.22778399,36.18199318,26.10485401,117.3846251,6.481709096,Abnormal
 49 | 41.35250407,16.57736351,30.70619135,24.77514057,113.2666746,-4.497957556,Abnormal
 50 | 40.55735663,17.97778407,34,22.57957256,121.0462458,-1.537383074,Abnormal
 51 | 41.76773173,17.89940172,20.0308863,23.86833001,118.3633889,2.062962549,Abnormal
 52 | 55.28585178,20.44011836,34,34.84573342,115.8770174,3.558372358,Abnormal
 53 | 74.43359316,41.55733141,27.7,32.87626175,107.9493045,5.000088788,Abnormal
 54 | 50.20966979,29.76012218,36.10400731,20.44954761,128.2925148,5.740614083,Abnormal
 55 | 30.14993632,11.91744524,34,18.23249108,112.6841408,11.46322327,Abnormal
 56 | 41.17167989,17.32120599,33.46940277,23.85047391,116.3778894,-9.569249858,Abnormal
 57 | 47.65772963,13.27738491,36.67998541,34.38034472,98.24978071,6.273012173,Abnormal
 58 | 43.34960621,7.467468964,28.06548279,35.88213725,112.7761866,5.753277458,Abnormal
 59 | 46.85578065,15.35151393,38,31.50426672,116.2509174,1.662705589,Abnormal
 60 | 43.20318499,19.66314572,35,23.54003927,124.8461088,-2.919075955,Abnormal
 61 | 48.10923638,14.93072472,35.56468278,33.17851166,124.0564518,7.947904861,Abnormal
 62 | 74.37767772,32.05310438,78.77201304,42.32457334,143.5606905,56.12590603,Abnormal
 63 | 89.68056731,32.70443487,83.13073216,56.97613244,129.9554764,92.02727682,Abnormal
 64 | 44.529051,9.433234213,51.99999999,35.09581679,134.7117723,29.10657504,Abnormal
 65 | 77.69057712,21.38064464,64.42944191,56.30993248,114.818751,26.93184095,Abnormal
 66 | 76.1472121,21.93618556,82.96150249,54.21102654,123.9320096,10.43197194,Abnormal
 67 | 83.93300857,41.28630543,61.99999999,42.64670314,115.012334,26.58810016,Abnormal
 68 | 78.49173027,22.1817978,59.99999999,56.30993248,118.5303266,27.38321314,Abnormal
 69 | 75.64973136,19.33979889,64.14868477,56.30993248,95.9036288,69.55130292,Abnormal
 70 | 72.07627839,18.94617604,50.99999999,53.13010236,114.2130126,1.01004051,Abnormal
 71 | 58.59952852,-0.261499046,51.49999999,58.86102756,102.0428116,28.05969711,Abnormal
 72 | 72.56070163,17.38519079,51.99999999,55.17551084,119.1937238,32.10853735,Abnormal
 73 | 86.90079431,32.9281677,47.79434664,53.97262661,135.0753635,101.7190919,Abnormal
 74 | 84.97413208,33.02117462,60.85987263,51.95295747,125.6595336,74.33340864,Abnormal
 75 | 55.512212,20.09515673,43.99999999,35.41705528,122.648753,34.55294641,Abnormal
 76 | 72.2223343,23.07771056,90.99999999,49.14462374,137.7366546,56.80409277,Abnormal
 77 | 70.22145219,39.82272448,68.11840309,30.39872771,148.5255624,145.3781432,Abnormal
 78 | 86.75360946,36.04301632,69.22104479,50.71059314,139.414504,110.8607824,Abnormal
 79 | 58.78254775,7.667044186,53.33894082,51.11550357,98.50115697,51.58412476,Abnormal
 80 | 67.41253785,17.44279712,60.14464036,49.96974073,111.12397,33.15764573,Abnormal
 81 | 47.74467877,12.08935067,38.99999999,35.6553281,117.5120039,21.68240136,Abnormal
 82 | 77.10657122,30.46999418,69.48062839,46.63657704,112.1516,70.75908308,Abnormal
 83 | 74.00554124,21.12240192,57.37950226,52.88313932,120.2059626,74.55516588,Abnormal
 84 | 88.62390839,29.08945331,47.56426247,59.53445508,121.7647796,51.80589921,Abnormal
 85 | 81.10410039,24.79416792,77.88702048,56.30993247,151.8398566,65.21461611,Abnormal
 86 | 76.32600187,42.39620445,57.19999999,33.92979742,124.267007,50.12745689,Abnormal
 87 | 45.44374959,9.906071798,44.99999999,35.53767779,163.0710405,20.31531532,Abnormal
 88 | 59.78526526,17.87932332,59.20646143,41.90594194,119.3191109,22.12386874,Abnormal
 89 | 44.91414916,10.21899563,44.63091389,34.69515353,130.0756599,37.36453993,Abnormal
 90 | 56.60577127,16.80020017,41.99999999,39.80557109,127.2945222,24.0185747,Abnormal
 91 | 71.18681115,23.89620111,43.6966651,47.29061004,119.8649383,27.28398451,Abnormal
 92 | 81.65603206,28.74886935,58.23282055,52.9071627,114.7698556,30.60914842,Abnormal
 93 | 70.95272771,20.15993121,62.85910914,50.7927965,116.1779325,32.522331,Abnormal
 94 | 85.35231529,15.84491006,71.66865979,69.50740523,124.4197875,76.0206034,Abnormal
 95 | 58.10193455,14.83763914,79.64983825,43.26429541,113.5876551,50.23787808,Abnormal
 96 | 94.17482232,15.38076983,67.70572132,78.79405249,114.8901128,53.25522004,Abnormal
 97 | 57.52235608,33.64707522,50.90985841,23.87528085,140.9817119,148.7537109,Abnormal
 98 | 96.65731511,19.46158117,90.21149828,77.19573393,120.6730408,64.08099841,Abnormal
 99 | 74.72074622,19.75694203,82.73535954,54.96380419,109.3565941,33.30606685,Abnormal
100 | 77.65511874,22.4329501,93.89277881,55.22216863,123.0557067,61.2111866,Abnormal
101 | 58.52162283,13.92228609,41.46785522,44.59933674,115.514798,30.3879839,Abnormal
102 | 84.5856071,30.36168482,65.47948563,54.22392228,108.0102185,25.11847846,Abnormal
103 | 79.93857026,18.7740711,63.31183486,61.16449915,114.787107,38.53874133,Abnormal
104 | 70.39930842,13.46998624,61.19999999,56.92932218,102.3375244,25.53842852,Abnormal
105 | 49.78212054,6.46680486,52.99999999,43.31531568,110.8647831,25.33564729,Abnormal
106 | 77.40933294,29.39654543,63.23230243,48.0127875,118.4507311,93.56373734,Abnormal
107 | 65.00796426,27.60260762,50.94751899,37.40535663,116.5811088,7.015977884,Abnormal
108 | 65.01377322,9.838262375,57.73583722,55.17551084,94.73852542,49.69695462,Abnormal
109 | 78.42595126,33.42595126,76.27743927,45,138.5541111,77.15517241,Abnormal
110 | 63.17298709,6.330910974,62.99999999,56.84207612,110.6440206,42.60807567,Abnormal
111 | 68.61300092,15.0822353,63.01469619,53.53076561,123.4311742,39.49798659,Abnormal
112 | 63.90063261,13.7062037,62.12433389,50.19442891,114.1292425,41.42282844,Abnormal
113 | 84.99895554,29.61009772,83.35219438,55.38885782,126.9129899,71.32117542,Abnormal
114 | 42.02138603,-6.554948347,67.89999999,48.57633437,111.5857819,27.33867086,Abnormal
115 | 69.75666532,19.27929659,48.49999999,50.47736873,96.49136982,51.1696403,Abnormal
116 | 80.98807441,36.84317181,86.96060151,44.1449026,141.0881494,85.87215224,Abnormal
117 | 129.8340406,8.404475005,48.38405705,121.4295656,107.690466,418.5430821,Abnormal
118 | 70.48410444,12.48948765,62.41714208,57.99461679,114.1900488,56.90244779,Abnormal
119 | 86.04127982,38.75066978,47.87140494,47.29061004,122.0929536,61.98827709,Abnormal
120 | 65.53600255,24.15748726,45.77516991,41.3785153,136.4403015,16.37808564,Abnormal
121 | 60.7538935,15.7538935,43.19915768,45,113.0533309,31.69354839,Abnormal
122 | 54.74177518,12.09507205,40.99999999,42.64670314,117.6432188,40.3823266,Abnormal
123 | 83.87994081,23.07742686,87.14151223,60.80251395,124.6460723,80.55560527,Abnormal
124 | 80.07491418,48.06953097,52.40343873,32.00538321,110.7099121,67.72731595,Abnormal
125 | 65.66534698,10.54067533,56.48913545,55.12467166,109.1627768,53.93202006,Abnormal
126 | 74.71722805,14.32167879,32.5,60.39554926,107.1822176,37.01708012,Abnormal
127 | 48.06062649,5.687032126,57.05716117,42.37359436,95.44375749,32.83587702,Abnormal
128 | 70.67689818,21.70440224,59.18116082,48.97249594,103.0083545,27.8101478,Abnormal
129 | 80.43342782,16.998479,66.53601753,63.43494882,116.4389807,57.78125,Abnormal
130 | 90.51396072,28.27250132,69.8139423,62.2414594,100.8921596,58.82364821,Abnormal
131 | 77.23689752,16.73762214,49.77553438,60.49927538,110.6903772,39.7871542,Abnormal
132 | 50.06678595,9.120340183,32.16846267,40.94644577,99.71245318,26.76669655,Abnormal
133 | 69.78100617,13.77746531,57.99999999,56.00354085,118.9306656,17.91456046,Abnormal
134 | 69.62628302,21.12275138,52.76659472,48.50353164,116.8030913,54.81686729,Abnormal
135 | 81.75441933,20.12346562,70.56044038,61.63095371,119.4250857,55.50688907,Abnormal
136 | 52.20469309,17.21267289,78.09496877,34.9920202,136.9725168,54.93913416,Abnormal
137 | 77.12134424,30.3498745,77.48108264,46.77146974,110.6111484,82.09360704,Abnormal
138 | 88.0244989,39.84466878,81.77447308,48.17983012,116.6015376,56.76608323,Abnormal
139 | 83.39660609,34.31098931,78.42329287,49.08561678,110.4665164,49.67209559,Abnormal
140 | 72.05403412,24.70073725,79.87401586,47.35329687,107.1723576,56.42615873,Abnormal
141 | 85.09550254,21.06989651,91.73479193,64.02560604,109.062312,38.03283108,Abnormal
142 | 69.56348614,15.4011391,74.43849743,54.16234705,105.0673556,29.70121083,Abnormal
143 | 89.5049473,48.90365265,72.0034229,40.60129465,134.6342912,118.3533701,Abnormal
144 | 85.29017283,18.27888963,100.7442198,67.0112832,110.6607005,58.88494802,Abnormal
145 | 60.62621697,20.5959577,64.53526221,40.03025927,117.2255542,104.8592474,Abnormal
146 | 60.04417717,14.30965614,58.03886519,45.73452103,105.1316639,30.40913315,Abnormal
147 | 85.64378664,42.68919513,78.7506635,42.95459151,105.1440758,42.88742577,Abnormal
148 | 85.58171024,30.45703858,78.23137949,55.12467166,114.8660487,68.37612182,Abnormal
149 | 55.08076562,-3.759929872,55.99999999,58.84069549,109.9153669,31.77358318,Abnormal
150 | 65.75567895,9.832874231,50.82289501,55.92280472,104.3949585,39.30721246,Abnormal
151 | 79.24967118,23.94482471,40.79669829,55.30484647,98.62251165,36.7063954,Abnormal
152 | 81.11260488,20.69044356,60.68700588,60.42216132,94.01878339,40.51098228,Abnormal
153 | 48.0306238,3.969814743,58.34451924,44.06080905,125.3509625,35.00007784,Abnormal
154 | 63.40448058,14.11532726,48.13680562,49.28915333,111.9160075,31.78449499,Abnormal
155 | 57.28694488,15.1493501,63.99999999,42.13759477,116.7353868,30.34120327,Abnormal
156 | 41.18776972,5.792973871,42.86739151,35.39479584,103.3488802,27.66027669,Abnormal
157 | 66.80479632,14.55160171,72.08491177,52.25319461,82.45603817,41.6854736,Abnormal
158 | 79.4769781,26.73226755,70.65098189,52.74471055,118.5886691,61.70059824,Abnormal
159 | 44.21646446,1.507074501,46.11033909,42.70938996,108.6295666,42.81048066,Abnormal
160 | 57.03509717,0.34572799,49.19800263,56.68936918,103.0486975,52.16514503,Abnormal
161 | 64.27481758,12.50864276,68.70237672,51.76617482,95.25245421,39.40982612,Abnormal
162 | 92.02630795,35.39267395,77.41696348,56.633634,115.72353,58.05754155,Abnormal
163 | 67.26314926,7.194661096,51.69688681,60.06848816,97.8010854,42.13694325,Abnormal
164 | 118.1446548,38.44950127,50.83851954,79.69515353,81.0245406,74.04376736,Abnormal
165 | 115.9232606,37.51543601,76.79999999,78.40782459,104.6986033,81.19892712,Abnormal
166 | 53.94165809,9.306594428,43.10049819,44.63506366,124.3978211,25.0821266,Abnormal
167 | 83.7031774,20.26822858,77.1105979,63.43494882,125.4801739,69.279571,Abnormal
168 | 56.99140382,6.87408897,57.00900516,50.11731485,109.978045,36.81011057,Abnormal
169 | 72.34359434,16.42078962,59.86901238,55.92280472,70.08257486,12.07264427,Abnormal
170 | 95.38259648,24.82263131,95.15763273,70.55996517,89.3075466,57.66084135,Abnormal
171 | 44.25347645,1.101086714,38,43.15238973,98.27410705,23.9106354,Abnormal
172 | 64.80954139,15.17407796,58.83999352,49.63546343,111.679961,21.40719845,Abnormal
173 | 78.40125389,14.04225971,79.69426258,64.35899418,104.7312342,12.39285327,Abnormal
174 | 56.66829282,13.45820343,43.76970978,43.21008939,93.69220863,21.10812135,Abnormal
175 | 50.82502875,9.064729049,56.29999999,41.7602997,78.99945411,23.04152435,Abnormal
176 | 61.41173702,25.38436364,39.09686927,36.02737339,103.4045971,21.84340688,Abnormal
177 | 56.56382381,8.961261611,52.57784639,47.6025622,98.77711506,50.70187326,Abnormal
178 | 67.02766447,13.28150221,66.15040334,53.74616226,100.7154129,33.98913551,Abnormal
179 | 80.81777144,19.23898066,61.64245116,61.57879078,89.47183446,44.167602,Abnormal
180 | 80.65431956,26.34437939,60.89811835,54.30994017,120.1034928,52.46755185,Abnormal
181 | 68.72190982,49.4318636,68.0560124,19.29004622,125.0185168,54.69128928,Abnormal
182 | 37.90391014,4.47909896,24.71027447,33.42481118,157.848799,33.60702661,Abnormal
183 | 64.62400798,15.22530262,67.63216653,49.39870535,90.298468,31.32641123,Abnormal
184 | 75.43774787,31.53945399,89.59999999,43.89829388,106.8295898,54.96578902,Abnormal
185 | 71.00194076,37.51577195,84.53709256,33.48616882,125.1642324,67.77118983,Abnormal
186 | 81.05661087,20.80149217,91.78449512,60.2551187,125.430176,38.18178176,Abnormal
187 | 91.46874146,24.50817744,84.62027202,66.96056402,117.3078968,52.62304673,Abnormal
188 | 81.08232025,21.25584028,78.76675639,59.82647997,90.07187999,49.159426,Abnormal
189 | 60.419932,5.265665422,59.8142356,55.15426658,109.0330745,30.26578534,Abnormal
190 | 85.68094951,38.65003527,82.68097744,47.03091424,120.8407069,61.95903428,Abnormal
191 | 82.4065243,29.27642195,77.05456489,53.13010235,117.0422439,62.76534831,Abnormal
192 | 43.7182623,9.811985315,51.99999999,33.90627699,88.43424213,40.88092253,Abnormal
193 | 86.472905,40.30376567,61.14101155,46.16913933,97.4041888,55.75222146,Abnormal
194 | 74.46908181,33.28315665,66.94210105,41.18592517,146.4660009,124.9844057,Abnormal
195 | 70.25043628,10.34012252,76.37007032,59.91031376,119.2370072,32.66650243,Abnormal
196 | 72.64385013,18.92911726,67.99999999,53.71473287,116.9634162,25.38424676,Abnormal
197 | 71.24176388,5.268270454,85.99958417,65.97349342,110.703107,38.2598637,Abnormal
198 | 63.7723908,12.76338484,65.36052425,51.00900596,89.82274067,55.99545386,Abnormal
199 | 58.82837872,37.57787321,125.7423855,21.25050551,135.6294176,117.3146829,Abnormal
200 | 74.85448008,13.90908417,62.69325884,60.9453959,115.2087008,33.17225512,Abnormal
201 | 75.29847847,16.67148361,61.29620362,58.62699486,118.8833881,31.57582292,Abnormal
202 | 63.36433898,20.02462134,67.49870507,43.33971763,130.9992576,37.55670552,Abnormal
203 | 67.51305267,33.2755899,96.28306169,34.23746278,145.6010328,88.30148594,Abnormal
204 | 76.31402766,41.93368293,93.2848628,34.38034472,132.2672855,101.2187828,Abnormal
205 | 73.63596236,9.711317947,62.99999999,63.92464442,98.72792982,26.97578722,Abnormal
206 | 56.53505139,14.37718927,44.99154663,42.15786212,101.7233343,25.77317356,Abnormal
207 | 80.11157156,33.94243223,85.10160773,46.16913933,125.5936237,100.2921068,Abnormal
208 | 95.48022873,46.55005318,58.99999999,48.93017555,96.68390337,77.28307195,Abnormal
209 | 74.09473084,18.82372712,76.03215571,55.27100372,128.4057314,73.38821617,Abnormal
210 | 87.67908663,20.36561331,93.82241589,67.31347333,120.9448288,76.73062904,Abnormal
211 | 48.25991962,16.41746236,36.32913708,31.84245726,94.88233607,28.34379914,Abnormal
212 | 38.50527283,16.96429691,35.11281407,21.54097592,127.6328747,7.986683227,Normal
213 | 54.92085752,18.96842952,51.60145541,35.952428,125.8466462,2.001642472,Normal
214 | 44.36249017,8.945434892,46.90209626,35.41705528,129.220682,4.994195288,Normal
215 | 48.3189305,17.45212105,47.99999999,30.86680945,128.9803079,-0.910940567,Normal
216 | 45.70178875,10.65985935,42.5778464,35.0419294,130.1783144,-3.38890999,Normal
217 | 30.74193812,13.35496594,35.90352597,17.38697218,142.4101072,-2.005372903,Normal
218 | 50.91310144,6.6769999,30.89652243,44.23610154,118.151531,-1.057985526,Normal
219 | 38.12658854,6.557617408,50.44507473,31.56897113,132.114805,6.338199339,Normal
220 | 51.62467183,15.96934373,35,35.6553281,129.385308,1.00922834,Normal
221 | 64.31186727,26.32836901,50.95896417,37.98349826,106.1777511,3.118221289,Normal
222 | 44.48927476,21.78643263,31.47415392,22.70284212,113.7784936,-0.284129366,Normal
223 | 54.9509702,5.865353416,52.99999999,49.08561678,126.9703283,-0.631602951,Normal
224 | 56.10377352,13.10630665,62.63701952,42.99746687,116.2285032,31.17276727,Normal
225 | 69.3988184,18.89840693,75.96636144,50.50041147,103.5825398,-0.44366081,Normal
226 | 89.83467631,22.63921678,90.56346144,67.19545953,100.5011917,3.040973261,Normal
227 | 59.72614016,7.724872599,55.34348527,52.00126756,125.1742214,3.235159224,Normal
228 | 63.95952166,16.06094486,63.12373633,47.8985768,142.3601245,6.298970934,Normal
229 | 61.54059876,19.67695713,52.89222856,41.86364163,118.6862678,4.815031084,Normal
230 | 38.04655072,8.30166942,26.23683004,29.7448813,123.8034132,3.885773488,Normal
231 | 43.43645061,10.09574326,36.03222439,33.34070735,137.4396942,-3.114450861,Normal
232 | 65.61180231,23.13791922,62.58217893,42.47388309,124.1280012,-4.083298414,Normal
233 | 53.91105429,12.93931796,38.99999999,40.97173633,118.1930354,5.074353176,Normal
234 | 43.11795103,13.81574355,40.34738779,29.30220748,128.5177217,0.970926407,Normal
235 | 40.6832291,9.148437195,31.02159252,31.53479191,139.1184721,-2.511618596,Normal
236 | 37.7319919,9.386298276,41.99999999,28.34569362,135.740926,13.68304672,Normal
237 | 63.92947003,19.97109671,40.17704963,43.95837332,113.0659387,-11.05817866,Normal
238 | 61.82162717,13.59710457,63.99999999,48.22452261,121.779803,1.296191194,Normal
239 | 62.14080535,13.96097523,57.99999999,48.17983012,133.2818339,4.955105669,Normal
240 | 69.00491277,13.29178975,55.5701429,55.71312302,126.6116215,10.83201105,Normal
241 | 56.44702568,19.44449915,43.5778464,37.00252653,139.1896903,-1.859688529,Normal
242 | 41.6469159,8.835549101,36.03197484,32.8113668,116.5551679,-6.054537956,Normal
243 | 51.52935759,13.51784732,35,38.01151027,126.7185156,13.92833085,Normal
244 | 39.08726449,5.536602477,26.93203835,33.55066201,131.5844199,-0.75946135,Normal
245 | 34.64992241,7.514782784,42.99999999,27.13513962,123.9877408,-4.082937601,Normal
246 | 63.02630005,27.33624023,51.60501665,35.69005983,114.5066078,7.439869802,Normal
247 | 47.80555887,10.68869819,53.99999999,37.11686068,125.3911378,-0.402523218,Normal
248 | 46.63786363,15.85371711,39.99999999,30.78414653,119.3776026,9.06458168,Normal
249 | 49.82813487,16.73643493,28,33.09169994,121.4355585,1.91330704,Normal
250 | 47.31964755,8.573680295,35.56025198,38.74596726,120.5769719,1.630663508,Normal
251 | 50.75329025,20.23505957,37,30.51823068,122.343516,2.288487746,Normal
252 | 36.15782981,-0.810514093,33.62731353,36.96834391,135.9369096,-2.092506504,Normal
253 | 40.74699612,1.835524271,49.99999999,38.91147185,139.2471502,0.668556793,Normal
254 | 42.91804052,-5.845994341,57.99999999,48.76403486,121.6068586,-3.362044654,Normal
255 | 63.79242525,21.34532339,65.99999999,42.44710185,119.5503909,12.38260373,Normal
256 | 72.95564397,19.57697146,61.00707117,53.37867251,111.2340468,0.813491154,Normal
257 | 67.53818154,14.65504222,58.00142908,52.88313932,123.6322597,25.9702063,Normal
258 | 54.75251965,9.752519649,47.99999999,45,123.0379985,8.235294118,Normal
259 | 50.16007802,-2.970024337,41.99999999,53.13010235,131.8024914,-8.290203373,Normal
260 | 40.34929637,10.19474845,37.96774659,30.15454792,128.0099272,0.458901373,Normal
261 | 63.61919213,16.93450781,49.34926218,46.68468432,117.0897469,-0.357811974,Normal
262 | 54.14240778,11.93511014,42.99999999,42.20729763,122.2090834,0.153549242,Normal
263 | 74.97602148,14.92170492,53.73007172,60.05431656,105.6453997,1.594747729,Normal
264 | 42.51727249,14.37567126,25.32356538,28.14160123,128.9056892,0.75702014,Normal
265 | 33.78884314,3.675109986,25.5,30.11373315,128.3253556,-1.776111234,Normal
266 | 54.5036853,6.819910138,46.99999999,47.68377516,111.7911722,-4.406769011,Normal
267 | 48.17074627,9.594216702,39.71092029,38.57652956,135.6233101,5.360050572,Normal
268 | 46.37408781,10.21590237,42.69999999,36.15818544,121.2476572,-0.54202201,Normal
269 | 52.86221391,9.410371613,46.98805181,43.4518423,123.0912395,1.856659161,Normal
270 | 57.1458515,16.48909145,42.84214764,40.65676005,113.8061775,5.0151857,Normal
271 | 37.14014978,16.48123972,24,20.65891006,125.0143609,7.366425398,Normal
272 | 51.31177106,8.875541276,56.99999999,42.43622979,126.4722584,-2.144043911,Normal
273 | 42.51561014,16.54121618,41.99999999,25.97439396,120.631941,7.876730692,Normal
274 | 39.35870531,7.011261806,37,32.3474435,117.8187599,1.904048199,Normal
275 | 35.8775708,1.112373561,43.45725694,34.76519724,126.9239062,-1.632238263,Normal
276 | 43.1919153,9.976663803,28.93814927,33.21525149,123.4674001,1.741017579,Normal
277 | 67.28971201,16.7175142,50.99999999,50.5721978,137.5917777,4.960343813,Normal
278 | 51.32546366,13.63122319,33.25857782,37.69424047,131.3061224,1.78886965,Normal
279 | 65.7563482,13.20692644,43.99999999,52.54942177,129.3935728,-1.982120038,Normal
280 | 40.41336566,-1.329412398,30.98276809,41.74277806,119.3356546,-6.173674823,Normal
281 | 48.80190855,18.01776202,51.99999999,30.78414653,139.1504066,10.44286169,Normal
282 | 50.08615264,13.43004422,34.45754051,36.65610842,119.1346221,3.089484465,Normal
283 | 64.26150724,14.49786554,43.90250363,49.76364169,115.3882683,5.951454368,Normal
284 | 53.68337998,13.44702168,41.58429713,40.23635831,113.9137026,2.737035292,Normal
285 | 48.99595771,13.11382047,51.87351997,35.88213725,126.3981876,0.535471617,Normal
286 | 59.16761171,14.56274875,43.19915768,44.60486296,121.0356423,2.830504124,Normal
287 | 67.80469442,16.55066167,43.25680184,51.25403274,119.6856451,4.867539941,Normal
288 | 61.73487533,17.11431203,46.89999999,44.6205633,120.9201997,3.087725997,Normal
289 | 33.04168754,-0.324678459,19.0710746,33.366366,120.3886112,9.354364925,Normal
290 | 74.56501543,15.72431994,58.61858244,58.84069549,105.417304,0.599247113,Normal
291 | 44.43070103,14.17426387,32.2434952,30.25643716,131.7176127,-3.604255336,Normal
292 | 36.42248549,13.87942449,20.24256187,22.543061,126.0768612,0.179717077,Normal
293 | 51.07983294,14.20993529,35.95122893,36.86989765,115.8037111,6.905089963,Normal
294 | 34.75673809,2.631739646,29.50438112,32.12499844,127.1398495,-0.460894198,Normal
295 | 48.90290434,5.587588658,55.49999999,43.31531568,137.1082886,19.85475919,Normal
296 | 46.23639915,10.0627701,37,36.17362905,128.0636203,-5.100053328,Normal
297 | 46.42636614,6.620795049,48.09999999,39.80557109,130.3500956,2.449382401,Normal
298 | 39.65690201,16.20883944,36.67485694,23.44806258,131.922009,-4.968979881,Normal
299 | 45.57548229,18.75913544,33.77414297,26.81634684,116.7970069,3.131909921,Normal
300 | 66.50717865,20.89767207,31.72747138,45.60950658,128.9029049,1.517203356,Normal
301 | 82.90535054,29.89411893,58.25054221,53.01123161,110.7089577,6.079337831,Normal
302 | 50.67667667,6.461501271,35,44.2151754,116.5879699,-0.214710615,Normal
303 | 89.01487529,26.07598143,69.02125897,62.93889386,111.4810746,6.061508401,Normal
304 | 54.60031622,21.48897426,29.36021618,33.11134196,118.3433212,-1.471067262,Normal
305 | 34.38229939,2.062682882,32.39081996,32.31961651,128.3001991,-3.365515555,Normal
306 | 45.07545026,12.30695118,44.58317718,32.76849908,147.8946372,-8.941709421,Normal
307 | 47.90356517,13.61668819,36,34.28687698,117.4490622,-4.245395422,Normal
308 | 53.93674778,20.72149628,29.22053381,33.21525149,114.365845,-0.421010392,Normal
309 | 61.44659663,22.6949683,46.17034732,38.75162833,125.6707246,-2.707879517,Normal
310 | 45.25279209,8.693157364,41.5831264,36.55963472,118.5458418,0.214750167,Normal
311 | 33.84164075,5.073991409,36.64123294,28.76764934,123.9452436,-0.199249089,Normal
312 | 


--------------------------------------------------------------------------------