├── Small_examples
│   └── init
├── docker_deploy
│   ├── app
│   │   ├── __init__.py
│   │   ├── requirements.txt
│   │   ├── aditya_model1_adaboost.joblib
│   │   ├── __pycache__
│   │   │   ├── main.cpython-37.pyc
│   │   │   ├── utils.cpython-37.pyc
│   │   │   ├── constants.cpython-37.pyc
│   │   │   └── data_model.cpython-37.pyc
│   │   ├── data_model.py
│   │   ├── constants.py
│   │   ├── utils.py
│   │   └── main.py
│   ├── requirements.txt
│   └── Dockerfile
├── unit testing
│   ├── unit_test
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── test_encode_features.cpython-37-pytest-5.4.3.pyc
│   │   │   ├── test_normalize_data.cpython-37-pytest-5.4.3.pyc
│   │   │   ├── test_get_inference_data.cpython-37-pytest-5.4.3.pyc
│   │   │   ├── test_apply_pre_proceesing.cpython-37-pytest-5.4.3.pyc
│   │   │   └── test_apply_pre_processing.cpython-37-pytest-5.4.3.pyc
│   │   ├── test_encode_features.py
│   │   ├── test_apply_pre_proceesing.py
│   │   ├── test_normalize_data.py
│   │   └── test_get_inference_data.py
│   ├── pytest_command.txt
│   ├── aditya_model2_svm.joblib
│   ├── aditya_model1_adaboost.joblib
│   ├── __pycache__
│   │   ├── main.cpython-37.pyc
│   │   ├── utils.cpython-37.pyc
│   │   └── constants.cpython-37.pyc
│   ├── data_model.py
│   ├── Data
│   │   ├── inference_heart_disease.csv
│   │   └── heart.csv
│   ├── constants.py
│   ├── utils.py
│   ├── Heart Disease Inference Modularized.ipynb
│   └── main.py
├── exception handling and logging
│   ├── __init__.py
│   ├── unit_test
│   │   ├── __init__.py
│   │   ├── test_encode_features.py
│   │   ├── test_apply_pre_proceesing.py
│   │   ├── test_get_inference_data.py
│   │   └── test_normalize_data.py
│   ├── aditya_model2_svm.joblib
│   ├── aditya_model1_adaboost.joblib
│   ├── __pycache__
│   │   ├── utils.cpython-37.pyc
│   │   └── constants.cpython-37.pyc
│   ├── data_model.py
│   ├── Data
│   │   ├── inference_heart_disease.csv
│   │   └── heart.csv
│   ├── constants.py
│   ├── inference_pipe_exec.log
│   ├── main.py
│   ├── utils.py
│   ├── Heart Disease Inference Modularized.ipynb
│   └── main_old.py
├── docker_commands.docx
├── docker_commands.pdf
├── python scripting
│   ├── aditya_model2_svm.joblib
│   ├── aditya_model1_adaboost.pkl
│   ├── aditya_model1_adaboost.joblib
│   ├── __pycache__
│   │   ├── utils.cpython-37.pyc
│   │   └── constants.cpython-37.pyc
│   ├── data_model.py
│   ├── Data
│   │   ├── inference_heart_disease.csv
│   │   └── heart.csv
│   ├── constants.py
│   ├── main.py
│   ├── utils.py
│   ├── Heart Disease Inference Modularized.ipynb
│   └── Heart Disease Inference.py
├── uci_heart_disease
│   ├── aditya_model2_svm.joblib
│   ├── aditya_model1_adaboost.pkl
│   ├── aditya_model1_adaboost.joblib
│   ├── __pycache__
│   │   ├── utils.cpython-37.pyc
│   │   └── constants.cpython-37.pyc
│   ├── constants.py
│   ├── data_model.py
│   ├── Data
│   │   ├── inference_heart_disease.csv
│   │   └── heart.csv
│   ├── utils.py
│   ├── main.py
│   ├── Heart Disease Inference.py
│   ├── Heart Disease Inference Modularized.ipynb
│   └── Heart Disease Inference.ipynb
├── code linting
│   ├── commands_pycodestyle.txt
│   ├── Corrected
│   │   ├── utils.py
│   │   └── Heart Disease Inference.py
│   ├── utils.py
│   └── Heart Disease Inference.py
├── code documentation
│   ├── commands_pycodestyle.txt
│   ├── constants.py
│   ├── Corrected
│   │   ├── constants.py
│   │   ├── utils.py
│   │   ├── Heart Disease Inference Modularized.ipynb
│   │   └── Heart Disease Inference.py
│   ├── utils.py
│   ├── Heart Disease Inference.py
│   └── Heart Disease Inference Modularized.ipynb
├── Links.txt
├── fast api
│   └── HelloWorldFAPI.py
├── env create.txt
├── docker_commands.txt
└── requirements.txt

/Small_examples/init:
--------------------------------------------------------------------------------
1 | 2 |
--------------------------------------------------------------------------------
/docker_deploy/app/__init__.py:
--------------------------------------------------------------------------------
1 |
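# (empty by design — an empty __init__.py simply marks app/ as a Python package)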
--------------------------------------------------------------------------------
/unit testing/unit_test/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/exception handling and logging/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/exception handling and logging/unit_test/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docker_commands.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/docker_commands.docx
--------------------------------------------------------------------------------
/docker_commands.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/docker_commands.pdf
--------------------------------------------------------------------------------
/unit testing/pytest_command.txt:
--------------------------------------------------------------------------------
1 | $ pytest -q 2 | $ pytest -q unit_test\test_get_inference_data.py 3 | $ tree /f # to view directory structure
--------------------------------------------------------------------------------
/unit testing/aditya_model2_svm.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/aditya_model2_svm.joblib
--------------------------------------------------------------------------------
/docker_deploy/app/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi 2 | matplotlib 3 | numpy 4 | pandas 5 | pydantic 6 | scikit-learn 7 | scipy 8 | seaborn 9 | uvicorn[standard]
--------------------------------------------------------------------------------
/docker_deploy/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi 2 | matplotlib 3 | numpy 4 | pandas 5 | pydantic 6 | scikit-learn 7 | scipy 8 | seaborn 9 | uvicorn[standard]
--------------------------------------------------------------------------------
/python scripting/aditya_model2_svm.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/python scripting/aditya_model2_svm.joblib
--------------------------------------------------------------------------------
/python scripting/aditya_model1_adaboost.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/python scripting/aditya_model1_adaboost.pkl
--------------------------------------------------------------------------------
/uci_heart_disease/aditya_model2_svm.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/uci_heart_disease/aditya_model2_svm.joblib
--------------------------------------------------------------------------------
/unit
testing/aditya_model1_adaboost.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/aditya_model1_adaboost.joblib -------------------------------------------------------------------------------- /python scripting/aditya_model1_adaboost.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/python scripting/aditya_model1_adaboost.joblib -------------------------------------------------------------------------------- /uci_heart_disease/aditya_model1_adaboost.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/uci_heart_disease/aditya_model1_adaboost.pkl -------------------------------------------------------------------------------- /unit testing/__pycache__/main.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/__pycache__/main.cpython-37.pyc -------------------------------------------------------------------------------- /unit testing/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /docker_deploy/app/aditya_model1_adaboost.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/docker_deploy/app/aditya_model1_adaboost.joblib -------------------------------------------------------------------------------- /uci_heart_disease/aditya_model1_adaboost.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/uci_heart_disease/aditya_model1_adaboost.joblib -------------------------------------------------------------------------------- /code linting/commands_pycodestyle.txt: -------------------------------------------------------------------------------- 1 | pycodestyle --first file_name.py 2 | pycodestyle --show-source --show-pep8 file_name.py 3 | pycodestyle --statistics -qq file_name.py -------------------------------------------------------------------------------- /docker_deploy/app/__pycache__/main.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/docker_deploy/app/__pycache__/main.cpython-37.pyc -------------------------------------------------------------------------------- /docker_deploy/app/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/docker_deploy/app/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /python scripting/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/python 
scripting/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /uci_heart_disease/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/uci_heart_disease/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /unit testing/__pycache__/constants.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/__pycache__/constants.cpython-37.pyc -------------------------------------------------------------------------------- /code documentation/commands_pycodestyle.txt: -------------------------------------------------------------------------------- 1 | pycodestyle --first file_name.py 2 | pycodestyle --show-source --show-pep8 file_name.py 3 | pycodestyle --statistics -qq file_name.py -------------------------------------------------------------------------------- /python scripting/__pycache__/constants.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/python scripting/__pycache__/constants.cpython-37.pyc -------------------------------------------------------------------------------- /docker_deploy/app/__pycache__/constants.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/docker_deploy/app/__pycache__/constants.cpython-37.pyc -------------------------------------------------------------------------------- /docker_deploy/app/__pycache__/data_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/docker_deploy/app/__pycache__/data_model.cpython-37.pyc -------------------------------------------------------------------------------- /exception handling and logging/aditya_model2_svm.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/exception handling and logging/aditya_model2_svm.joblib -------------------------------------------------------------------------------- /uci_heart_disease/__pycache__/constants.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/uci_heart_disease/__pycache__/constants.cpython-37.pyc -------------------------------------------------------------------------------- /unit testing/unit_test/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/unit_test/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /exception handling and logging/aditya_model1_adaboost.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/exception handling and logging/aditya_model1_adaboost.joblib 
-------------------------------------------------------------------------------- /exception handling and logging/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/exception handling and logging/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /exception handling and logging/__pycache__/constants.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/exception handling and logging/__pycache__/constants.cpython-37.pyc -------------------------------------------------------------------------------- /unit testing/unit_test/__pycache__/test_encode_features.cpython-37-pytest-5.4.3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/unit_test/__pycache__/test_encode_features.cpython-37-pytest-5.4.3.pyc -------------------------------------------------------------------------------- /unit testing/unit_test/__pycache__/test_normalize_data.cpython-37-pytest-5.4.3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/unit_test/__pycache__/test_normalize_data.cpython-37-pytest-5.4.3.pyc -------------------------------------------------------------------------------- /unit testing/unit_test/__pycache__/test_get_inference_data.cpython-37-pytest-5.4.3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/unit_test/__pycache__/test_get_inference_data.cpython-37-pytest-5.4.3.pyc -------------------------------------------------------------------------------- /docker_deploy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7.7 2 | 3 | COPY ./app /app 4 | 5 | WORKDIR /app 6 | 7 | RUN pip install -r requirements.txt 8 | 9 | EXPOSE 8000 10 | 11 | ENTRYPOINT ["uvicorn"] 12 | 13 | CMD ["main:app", "--host", "0.0.0.0"] -------------------------------------------------------------------------------- /unit testing/unit_test/__pycache__/test_apply_pre_proceesing.cpython-37-pytest-5.4.3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/unit_test/__pycache__/test_apply_pre_proceesing.cpython-37-pytest-5.4.3.pyc -------------------------------------------------------------------------------- /unit testing/unit_test/__pycache__/test_apply_pre_processing.cpython-37-pytest-5.4.3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Pre-Requisite_MLOps/master/unit testing/unit_test/__pycache__/test_apply_pre_processing.cpython-37-pytest-5.4.3.pyc -------------------------------------------------------------------------------- /Links.txt: -------------------------------------------------------------------------------- 1 | https://towardsdatascience.com/how-you-can-quickly-deploy-your-ml-models-with-fastapi-9428085a87bf 2 | 
https://towardsdatascience.com/how-to-deploy-a-machine-learning-model-with-fastapi-docker-and-github-actions-13374cbd638a 3 | https://hub.docker.com/_/hello-world 4 | https://docs.docker.com/desktop/
--------------------------------------------------------------------------------
/fast api/HelloWorldFAPI.py:
--------------------------------------------------------------------------------
1 | # !pip install fastapi 2 | # !pip install "uvicorn[standard]" 3 | import uvicorn 4 | from fastapi import FastAPI 5 | 6 | app = FastAPI() 7 | 8 | @app.get("/") 9 | def home(): 10 | return {"Hello": "World"} 11 | 12 | if __name__ == "__main__": 13 | uvicorn.run("HelloWorldFAPI:app")
--------------------------------------------------------------------------------
/unit testing/unit_test/test_encode_features.py:
--------------------------------------------------------------------------------
1 | # 1. Write a test case to validate that the 2 | # length of the ONE_HOT_ENCODED_FEATURES list 3 | # matches the number of columns of the 4 | # dataframe returned by encode_features() 5 | 6 | 7 | # 2. Write a test case to ensure all 8 | # categorical values are converted to 9 | # vector encodings
--------------------------------------------------------------------------------
/exception handling and logging/unit_test/test_encode_features.py:
--------------------------------------------------------------------------------
1 | # 1. Write a test case to validate that the 2 | # length of the ONE_HOT_ENCODED_FEATURES list 3 | # matches the number of columns of the 4 | # dataframe returned by encode_features() 5 | 6 | 7 | # 2. Write a test case to ensure all 8 | # categorical values are converted to 9 | # vector encodings
--------------------------------------------------------------------------------
/unit testing/unit_test/test_apply_pre_proceesing.py:
--------------------------------------------------------------------------------
1 | # 1. Write a test case to check 2 | # that the FEATURES_TO_ENCODE constant 3 | # is not null and that 4 | # all the list elements are 5 | # strings 6 | 7 | 8 | # 2. Write a test case to ensure that the shapes of the 9 | # dataframes stay consistent after one-hot encoding 10 | # and data normalization
--------------------------------------------------------------------------------
/exception handling and logging/unit_test/test_apply_pre_proceesing.py:
--------------------------------------------------------------------------------
1 | # 1. Write a test case to check 2 | # that the FEATURES_TO_ENCODE constant 3 | # is not null and that 4 | # all the list elements are 5 | # strings 6 | 7 | 8 | # 2. Write a test case to ensure that the shapes of the 9 | # dataframes stay consistent after one-hot encoding 10 | # and data normalization
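# A possible sketch of these two tests (illustrative only, not the course
# solution; it assumes pytest is run from this folder's root so that utils.py
# and constants.py are importable, mirroring the other files in unit_test/):
'''
import warnings
warnings.filterwarnings("ignore")
from utils import *


class TestApplyPreProcessing:
    def test_features_to_encode_constant(self):
        # FEATURES_TO_ENCODE comes from constants.py via utils' star import
        assert FEATURES_TO_ENCODE, "FEATURES_TO_ENCODE should not be null or empty"
        assert all(isinstance(f, str) for f in FEATURES_TO_ENCODE), \
            "all elements of FEATURES_TO_ENCODE should be strings"

    def test_shape_consistency(self):
        inference_data, labels = get_inference_data()
        processed = apply_pre_processing(inference_data)
        # row count must survive one-hot encoding + normalization unchanged
        assert len(inference_data) == len(processed), \
            "row count changed during pre-processing"
'''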
--------------------------------------------------------------------------------
/unit testing/unit_test/test_normalize_data.py:
--------------------------------------------------------------------------------
1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | from utils import * 4 | import pandas as pd 5 | 6 | class TestDataNormalization: 7 | def test_norm_data_len(self): 8 | inference_data, labels = get_inference_data() 9 | norm_df = normalize_data(inference_data) 10 | assert len(inference_data) == len(norm_df), "length has changed after normalization"
--------------------------------------------------------------------------------
/unit testing/data_model.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel 2 | from typing import Optional 3 | 4 | class InputDataModel(BaseModel): 5 | age: float 6 | sex: int 7 | chest_pain_type: int 8 | resting_bp: float 9 | cholestoral: float 10 | fasting_blood_sugar: float 11 | restecg: int 12 | max_hr: float 13 | exang: int 14 | oldpeak: float 15 | slope: int 16 | num_major_vessels: int 17 | thal: int 18 | 19 | class OutputDataModel(BaseModel): 20 | predicted_value: bool 21 | predicted_class: str 22 |
--------------------------------------------------------------------------------
/docker_deploy/app/data_model.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel 2 | from typing import Optional 3 | 4 | class InputDataModel(BaseModel): 5 | age: float 6 | sex: int 7 | chest_pain_type: int 8 | resting_bp: float 9 | cholestoral: float 10 | fasting_blood_sugar: float 11 | restecg: int 12 | max_hr: float 13 | exang: int 14 | oldpeak: float 15 | slope: int 16 | num_major_vessels: int 17 | thal: int 18 | 19 | class OutputDataModel(BaseModel): 20 | predicted_value: bool 21 | predicted_class: str 22 |
--------------------------------------------------------------------------------
/python scripting/data_model.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel 2 | from typing import Optional 3 | 4 | class InputDataModel(BaseModel): 5 | age: float 6 | sex: int 7 | chest_pain_type: int 8 | resting_bp: float 9 | cholestoral: float 10 | fasting_blood_sugar: float 11 | restecg: int 12 | max_hr: float 13 | exang: int 14 | oldpeak: float 15 | slope: int 16 | num_major_vessels: int 17 | thal: int 18 | 19 | class OutputDataModel(BaseModel): 20 | predicted_value: bool 21 | predicted_class: str 22 |
--------------------------------------------------------------------------------
/exception handling and logging/data_model.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel 2 | from typing import Optional 3 | 4 | class InputDataModel(BaseModel): 5 | age: float 6 | sex: int 7 | chest_pain_type: int 8 | resting_bp: float 9 | cholestoral: float 10 | fasting_blood_sugar: float 11 | restecg: int 12 | max_hr: float 13 | exang: int 14 | oldpeak: float 15 | slope: int 16 | num_major_vessels: int 17 | thal: int 18 | 19 | class OutputDataModel(BaseModel): 20 | predicted_value: bool 21 | predicted_class: str 22 |
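# Illustrative only (not part of the repo): pydantic validates and coerces a
# payload into InputDataModel — FastAPI does exactly this automatically for the
# /predict route in main.py. Using the sample POST body kept in env create.txt:
#
#     sample = InputDataModel(age=66, sex=1, chest_pain_type=0, resting_bp=120,
#                             cholestoral=302, fasting_blood_sugar=0, restecg=0,
#                             max_hr=151, exang=0, oldpeak=0.4, slope=1,
#                             num_major_vessels=0, thal=2)
#     sample.dict()  # plain dict (pydantic v1 API, as pinned in requirements.txt)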
--------------------------------------------------------------------------------
/uci_heart_disease/constants.py:
--------------------------------------------------------------------------------
1 | ONE_HOT_ENCODED_FEATURES = ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 2 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 3 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 4 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 5 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2'] 6 | 7 | FEATURES_TO_ENCODE = ['thal', 'slope', 'chest_pain_type', 'restecg'] 8 | 9 | MODEL_NAME = 'aditya_model1_adaboost.joblib' 10 | 11 | # Column names of the source tabular data (referenced by main.py in this folder) 12 | ORIGINAL_FEATURES = ['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral', 13 | 'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope', 14 | 'num_major_vessels', 'thal']
--------------------------------------------------------------------------------
/uci_heart_disease/data_model.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel 2 | from typing import Optional 3 | 4 | class InputDataModel(BaseModel): 5 | age: float 6 | sex: int 7 | chest_pain_type: int 8 | resting_bp: float 9 | cholestoral: float 10 | fasting_blood_sugar: float 11 | restecg: int 12 | max_hr: float 13 | exang: int 14 | oldpeak: float 15 | slope: int 16 | num_major_vessels: int 17 | thal: int 18 | 19 | class OutputDataModel(BaseModel): 20 | predicted_value: bool 21 | predicted_class: str 22 |
--------------------------------------------------------------------------------
/unit testing/unit_test/test_get_inference_data.py:
--------------------------------------------------------------------------------
1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | from utils import * 4 | import pandas as pd 5 | 6 | 7 | class TestGetInferenceData: 8 | def test_method_output_type(self): 9 | df = pd.DataFrame() 10 | inference_data, labels = get_inference_data() 11 | assert type(inference_data)==type(df), "inference data should be a dataframe" 12 | 13 | def test_method_output_len(self): 14 | inference_data, labels = get_inference_data() 15 | assert len(inference_data) == len(labels), "length of data and labels are not matching"
--------------------------------------------------------------------------------
/exception handling and logging/unit_test/test_get_inference_data.py:
--------------------------------------------------------------------------------
1 | import warnings 2 | warnings.filterwarnings("ignore") 3 | from utils import * 4 | import pandas as pd 5 | 6 | 7 | class TestGetInferenceData: 8 | def test_method_output_type(self): 9 | df = pd.DataFrame() 10 | inference_data, labels = get_inference_data() 11 | assert type(inference_data)==type(df), "inference data should be a dataframe" 12 | 13 | def test_method_output_len(self): 14 | inference_data, labels = get_inference_data() 15 | assert len(inference_data) == len(labels), "length of data and labels are not matching"
--------------------------------------------------------------------------------
/docker_deploy/app/constants.py:
--------------------------------------------------------------------------------
1 | MODEL_NAME = 'aditya_model1_adaboost.joblib' 2 | ORIGINAL_FEATURES = ['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral', 3 | 'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope', 4 | 'num_major_vessels', 'thal'] 5 | FEATURES_TO_ENCODE = ['thal', 'slope', 'chest_pain_type', 'restecg'] 6 | ONE_HOT_ENCODED_FEATURES = ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 7 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 8 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 9 | 'chest_pain_type_0', 'chest_pain_type_1',
'chest_pain_type_2', 10 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2'] -------------------------------------------------------------------------------- /code documentation/constants.py: -------------------------------------------------------------------------------- 1 | MODEL_NAME = 'aditya_model1_adaboost.joblib' 2 | ORIGINAL_FEATURES = ['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral', 3 | 'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope', 4 | 'num_major_vessels', 'thal'] 5 | FEATURES_TO_ENCODE = ['thal', 'slope', 'chest_pain_type', 'restecg'] 6 | ONE_HOT_ENCODED_FEATURES = ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 7 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 8 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 9 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 10 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2'] 11 | -------------------------------------------------------------------------------- /env create.txt: -------------------------------------------------------------------------------- 1 | python -m venv deployment/ # create environment 2 | deployment\Scripts\activate.bat # activate environment 3 | deactivate # deactivate environment 4 | pip freeze > requirements.txt # Requirement.txt 5 | cat requirements.txt # Requirement.txt 6 | 7 | 8 | # Post Call Body 9 | { 10 | "age": 66, 11 | "sex": 1, 12 | "chest_pain_type": 0, 13 | "resting_bp": 120, 14 | "cholestoral": 302, 15 | "fasting_blood_sugar": 0, 16 | "restecg": 0, 17 | "max_hr": 151, 18 | "exang": 0, 19 | "oldpeak": 0.4, 20 | "slope": 1, 21 | "num_major_vessels": 0, 22 | "thal": 2 23 | } 24 | 25 | ============= 26 | Environment Variable 27 | DOCKER_HOST 28 | tcp://192.168.99.100:2376 29 | tcp://localhost:2375 30 | DOCKER_TLS_VERIFY 31 | 1 -------------------------------------------------------------------------------- /exception handling and logging/unit_test/test_normalize_data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | import warnings 3 | warnings.filterwarnings("ignore") 4 | from utils import * 5 | import pandas as pd 6 | 7 | 8 | class TestDataNormalization: 9 | def test_norm_data_len(self): 10 | inference_data, labels = get_inference_data() 11 | norm_df = normalize_data(inference_data) 12 | assert len(inference_data)==len(norm_df),"Length has changed after normalization" 13 | 14 | def test_norm_data_valuerange(self): 15 | inference_data, labels = get_inference_data() 16 | norm_df = normalize_data(inference_data) 17 | assert int(norm_df.max().max()) <= 1,"Max value after normalization should be 1" 18 | assert int(norm_df.min().min()) >= 0,"Min value after normalization should be 0" 19 | ''' -------------------------------------------------------------------------------- /unit testing/Data/inference_heart_disease.csv: -------------------------------------------------------------------------------- 1 | age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target 2 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 3 | 52,1,0,112,230,0,1,160,0,0.0,2,1,2,0 4 | 63,0,1,140,195,0,1,179,0,0.0,2,2,2,1 5 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 6 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 7 | 41,1,2,130,214,0,0,168,0,2.0,1,0,2,1 8 | 39,0,2,94,199,0,1,179,0,0.0,2,0,2,1 9 | 48,1,0,130,256,1,0,150,1,0.0,2,2,3,0 10 | 57,1,0,110,335,0,1,143,1,3.0,1,1,3,0 11 | 47,1,2,138,257,0,0,156,0,0.0,2,0,2,1 12 | 
56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 13 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 14 | 53,1,0,142,226,0,0,111,1,0.0,2,0,3,1 15 | 41,1,1,120,157,0,1,182,0,0.0,2,0,2,1 16 | 54,1,0,110,206,0,0,108,1,0.0,1,1,2,0 17 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 18 | 42,1,0,140,226,0,1,178,0,0.0,2,0,2,1 19 | 29,1,1,130,204,0,0,202,0,0.0,2,0,2,1 20 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 21 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 22 | -------------------------------------------------------------------------------- /python scripting/Data/inference_heart_disease.csv: -------------------------------------------------------------------------------- 1 | age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target 2 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 3 | 52,1,0,112,230,0,1,160,0,0.0,2,1,2,0 4 | 63,0,1,140,195,0,1,179,0,0.0,2,2,2,1 5 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 6 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 7 | 41,1,2,130,214,0,0,168,0,2.0,1,0,2,1 8 | 39,0,2,94,199,0,1,179,0,0.0,2,0,2,1 9 | 48,1,0,130,256,1,0,150,1,0.0,2,2,3,0 10 | 57,1,0,110,335,0,1,143,1,3.0,1,1,3,0 11 | 47,1,2,138,257,0,0,156,0,0.0,2,0,2,1 12 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 13 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 14 | 53,1,0,142,226,0,0,111,1,0.0,2,0,3,1 15 | 41,1,1,120,157,0,1,182,0,0.0,2,0,2,1 16 | 54,1,0,110,206,0,0,108,1,0.0,1,1,2,0 17 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 18 | 42,1,0,140,226,0,1,178,0,0.0,2,0,2,1 19 | 29,1,1,130,204,0,0,202,0,0.0,2,0,2,1 20 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 21 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 22 | -------------------------------------------------------------------------------- /uci_heart_disease/Data/inference_heart_disease.csv: -------------------------------------------------------------------------------- 1 | age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target 2 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 3 | 52,1,0,112,230,0,1,160,0,0.0,2,1,2,0 4 | 63,0,1,140,195,0,1,179,0,0.0,2,2,2,1 5 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 6 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 7 | 41,1,2,130,214,0,0,168,0,2.0,1,0,2,1 8 | 39,0,2,94,199,0,1,179,0,0.0,2,0,2,1 9 | 48,1,0,130,256,1,0,150,1,0.0,2,2,3,0 10 | 57,1,0,110,335,0,1,143,1,3.0,1,1,3,0 11 | 47,1,2,138,257,0,0,156,0,0.0,2,0,2,1 12 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 13 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 14 | 53,1,0,142,226,0,0,111,1,0.0,2,0,3,1 15 | 41,1,1,120,157,0,1,182,0,0.0,2,0,2,1 16 | 54,1,0,110,206,0,0,108,1,0.0,1,1,2,0 17 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 18 | 42,1,0,140,226,0,1,178,0,0.0,2,0,2,1 19 | 29,1,1,130,204,0,0,202,0,0.0,2,0,2,1 20 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 21 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 22 | -------------------------------------------------------------------------------- /exception handling and logging/Data/inference_heart_disease.csv: -------------------------------------------------------------------------------- 1 | age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target 2 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 3 | 52,1,0,112,230,0,1,160,0,0.0,2,1,2,0 4 | 63,0,1,140,195,0,1,179,0,0.0,2,2,2,1 5 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 6 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 7 | 41,1,2,130,214,0,0,168,0,2.0,1,0,2,1 8 | 39,0,2,94,199,0,1,179,0,0.0,2,0,2,1 9 | 48,1,0,130,256,1,0,150,1,0.0,2,2,3,0 10 | 57,1,0,110,335,0,1,143,1,3.0,1,1,3,0 11 | 
47,1,2,138,257,0,0,156,0,0.0,2,0,2,1
12 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0
13 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
14 | 53,1,0,142,226,0,0,111,1,0.0,2,0,3,1
15 | 41,1,1,120,157,0,1,182,0,0.0,2,0,2,1
16 | 54,1,0,110,206,0,0,108,1,0.0,1,1,2,0
17 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1
18 | 42,1,0,140,226,0,1,178,0,0.0,2,0,2,1
19 | 29,1,1,130,204,0,0,202,0,0.0,2,0,2,1
20 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1
21 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
22 |
--------------------------------------------------------------------------------
/unit testing/constants.py:
--------------------------------------------------------------------------------
1 | ''' 2 | constants.py contains all variables 3 | where constant values are assigned 4 | ''' 5 | 6 | # Latest model to be used in the inference process 7 | MODEL_NAME = 'aditya_model1_adaboost.joblib' 8 | # Column names of the source tabular data 9 | ORIGINAL_FEATURES = ['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral', 10 | 'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope', 11 | 'num_major_vessels', 'thal'] 12 | # Categorical features that need to be one-hot encoded 13 | FEATURES_TO_ENCODE = ['thal', 'slope', 'chest_pain_type', 'restecg'] 14 | # Feature names after one-hot encoding 15 | ONE_HOT_ENCODED_FEATURES = ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 16 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 17 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 18 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 19 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2'] 20 |
--------------------------------------------------------------------------------
/python scripting/constants.py:
--------------------------------------------------------------------------------
1 | ''' 2 | constants.py contains all variables 3 | where constant values are assigned 4 | ''' 5 | 6 | # Latest model to be used in the inference process 7 | MODEL_NAME = 'aditya_model1_adaboost.joblib' 8 | # Column names of the source tabular data 9 | ORIGINAL_FEATURES = ['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral', 10 | 'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope', 11 | 'num_major_vessels', 'thal'] 12 | # Categorical features that need to be one-hot encoded 13 | FEATURES_TO_ENCODE = ['thal', 'slope', 'chest_pain_type', 'restecg'] 14 | # Feature names after one-hot encoding 15 | ONE_HOT_ENCODED_FEATURES = ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 16 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 17 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 18 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 19 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2'] 20 |
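# Why pre-declare all 23 ONE_HOT_ENCODED_FEATURES? A sketch of the failure this
# prevents (illustrative): for a single inference row with thal=2,
# pd.get_dummies() alone would emit only a thal_2 column — no thal_0/thal_1/
# thal_3 — so the encoded dataframe would be narrower than the training data.
# encode_features() in utils.py instead copies values into a dataframe built
# from this fixed column list and fills the absent dummy columns with 0.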
--------------------------------------------------------------------------------
/code documentation/Corrected/constants.py:
--------------------------------------------------------------------------------
1 | ''' 2 | constants.py contains all variables 3 | where constant values are assigned 4 | ''' 5 | 6 | # Latest model to be used in the inference process 7 | MODEL_NAME = 'aditya_model1_adaboost.joblib' 8 | # Column names of the source tabular data 9 | ORIGINAL_FEATURES = ['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral', 10 | 'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope', 11 | 'num_major_vessels', 'thal'] 12 | # Categorical features that need to be one-hot encoded 13 | FEATURES_TO_ENCODE = ['thal', 'slope', 'chest_pain_type', 'restecg'] 14 | # Feature names after one-hot encoding 15 | ONE_HOT_ENCODED_FEATURES = ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 16 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 17 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 18 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 19 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2'] 20 |
--------------------------------------------------------------------------------
/exception handling and logging/constants.py:
--------------------------------------------------------------------------------
1 | ''' 2 | constants.py contains all variables 3 | where constant values are assigned 4 | ''' 5 | 6 | # Latest model to be used in the inference process 7 | MODEL_NAME = 'aditya_model1_adaboost.joblib' 8 | # Column names of the source tabular data 9 | ORIGINAL_FEATURES = ['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral', 10 | 'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope', 11 | 'num_major_vessels', 'thal'] 12 | # Categorical features that need to be one-hot encoded 13 | FEATURES_TO_ENCODE = ['thal', 'slope', 'chest_pain_type', 'restecg'] 14 | # Feature names after one-hot encoding 15 | ONE_HOT_ENCODED_FEATURES = ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 16 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 17 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 18 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 19 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2'] 20 |
--------------------------------------------------------------------------------
/docker_commands.txt:
--------------------------------------------------------------------------------
1 | # Build Image 2 | docker build -t fastapideploy . 3 | 4 | # Run Container 5 | docker run -d --name dockercontainer -p 80:8000 fastapideploy 6 | 7 | # PS 8 | docker ps --- Show running containers 9 | docker ps -a --- Show all containers 10 | 11 | # Show Images 12 | docker images 13 | 14 | # Remove 15 | docker rm dockercontainer -- remove container 16 | docker rmi fastapideploy -- remove image 17 | 18 | # Docker Hub 19 | -- Create Repository from Hub 20 | 21 | # Create Tag/Re-Tag Image 22 | docker tag fastapideploy adib0073/fastapi_ml_deploy:fastapideploy 23 | 24 | # Commit Container 25 | docker commit dockercontainer adib0073/fastapi_ml_deploy:fastapideploy 26 | 27 | # Push Image to Docker Hub 28 | docker push adib0073/fastapi_ml_deploy:fastapideploy 29 | 30 | # Pull Docker Images 31 | docker pull adib0073/fastapi_ml_deploy:fastapideploy 32 | 33 | ============================================ 34 | # Build a Docker image 35 | $ docker build -t [image_name]:[tag] .
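# (the trailing "." is the build context: run the command from the directory
#  that contains the Dockerfile, as in the fastapideploy example above)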
36 | 37 | # Run a Docker container specifying a name 38 | $ docker run --name [container_name] [image_name]:[tag] 39 | $ docker run -p [host_port]:[container_port] [image_name] 40 | 41 | # Fetch the logs of a container 42 | $ docker logs -f [container_id_or_name] 43 | 44 | # Run a command in a running container 45 | $ docker exec -it [container_id_or_name] bash 46 | 47 | # Show running containers 48 | $ docker ps 49 | 50 | # Show all containers 51 | $ docker ps -a 52 | 53 | # Show Docker images 54 | $ docker images 55 | 56 | # Stop a Docker container 57 | $ docker stop [container_id_or_name] 58 | 59 | # Remove a Docker container 60 | $ docker rm [container_id_or_name] 61 | 62 | # Remove a Docker image 63 | $ docker rmi [image_id_or_name]
--------------------------------------------------------------------------------
/code documentation/utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd 2 | import numpy as np 3 | from sklearn import preprocessing 4 | from constants import * 5 | 6 | 7 | def get_inference_data(): 8 | data = pd.read_csv("Data/inference_heart_disease.csv") # Live connection to the database 9 | data.drop_duplicates(subset=None, inplace=True) 10 | data.duplicated().any() 11 | 12 | return data[data.columns.drop('target')], data['target'] 13 | 14 | 15 | def encode_features(df, features): 16 | encoded_df = pd.DataFrame(columns= ONE_HOT_ENCODED_FEATURES) 17 | placeholder_df = pd.DataFrame() 18 | 19 | for f in features: 20 | if(f in df.columns): 21 | encoded = pd.get_dummies(df[f]) 22 | encoded = encoded.add_prefix(f + '_') 23 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 24 | else: 25 | print('Feature not found') 26 | return df 27 | 28 | for feature in encoded_df.columns: 29 | if feature in df.columns: 30 | encoded_df[feature] = df[feature] 31 | if feature in placeholder_df.columns: 32 | encoded_df[feature] = placeholder_df[feature] 33 | encoded_df.fillna(0, inplace=True) 34 | 35 | return encoded_df 36 | 37 | def normalize_data(df): 38 | val = df.values 39 | min_max_normalizer = preprocessing.MinMaxScaler() 40 | norm_val = min_max_normalizer.fit_transform(val) 41 | df2 = pd.DataFrame(norm_val) 42 | 43 | return df2 44 | 45 | def apply_pre_processing(data): 46 | features_to_encode = FEATURES_TO_ENCODE 47 | encoded = encode_features(data, features_to_encode) 48 | processed_data = normalize_data(encoded) 49 | 50 | return processed_data 51 |
--------------------------------------------------------------------------------
/uci_heart_disease/utils.py:
--------------------------------------------------------------------------------
1 | import pandas as pd 2 | import numpy as np 3 | from sklearn import preprocessing 4 | from constants import * 5 | 6 | 7 | def get_inference_data(): 8 | data = pd.read_csv("Data/inference_heart_disease.csv") # Live connection to the database 9 | data.drop_duplicates(subset=None, inplace=True) 10 | data.duplicated().any() 11 | 12 | return data[data.columns.drop('target')], data['target'] 13 | 14 | 15 | def encode_features(df, features): 16 | encoded_df = pd.DataFrame(columns= ONE_HOT_ENCODED_FEATURES) # from constants.py 17 | placeholder_df = pd.DataFrame() 18 | 19 | for f in features: 20 | if(f in df.columns): 21 | encoded = pd.get_dummies(df[f]) 22 | encoded = encoded.add_prefix(f + '_') 23 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 24 | else: 25 | print('Feature not found') 26 | return df 27 | 28 | for feature in encoded_df.columns: 29 | if feature in df.columns: 30 |
encoded_df[feature] = df[feature] 31 | if feature in placeholder_df.columns: 32 | encoded_df[feature] = placeholder_df[feature] 33 | encoded_df.fillna(0, inplace=True) 34 | 35 | return encoded_df 36 | 37 | 38 | def normalize_data(df): 39 | val = df.values 40 | min_max_normalizer = preprocessing.MinMaxScaler() 41 | norm_val = min_max_normalizer.fit_transform(val) 42 | df2 = pd.DataFrame(norm_val) 43 | 44 | return df2 45 | 46 | 47 | def apply_pre_processing(data): 48 | features_to_encode = FEATURES_TO_ENCODE 49 | encoded = encode_features(data, features_to_encode) 50 | processed_data = normalize_data(encoded) 51 | 52 | return processed_data 53 | -------------------------------------------------------------------------------- /docker_deploy/app/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn import preprocessing 4 | from constants import * 5 | 6 | 7 | # apply same pre-processing and feature engineering techniques as applied during the training process 8 | def encode_features(df, features): 9 | ''' 10 | Method for one-hot encoding all selected categorical fields 11 | ''' 12 | # Implement these steps to prevent dimension mismatch during inference 13 | encoded_df = pd.DataFrame(columns= ONE_HOT_ENCODED_FEATURES) # from constants.py 14 | placeholder_df = pd.DataFrame() 15 | 16 | # One-Hot Encoding using get_dummies for the specified categorical features 17 | for f in features: 18 | if(f in df.columns): 19 | encoded = pd.get_dummies(df[f]) 20 | encoded = encoded.add_prefix(f + '_') 21 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 22 | else: 23 | print('Feature not found') 24 | return df 25 | 26 | # Implement these steps to prevent dimension mismatch during inference 27 | for feature in encoded_df.columns: 28 | if feature in df.columns: 29 | encoded_df[feature] = df[feature] 30 | if feature in placeholder_df.columns: 31 | encoded_df[feature] = placeholder_df[feature] 32 | # fill all null values 33 | encoded_df.fillna(0, inplace=True) 34 | 35 | return encoded_df 36 | 37 | def normalize_data(df): 38 | val = df.values 39 | min_max_normalizer = preprocessing.MinMaxScaler() 40 | norm_val = min_max_normalizer.fit_transform(val) 41 | df2 = pd.DataFrame(norm_val) 42 | 43 | return df2 44 | 45 | def apply_pre_processing(data): 46 | features_to_encode = FEATURES_TO_ENCODE # from constants.py 47 | encoded = encode_features(data, features_to_encode) 48 | processed_data = normalize_data(encoded) 49 | return processed_data -------------------------------------------------------------------------------- /exception handling and logging/inference_pipe_exec.log: -------------------------------------------------------------------------------- 1 | 2022-06-21 15:00:28,277 Started execution. Fetching data now ... 2 | 2022-06-21 15:00:28,281 Encountered error. Please check. 3 | 2022-06-21 15:00:28,281 [Errno 2] No such file or directory: 'Data/heart.csv' 4 | 2022-06-21 15:01:00,426 Started execution. Fetching data now ... 5 | 2022-06-21 15:01:00,450 Data fetched. Applying pre-processing now ... 6 | 2022-06-21 15:01:00,466 Pre-processing is completed. Loading trained model now ... 7 | 2022-06-21 15:01:00,634 Trained model is loaded. Executing trained model on inference data ... 8 | 2022-06-21 15:01:00,650 Execution is complete. 9 | 2022-07-03 14:04:07,785 Started execution. Fetching data now ... 10 | 2022-07-03 14:04:07,801 Data fetched. Applying pre-processing now ... 
11 | 2022-07-03 14:04:07,850 Pre-processing is completed. Loading trained model now ... 12 | 2022-07-03 14:04:08,061 Trained model is loaded. Executing trained model on inference data ... 13 | 2022-07-03 14:04:08,077 Execution is complete. 14 | 2022-07-04 19:36:21,404 Started execution. Fetching data now ... 15 | 2022-07-04 19:36:21,413 Data fetched. Applying pre-processing now ... 16 | 2022-07-04 19:36:21,424 Pre-processing is completed. Loading trained model now ... 17 | 2022-07-04 19:36:21,634 Trained model is loaded. Executing trained model on inference data ... 18 | 2022-07-04 19:36:21,648 Execution is complete. 19 | 2022-07-04 19:39:06,750 Started execution. Fetching data now ... 20 | 2022-07-04 19:39:06,760 Data fetched. Applying pre-processing now ... 21 | 2022-07-04 19:39:06,773 Pre-processing is completed. Loading trained model now ... 22 | 2022-07-04 19:39:06,988 Trained model is loaded. Executing trained model on inference data ... 23 | 2022-07-04 19:39:07,004 Execution is complete. 24 | 2022-07-04 19:41:07,090 Started execution. Fetching data now ... 25 | 2022-07-04 19:41:07,091 Encountered error. Please check. 26 | 2022-07-04 19:41:07,091 [Errno 2] No such file or directory: 'Data/inference_heart_disease.csv' 27 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | anyio==3.3.4 2 | argcomplete==1.12.3 3 | argon2-cffi==21.1.0 4 | asgiref==3.4.1 5 | atomicwrites==1.4.0 6 | attrs==21.2.0 7 | backcall==0.2.0 8 | bleach==4.1.0 9 | cffi==1.15.0 10 | click==8.0.3 11 | colorama==0.4.4 12 | cvxopt==1.2.7 13 | cvxpy==1.1.15 14 | cycler==0.10.0 15 | debugpy==1.5.1 16 | decorator==5.1.0 17 | defusedxml==0.7.1 18 | ecos==2.0.7.post1 19 | entrypoints==0.3 20 | fancyimpute==0.7.0 21 | fastapi==0.70.0 22 | h11==0.12.0 23 | httptools==0.2.0 24 | idna==3.3 25 | importlib-metadata==4.8.1 26 | iniconfig==1.1.1 27 | ipykernel==6.4.2 28 | ipython==7.28.0 29 | ipython-genutils==0.2.0 30 | jedi==0.18.0 31 | Jinja2==3.0.2 32 | joblib==1.1.0 33 | jsonschema==4.1.2 34 | jupyter-client==7.0.6 35 | jupyter-core==4.8.1 36 | jupyterlab-pygments==0.1.2 37 | kiwisolver==1.3.2 38 | knnimpute==0.1.0 39 | MarkupSafe==2.0.1 40 | matplotlib==3.4.3 41 | matplotlib-inline==0.1.3 42 | mistune==0.8.4 43 | nbclient==0.5.4 44 | nbconvert==6.2.0 45 | nbformat==5.1.3 46 | nest-asyncio==1.5.1 47 | nose==1.3.7 48 | notebook==6.4.5 49 | numpy==1.21.3 50 | osqp==0.6.2.post0 51 | packaging==21.0 52 | pandas==1.3.4 53 | pandocfilters==1.5.0 54 | parso==0.8.2 55 | pickleshare==0.7.5 56 | Pillow==8.4.0 57 | pluggy==1.0.0 58 | prometheus-client==0.11.0 59 | prompt-toolkit==3.0.21 60 | py==1.10.0 61 | pycparser==2.20 62 | pydantic==1.8.2 63 | Pygments==2.10.0 64 | pyparsing==2.4.7 65 | pyrsistent==0.18.0 66 | pytest==6.2.5 67 | python-dateutil==2.8.2 68 | python-dotenv==0.19.1 69 | pytz==2021.3 70 | pywin32==302 71 | pywinpty==1.1.4 72 | PyYAML==6.0 73 | pyzmq==22.3.0 74 | qdldl==0.1.5.post0 75 | scikit-learn==1.0 76 | scipy==1.7.1 77 | scs==2.1.4 78 | seaborn==0.11.2 79 | Send2Trash==1.8.0 80 | six==1.16.0 81 | sniffio==1.2.0 82 | starlette==0.16.0 83 | terminado==0.12.1 84 | testpath==0.5.0 85 | threadpoolctl==3.0.0 86 | toml==0.10.2 87 | tornado==6.1 88 | traitlets==5.1.0 89 | typing-extensions==3.10.0.2 90 | uvicorn==0.15.0 91 | watchgod==0.7 92 | wcwidth==0.2.5 93 | webencodings==0.5.1 94 | websockets==10.0 95 | zipp==3.6.0 96 | 
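# (pinned snapshot captured with `pip freeze`, per env create.txt — the
#  environment can be rebuilt with: pip install -r requirements.txt)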
-------------------------------------------------------------------------------- /python scripting/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # ## Heart Disease Inference 5 | # In this script, we will try to look at 6 | # the inference part of the heart disease classification solution 7 | 8 | # ### Import Modules 9 | import warnings 10 | warnings.filterwarnings("ignore") 11 | import pandas as pd 12 | import numpy as np 13 | import joblib 14 | from sklearn.metrics import accuracy_score 15 | from utils import * 16 | from constants import * 17 | import argparse 18 | 19 | 20 | # main function - starting point of the code 21 | def main(model_name): 22 | ''' 23 | main function - starting point of the code 24 | input: model name taken as input 25 | (default: adaboost model) 26 | output: no return value, only prints outcome 27 | ''' 28 | print("Starting execution of the inference code...") 29 | # in real-time use cases, this code should be replaced with live flowing data 30 | # use get_inference_data() from utils.py to fetch inference data 31 | inference_data, labels = get_inference_data() 32 | # use apply_pre_processing() from utils.py 33 | # to apply necessary preprocessing as applied for training data 34 | processed_inference_data = apply_pre_processing(inference_data) 35 | # ### Load Saved Model 36 | model = joblib.load(model_name) 37 | # ### Prediction on inference data 38 | model.predict(processed_inference_data) 39 | # ### Scoring check on prediction 40 | print("Checking inference accuracy:") 41 | print(accuracy_score(labels, model.predict(processed_inference_data))) 42 | 43 | 44 | if __name__ == "__main__": 45 | parser = argparse.ArgumentParser(description='Running inference pipeline') 46 | parser.add_argument('--model', 47 | default='adaboost', 48 | help='select algorithm: svm or adaboost') 49 | args = parser.parse_args() 50 | print(f"Selected algorithm: {args.model}") 51 | if(args.model == 'svm'): 52 | model_name = 'aditya_model2_svm.joblib' 53 | else: 54 | model_name = 'aditya_model1_adaboost.joblib' 55 | main(model_name) 56 | -------------------------------------------------------------------------------- /docker_deploy/app/main.py: -------------------------------------------------------------------------------- 1 | import joblib 2 | from fastapi import FastAPI 3 | from constants import * 4 | from utils import * 5 | from data_model import * 6 | 7 | 8 | app = FastAPI() 9 | model = joblib.load(MODEL_NAME) # Load from constants 10 | 11 | def apply_model(model, inference_data): 12 | # Prepare prediction dataframe 13 | inf_df = pd.DataFrame([[inference_data.age, 14 | inference_data.sex, 15 | inference_data.chest_pain_type, 16 | inference_data.resting_bp, 17 | inference_data.cholestoral, 18 | inference_data.fasting_blood_sugar, 19 | inference_data.restecg, 20 | inference_data.max_hr, 21 | inference_data.exang, 22 | inference_data.oldpeak, 23 | inference_data.slope, 24 | inference_data.num_major_vessels, 25 | inference_data.thal]], columns = ORIGINAL_FEATURES) 26 | inf_df[ORIGINAL_FEATURES[0]] = inference_data.age 27 | inf_df[ORIGINAL_FEATURES[1]] = inference_data.sex 28 | inf_df[ORIGINAL_FEATURES[2]] = inference_data.chest_pain_type 29 | inf_df[ORIGINAL_FEATURES[3]] = inference_data.resting_bp 30 | inf_df[ORIGINAL_FEATURES[4]] = inference_data.cholestoral 31 | inf_df[ORIGINAL_FEATURES[5]] = inference_data.fasting_blood_sugar 32 | inf_df[ORIGINAL_FEATURES[6]] = inference_data.restecg 33 | 
inf_df[ORIGINAL_FEATURES[7]] = inference_data.max_hr 34 | inf_df[ORIGINAL_FEATURES[8]] = inference_data.exang 35 | inf_df[ORIGINAL_FEATURES[9]] = inference_data.oldpeak 36 | inf_df[ORIGINAL_FEATURES[10]] = inference_data.slope 37 | inf_df[ORIGINAL_FEATURES[11]] = inference_data.num_major_vessels 38 | inf_df[ORIGINAL_FEATURES[12]] = inference_data.thal 39 | 40 | processed_inference_data = apply_pre_processing(inf_df) 41 | pred = model.predict(processed_inference_data)[0] 42 | 43 | if pred == 1: 44 | pred_value = True 45 | pred_class = "heart disease" 46 | else: 47 | pred_value = False 48 | pred_class = "No heart disease" 49 | 50 | return pred_value, pred_class 51 | 52 | @app.get('/') 53 | def get_root(): 54 | 55 | return {'message': 'Welcome to the Heart Disease Detection API'} 56 | 57 | @app.post("/predict", response_model=OutputDataModel) 58 | async def post_predictions(inference_data: InputDataModel): 59 | 60 | pred_value, pred_class = apply_model(model, inference_data) 61 | 62 | response = { 63 | "predicted_value": pred_value, 64 | "predicted_class": pred_class 65 | } 66 | return response -------------------------------------------------------------------------------- /uci_heart_disease/main.py: -------------------------------------------------------------------------------- 1 | import joblib 2 | from fastapi import FastAPI 3 | from constants import * 4 | from utils import * 5 | from data_model import * 6 | 7 | 8 | app = FastAPI() 9 | model = joblib.load(MODEL_NAME) # Load from constants 10 | 11 | def apply_model(model, inference_data): 12 | # Prepare prediction dataframe 13 | inf_df = pd.DataFrame([[inference_data.age, 14 | inference_data.sex, 15 | inference_data.chest_pain_type, 16 | inference_data.resting_bp, 17 | inference_data.cholestoral, 18 | inference_data.fasting_blood_sugar, 19 | inference_data.restecg, 20 | inference_data.max_hr, 21 | inference_data.exang, 22 | inference_data.oldpeak, 23 | inference_data.slope, 24 | inference_data.num_major_vessels, 25 | inference_data.thal]], columns = ORIGINAL_FEATURES) 26 | inf_df[ORIGINAL_FEATURES[0]] = inference_data.age 27 | inf_df[ORIGINAL_FEATURES[1]] = inference_data.sex 28 | inf_df[ORIGINAL_FEATURES[2]] = inference_data.chest_pain_type 29 | inf_df[ORIGINAL_FEATURES[3]] = inference_data.resting_bp 30 | inf_df[ORIGINAL_FEATURES[4]] = inference_data.cholestoral 31 | inf_df[ORIGINAL_FEATURES[5]] = inference_data.fasting_blood_sugar 32 | inf_df[ORIGINAL_FEATURES[6]] = inference_data.restecg 33 | inf_df[ORIGINAL_FEATURES[7]] = inference_data.max_hr 34 | inf_df[ORIGINAL_FEATURES[8]] = inference_data.exang 35 | inf_df[ORIGINAL_FEATURES[9]] = inference_data.oldpeak 36 | inf_df[ORIGINAL_FEATURES[10]] = inference_data.slope 37 | inf_df[ORIGINAL_FEATURES[11]] = inference_data.num_major_vessels 38 | inf_df[ORIGINAL_FEATURES[12]] = inference_data.thal 39 | 40 | processed_inference_data = apply_pre_processing(inf_df) 41 | pred = model.predict(processed_inference_data)[0] 42 | 43 | if pred == 1: 44 | pred_value = True 45 | pred_class = "heart disease" 46 | else: 47 | pred_value = False 48 | pred_class = "No heart disease" 49 | 50 | return pred_value, pred_class 51 | 52 | @app.get('/') 53 | def get_root(): 54 | 55 | return {'message': 'Welcome to the Heart Disease Detection API'} 56 | 57 | @app.post("/predict", response_model=OutputDataModel) 58 | async def post_predictions(inference_data: InputDataModel): 59 | 60 | pred_value, pred_class = apply_model(model, inference_data) 61 | 62 | response = { 63 | "predicted_value": pred_value, 64 | 
"predicted_class": pred_class 65 | } 66 | return response 67 | -------------------------------------------------------------------------------- /exception handling and logging/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # ## Heart Disease Inference 5 | # In this script, we will try to look at 6 | # the inference part of the heart disease classification solution 7 | 8 | # ### Import Modules 9 | import warnings 10 | warnings.filterwarnings("ignore") 11 | import pandas as pd 12 | import numpy as np 13 | import joblib 14 | from sklearn.metrics import accuracy_score 15 | from utils import * 16 | from constants import * 17 | import argparse 18 | import logging 19 | 20 | 21 | # main function - starting point of the code 22 | def main(model_name, logger): 23 | ''' 24 | main function - starting point of the code 25 | input: model name taken as input 26 | (default: adaboost model) 27 | output: no return value, only prints outcome 28 | ''' 29 | try: 30 | print("Starting execution of the inference code...") 31 | logger.info("Started execution. Fetching data now ...") 32 | # in real-time use cases, this code should be replaced with live flowing data 33 | # use get_inference_data() from utils.py to fetch inference data 34 | inference_data, labels = get_inference_data() 35 | logger.info("Data fetched. Applying pre-processing now ...") 36 | # use apply_pre_processing() from utils.py 37 | # to apply necessary preprocessing as applied for training data 38 | processed_inference_data = apply_pre_processing(inference_data) 39 | logger.info("Pre-processing is completed. Loading trained model now ...") 40 | # ### Load Saved Model 41 | model = joblib.load(model_name) 42 | logger.info("Trained model is loaded. Executing trained model on inference data ...") 43 | # ### Prediction on inference data 44 | model.predict(processed_inference_data) 45 | # ### Scoring check on prediction 46 | print("Checking inference accuracy:") 47 | print(accuracy_score(labels, model.predict(processed_inference_data))) 48 | logger.info("Execution is complete.") 49 | except Exception as e: 50 | print("--------Error!!!--------") 51 | logger.error("Encountered error. 
Please check.") 52 | logger.error(e) 53 | print(e) 54 | 55 | 56 | if __name__ == "__main__": 57 | # Create and configure logger 58 | logging.basicConfig(filename="inference_pipe_exec.log", 59 | format='%(asctime)s %(message)s', 60 | filemode='a') 61 | 62 | # Creating an object 63 | logger = logging.getLogger() 64 | 65 | # Setting the threshold of logger to DEBUG 66 | logger.setLevel(logging.DEBUG) 67 | 68 | parser = argparse.ArgumentParser(description='Running inference pipeline') 69 | parser.add_argument('--model', 70 | default='adaboost', 71 | help='select algorithm: svm or adaboost') 72 | args = parser.parse_args() 73 | print(f"Selected algorithm: {args.model}") 74 | if(args.model == 'svm'): 75 | model_name = 'aditya_model2_svm.joblib' 76 | else: 77 | model_name = 'aditya_model1_adaboost.joblib' 78 | main(model_name, logger) 79 | -------------------------------------------------------------------------------- /code linting/Corrected/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utils.py contains all utility functions 3 | used during the inference process 4 | ''' 5 | 6 | import pandas as pd 7 | import numpy as np 8 | from sklearn import preprocessing 9 | from constants import * 10 | 11 | 12 | def get_inference_data(): 13 | ''' 14 | Method for loading inference data 15 | Input: No input 16 | Output: Returns inference data features and labels 17 | Example usage: inference_data, labels = get_inference_data() 18 | ''' 19 | # Live connection to the database 20 | data = pd.read_csv("Data/inference_heart_disease.csv") 21 | data.drop_duplicates(subset=None, inplace=True) 22 | data.duplicated().any() 23 | return data[data.columns.drop('target')], data['target'] 24 | 25 | 26 | # apply same pre-processing and feature engineering techniques 27 | # as applied during the training process 28 | def encode_features(df, features): 29 | ''' 30 | Method for one-hot encoding all selected categorical fields 31 | Input: The method takes pandas dataframe and 32 | list of the feature names as input 33 | Output: Returns a dataframe with one-hot encoded features 34 | Example usage: 35 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 36 | ''' 37 | # Implement these steps to prevent dimension mismatch during inference 38 | # from constants.py 39 | encoded_df = pd.DataFrame(columns=ONE_HOT_ENCODED_FEATURES) 40 | placeholder_df = pd.DataFrame() 41 | # One-Hot Encoding using get_dummies for the specified categorical features 42 | for f in features: 43 | if(f in df.columns): 44 | encoded = pd.get_dummies(df[f]) 45 | encoded = encoded.add_prefix(f + '_') 46 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 47 | else: 48 | print('Feature not found') 49 | return df 50 | # Implement these steps to prevent dimension mismatch during inference 51 | for feature in encoded_df.columns: 52 | if feature in df.columns: 53 | encoded_df[feature] = df[feature] 54 | if feature in placeholder_df.columns: 55 | encoded_df[feature] = placeholder_df[feature] 56 | # fill all null values 57 | encoded_df.fillna(0, inplace=True) 58 | return encoded_df 59 | 60 | 61 | def normalize_data(df): 62 | ''' 63 | Normalize data using Min-Max Scaler 64 | Input: The method takes pandas dataframe as input 65 | Output: Returns a dataframe with normalized features 66 | Example usage: 67 | normalized_df = normalize_data(df) 68 | ''' 69 | values = df.values 70 | min_max_normalizer = preprocessing.MinMaxScaler() 71 | norm_val = min_max_normalizer.fit_transform(values) 
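# caution: fit_transform re-fits the scaler on the inference batch itself; if the batch's
# per-column min/max differ from the training data's, the scaled values will drift.
# Persisting the scaler fitted at training time (e.g. with joblib, as is done for the
# models) would keep inference scaling consistent with training.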
72 | norm_df = pd.DataFrame(norm_val) 73 | return norm_df 74 | 75 | 76 | def apply_pre_processing(data): 77 | ''' 78 | Apply all pre-processing methods together 79 | Input: The method takes the inference data as pandas dataframe as input 80 | Output: Returns a dataframe after applying all preprocessing steps 81 | Example usage: 82 | processed_data = apply_pre_processing(df) 83 | ''' 84 | features_to_encode = FEATURES_TO_ENCODE # from constants.py 85 | # applying encoded features function 86 | encoded = encode_features(data, features_to_encode) 87 | processed_data = normalize_data(encoded) # applying normalization function 88 | return processed_data 89 | -------------------------------------------------------------------------------- /code linting/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utils.py contains all utility functions 3 | used during the inference process 4 | ''' 5 | 6 | import pandas as pd 7 | import numpy as np 8 | from sklearn import preprocessing 9 | from constants import * 10 | 11 | 12 | def get_inference_data(): 13 | ''' 14 | Method for loading inference data 15 | Input: No input 16 | Output: Returns inference data features and labels 17 | Example usage: inference_data, labels = get_inference_data() 18 | ''' 19 | # Live connection to the database 20 | data = pd.read_csv("Data/inference_heart_disease.csv") 21 | data.drop_duplicates(subset=None, inplace=True) 22 | data.duplicated().any() 23 | return data[data.columns.drop('target')], data['target'] 24 | 25 | 26 | # apply same pre-processing and feature engineering techniques as applied during the training process 27 | def encode_features(df, features): 28 | ''' 29 | Method for one-hot encoding all selected categorical fields 30 | Input: The method takes pandas dataframe and 31 | list of the feature names as input 32 | Output: Returns a dataframe with one-hot encoded features 33 | Example usage: 34 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 35 | ''' 36 | # Implement these steps to prevent dimension mismatch during inference 37 | encoded_df = pd.DataFrame(columns= ONE_HOT_ENCODED_FEATURES) # from constants.py 38 | placeholder_df = pd.DataFrame() 39 | 40 | # One-Hot Encoding using get_dummies for the specified categorical features 41 | for f in features: 42 | if(f in df.columns): 43 | encoded = pd.get_dummies(df[f]) 44 | encoded = encoded.add_prefix(f + '_') 45 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 46 | else: 47 | print('Feature not found') 48 | return df 49 | 50 | # Implement these steps to prevent dimension mismatch during inference 51 | for feature in encoded_df.columns: 52 | if feature in df.columns: 53 | encoded_df[feature] = df[feature] 54 | if feature in placeholder_df.columns: 55 | encoded_df[feature] = placeholder_df[feature] 56 | # fill all null values 57 | encoded_df.fillna(0, inplace=True) 58 | 59 | return encoded_df 60 | 61 | def normalize_data(df): 62 | ''' 63 | Normalize data using Min-Max Scaler 64 | Input: The method takes pandas dataframe as input 65 | Output: Returns a dataframe with normalized features 66 | Example usage: 67 | normalized_df = normalize_data(df) 68 | ''' 69 | values = df.values 70 | min_max_normalizer = preprocessing.MinMaxScaler() 71 | norm_val = min_max_normalizer.fit_transform(values) 72 | norm_df = pd.DataFrame(norm_val) 73 | 74 | return norm_df 75 | 76 | def apply_pre_processing(data): 77 | ''' 78 | Apply all pre-processing methods together 79 | Input: The method takes the 
inference data as pandas dataframe as input 80 | Output: Returns a dataframe after applying all preprocessing steps 81 | Example usage: 82 | processed_data = apply_pre_processing(df) 83 | ''' 84 | features_to_encode = FEATURES_TO_ENCODE # from constants.py 85 | encoded = encode_features(data, features_to_encode) # applying encoded features function 86 | processed_data = normalize_data(encoded) # applying normalization function 87 | 88 | return processed_data 89 | -------------------------------------------------------------------------------- /python scripting/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utils.py contains all utility functions 3 | used during the inference process 4 | ''' 5 | 6 | import pandas as pd 7 | import numpy as np 8 | from sklearn import preprocessing 9 | from constants import * 10 | 11 | 12 | def get_inference_data(): 13 | ''' 14 | Method for loading inference data 15 | Input: No input 16 | Output: Returns inference data features and labels 17 | Example usage: inference_data, labels = get_inference_data() 18 | ''' 19 | data = pd.read_csv("Data/inference_heart_disease.csv") # Live connection to the database 20 | data.drop_duplicates(subset=None, inplace=True) 21 | data.duplicated().any() 22 | 23 | return data[data.columns.drop('target')], data['target'] 24 | 25 | 26 | # apply same pre-processing and feature engineering techniques as applied during the training process 27 | def encode_features(df, features): 28 | ''' 29 | Method for one-hot encoding all selected categorical fields 30 | Input: The method takes pandas dataframe and 31 | list of the feature names as input 32 | Output: Returns a dataframe with one-hot encoded features 33 | Example usage: 34 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 35 | ''' 36 | # Implement these steps to prevent dimension mismatch during inference 37 | encoded_df = pd.DataFrame(columns= ONE_HOT_ENCODED_FEATURES) # from constants.py 38 | placeholder_df = pd.DataFrame() 39 | 40 | # One-Hot Encoding using get_dummies for the specified categorical features 41 | for f in features: 42 | if(f in df.columns): 43 | encoded = pd.get_dummies(df[f]) 44 | encoded = encoded.add_prefix(f + '_') 45 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 46 | else: 47 | print('Feature not found') 48 | return df 49 | 50 | # Implement these steps to prevent dimension mismatch during inference 51 | for feature in encoded_df.columns: 52 | if feature in df.columns: 53 | encoded_df[feature] = df[feature] 54 | if feature in placeholder_df.columns: 55 | encoded_df[feature] = placeholder_df[feature] 56 | # fill all null values 57 | encoded_df.fillna(0, inplace=True) 58 | 59 | return encoded_df 60 | 61 | def normalize_data(df): 62 | ''' 63 | Normalize data using Min-Max Scaler 64 | Input: The method takes pandas dataframe as input 65 | Output: Returns a dataframe with normalized features 66 | Example usage: 67 | normalized_df = normalize_data(df) 68 | ''' 69 | values = df.values 70 | min_max_normalizer = preprocessing.MinMaxScaler() 71 | norm_val = min_max_normalizer.fit_transform(values) 72 | norm_df = pd.DataFrame(norm_val) 73 | 74 | return norm_df 75 | 76 | def apply_pre_processing(data): 77 | ''' 78 | Apply all pre-processing methods together 79 | Input: The method takes the inference data as pandas dataframe as input 80 | Output: Returns a dataframe after applying all preprocessing steps 81 | Example usage: 82 | processed_data = apply_pre_processing(df) 83 | 
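Note: the fields to encode are taken from FEATURES_TO_ENCODE and the
expected output columns from ONE_HOT_ENCODED_FEATURES, both defined in constants.py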
''' 84 | features_to_encode = FEATURES_TO_ENCODE # from constants.py 85 | encoded = encode_features(data, features_to_encode) # applying encoded features function 86 | processed_data = normalize_data(encoded) # applying normalization function 87 | 88 | return processed_data 89 | -------------------------------------------------------------------------------- /unit testing/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utils.py contains all utility functions 3 | used during the inference process 4 | ''' 5 | 6 | import pandas as pd 7 | import numpy as np 8 | from sklearn import preprocessing 9 | from constants import * 10 | 11 | 12 | def get_inference_data(): 13 | ''' 14 | Method for loading inference data 15 | Input: No input 16 | Output: Returns inference data features and labels 17 | Example usage: inference_data, labels = get_inference_data() 18 | ''' 19 | data = pd.read_csv("Data/inference_heart_disease.csv") # Live connection to the database 20 | data.drop_duplicates(subset=None, inplace=True) 21 | data.duplicated().any() 22 | 23 | return data[data.columns.drop('target')], data['target'] 24 | 25 | 26 | # apply same pre-processing and feature engineering techniques as applied during the training process 27 | def encode_features(df, features): 28 | ''' 29 | Method for one-hot encoding all selected categorical fields 30 | Input: The method takes pandas dataframe and 31 | list of the feature names as input 32 | Output: Returns a dataframe with one-hot encoded features 33 | Example usage: 34 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 35 | ''' 36 | # Implement these steps to prevent dimension mismatch during inference 37 | encoded_df = pd.DataFrame(columns= ONE_HOT_ENCODED_FEATURES) # from constants.py 38 | placeholder_df = pd.DataFrame() 39 | 40 | # One-Hot Encoding using get_dummies for the specified categorical features 41 | for f in features: 42 | if(f in df.columns): 43 | encoded = pd.get_dummies(df[f]) 44 | encoded = encoded.add_prefix(f + '_') 45 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 46 | else: 47 | print('Feature not found') 48 | return df 49 | 50 | # Implement these steps to prevent dimension mismatch during inference 51 | for feature in encoded_df.columns: 52 | if feature in df.columns: 53 | encoded_df[feature] = df[feature] 54 | if feature in placeholder_df.columns: 55 | encoded_df[feature] = placeholder_df[feature] 56 | # fill all null values 57 | encoded_df.fillna(0, inplace=True) 58 | 59 | return encoded_df 60 | 61 | def normalize_data(df): 62 | ''' 63 | Normalize data using Min-Max Scaler 64 | Input: The method takes pandas dataframe as input 65 | Output: Returns a dataframe with normalized features 66 | Example usage: 67 | normalized_df = normalize_data(df) 68 | ''' 69 | values = df.values 70 | min_max_normalizer = preprocessing.MinMaxScaler() 71 | norm_val = min_max_normalizer.fit_transform(values) 72 | norm_df = pd.DataFrame(norm_val) 73 | 74 | return norm_df 75 | 76 | def apply_pre_processing(data): 77 | ''' 78 | Apply all pre-processing methods together 79 | Input: The method takes the inference data as pandas dataframe as input 80 | Output: Returns a dataframe after applying all preprocessing steps 81 | Example usage: 82 | processed_data = apply_pre_processing(df) 83 | ''' 84 | features_to_encode = FEATURES_TO_ENCODE # from constants.py 85 | encoded = encode_features(data, features_to_encode) # applying encoded features function 86 | processed_data = 
normalize_data(encoded) # applying normalization function 87 | 88 | return processed_data 89 | -------------------------------------------------------------------------------- /code documentation/Corrected/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utils.py contains all utility functions 3 | used during the inference process 4 | ''' 5 | 6 | import pandas as pd 7 | import numpy as np 8 | from sklearn import preprocessing 9 | from constants import * 10 | 11 | 12 | def get_inference_data(): 13 | ''' 14 | Method for loading inference data 15 | Input: No input 16 | Output: Returns inference data features and labels 17 | Example usage: inference_data, labels = get_inference_data() 18 | ''' 19 | data = pd.read_csv("Data/inference_heart_disease.csv") # Live connection to the database 20 | data.drop_duplicates(subset=None, inplace=True) 21 | data.duplicated().any() 22 | 23 | return data[data.columns.drop('target')], data['target'] 24 | 25 | 26 | # apply same pre-processing and feature engineering techniques as applied during the training process 27 | def encode_features(df, features): 28 | ''' 29 | Method for one-hot encoding all selected categorical fields 30 | Input: The method takes pandas dataframe and 31 | list of the feature names as input 32 | Output: Returns a dataframe with one-hot encoded features 33 | Example usage: 34 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 35 | ''' 36 | # Implement these steps to prevent dimension mismatch during inference 37 | encoded_df = pd.DataFrame(columns= ONE_HOT_ENCODED_FEATURES) # from constants.py 38 | placeholder_df = pd.DataFrame() 39 | 40 | # One-Hot Encoding using get_dummies for the specified categorical features 41 | for f in features: 42 | if(f in df.columns): 43 | encoded = pd.get_dummies(df[f]) 44 | encoded = encoded.add_prefix(f + '_') 45 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 46 | else: 47 | print('Feature not found') 48 | return df 49 | 50 | # Implement these steps to prevent dimension mismatch during inference 51 | for feature in encoded_df.columns: 52 | if feature in df.columns: 53 | encoded_df[feature] = df[feature] 54 | if feature in placeholder_df.columns: 55 | encoded_df[feature] = placeholder_df[feature] 56 | # fill all null values 57 | encoded_df.fillna(0, inplace=True) 58 | 59 | return encoded_df 60 | 61 | def normalize_data(df): 62 | ''' 63 | Normalize data using Min-Max Scaler 64 | Input: The method takes pandas dataframe as input 65 | Output: Returns a dataframe with normalized features 66 | Example usage: 67 | normalized_df = normalize_data(df) 68 | ''' 69 | values = df.values 70 | min_max_normalizer = preprocessing.MinMaxScaler() 71 | norm_val = min_max_normalizer.fit_transform(values) 72 | norm_df = pd.DataFrame(norm_val) 73 | 74 | return norm_df 75 | 76 | def apply_pre_processing(data): 77 | ''' 78 | Apply all pre-processing methods together 79 | Input: The method takes the inference data as pandas dataframe as input 80 | Output: Returns a dataframe after applying all preprocessing steps 81 | Example usage: 82 | processed_data = apply_pre_processing(df) 83 | ''' 84 | features_to_encode = FEATURES_TO_ENCODE # from constants.py 85 | encoded = encode_features(data, features_to_encode) # applying encoded features function 86 | processed_data = normalize_data(encoded) # applying normalization function 87 | 88 | return processed_data 89 | -------------------------------------------------------------------------------- 
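The utility functions above are exercised by the test files under unit_test/ (see pytest_command.txt for the run commands). As a rough sketch only, not the repository's actual test code, a minimal pytest case for normalize_data() might look like the following; the test name and the sample column values are illustrative, and it assumes pytest is installed and is run from a directory where utils.py and constants.py are importable:

import pandas as pd

from utils import normalize_data


def test_normalize_data_range():
    # two distinct rows give MinMaxScaler a non-zero range in every column
    df = pd.DataFrame({'age': [40.0, 60.0], 'resting_bp': [120.0, 140.0]})
    norm_df = normalize_data(df)
    # Min-Max scaling should map every value into [0, 1] and preserve the shape
    assert norm_df.values.min() >= 0.0
    assert norm_df.values.max() <= 1.0
    assert norm_df.shape == df.shape

Such a test would be run with $pytest -q from the module root, mirroring the commands recorded in pytest_command.txt.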
/exception handling and logging/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Utils.py contains all utility functions 3 | used during the inference process 4 | ''' 5 | 6 | import pandas as pd 7 | import numpy as np 8 | from sklearn import preprocessing 9 | from constants import * 10 | 11 | 12 | def get_inference_data(): 13 | ''' 14 | Method for loading inference data 15 | Input: No input 16 | Output: Returns inference data features and labels 17 | Example usage: inference_data, labels = get_inference_data() 18 | ''' 19 | data = pd.read_csv("Data/inference_heart_disease.csv") # Live connection to the database 20 | data.drop_duplicates(subset=None, inplace=True) 21 | data.duplicated().any() 22 | 23 | return data[data.columns.drop('target')], data['target'] 24 | 25 | 26 | # apply same pre-processing and feature engineering techniques as applied during the training process 27 | def encode_features(df, features): 28 | ''' 29 | Method for one-hot encoding all selected categorical fields 30 | Input: The method takes pandas dataframe and 31 | list of the feature names as input 32 | Output: Returns a dataframe with one-hot encoded features 33 | Example usage: 34 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 35 | ''' 36 | # Implement these steps to prevent dimension mismatch during inference 37 | encoded_df = pd.DataFrame(columns= ONE_HOT_ENCODED_FEATURES) # from constants.py 38 | placeholder_df = pd.DataFrame() 39 | 40 | # One-Hot Encoding using get_dummies for the specified categorical features 41 | for f in features: 42 | if(f in df.columns): 43 | encoded = pd.get_dummies(df[f]) 44 | encoded = encoded.add_prefix(f + '_') 45 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 46 | else: 47 | print('Feature not found') 48 | return df 49 | 50 | # Implement these steps to prevent dimension mismatch during inference 51 | for feature in encoded_df.columns: 52 | if feature in df.columns: 53 | encoded_df[feature] = df[feature] 54 | if feature in placeholder_df.columns: 55 | encoded_df[feature] = placeholder_df[feature] 56 | # fill all null values 57 | encoded_df.fillna(0, inplace=True) 58 | 59 | return encoded_df 60 | 61 | def normalize_data(df): 62 | ''' 63 | Normalize data using Min-Max Scaler 64 | Input: The method takes pandas dataframe as input 65 | Output: Returns a dataframe with normalized features 66 | Example usage: 67 | normalized_df = normalize_data(df) 68 | ''' 69 | values = df.values 70 | min_max_normalizer = preprocessing.MinMaxScaler() 71 | norm_val = min_max_normalizer.fit_transform(values) 72 | norm_df = pd.DataFrame(norm_val) 73 | 74 | return norm_df 75 | 76 | def apply_pre_processing(data): 77 | ''' 78 | Apply all pre-processing methods together 79 | Input: The method takes the inference data as pandas dataframe as input 80 | Output: Returns a dataframe after applying all preprocessing steps 81 | Example usage: 82 | processed_data = apply_pre_processing(df) 83 | ''' 84 | features_to_encode = FEATURES_TO_ENCODE # from constants.py 85 | encoded = encode_features(data, features_to_encode) # applying encoded features function 86 | processed_data = normalize_data(encoded) # applying normalization function 87 | 88 | return processed_data 89 | -------------------------------------------------------------------------------- /uci_heart_disease/Heart Disease Inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 
| # %% 4 | 5 | # ## Heart Disease Classification 6 | # 7 | # In this notebook, we will try to look at just the inference part of the heart disease classification solution 8 | 9 | # ### Import Modules 10 | 11 | # %% 12 | 13 | 14 | import pandas as pd 15 | import numpy as np 16 | from sklearn import preprocessing 17 | import joblib 18 | 19 | 20 | # ### Get Inference Data 21 | 22 | # %% 23 | 24 | 25 | # in real-time use cases, this method should be replaced with live flowing data 26 | def get_inference_data(): 27 | data = pd.read_csv("Data/heart.csv") 28 | data.drop_duplicates(subset=None, inplace=True) 29 | data.duplicated().any() 30 | inference_df = data.sample(frac=1, random_state = 2) 31 | inference_df = inference_df.tail(20) 32 | return inference_df[inference_df.columns.drop('target')], inference_df['target'] 33 | 34 | inference_data, labels = get_inference_data() 35 | 36 | 37 | # %% 38 | 39 | 40 | inference_data.columns 41 | 42 | 43 | # %% 44 | 45 | 46 | inference_data.head() 47 | 48 | 49 | # ### Apply Same Pre-processing 50 | 51 | # %% 52 | 53 | 54 | # apply same pre-processing and feature engineering techniques as applied during the training process 55 | def encode_features(df, features): 56 | ''' 57 | Method for one-hot encoding all selected categorical fields 58 | ''' 59 | # Implement these steps to prevent dimension mismatch during inference 60 | encoded_df = pd.DataFrame(columns= ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 61 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 62 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 63 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 64 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2']) 65 | placeholder_df = pd.DataFrame() 66 | 67 | # One-Hot Encoding using get_dummies for the specified categorical features 68 | for f in features: 69 | if(f in df.columns): 70 | encoded = pd.get_dummies(df[f]) 71 | encoded = encoded.add_prefix(f + '_') 72 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 73 | else: 74 | print('Feature not found') 75 | return df 76 | 77 | # Implement these steps to prevent dimension mismatch during inference 78 | for feature in encoded_df.columns: 79 | if feature in df.columns: 80 | encoded_df[feature] = df[feature] 81 | if feature in placeholder_df.columns: 82 | encoded_df[feature] = placeholder_df[feature] 83 | # fill all null values 84 | encoded_df.fillna(0, inplace=True) 85 | 86 | return encoded_df 87 | 88 | def normalize_data(df): 89 | val = df.values 90 | min_max_normalizer = preprocessing.MinMaxScaler() 91 | norm_val = min_max_normalizer.fit_transform(val) 92 | df2 = pd.DataFrame(norm_val) 93 | 94 | return df2 95 | 96 | def apply_pre_processing(data): 97 | features_to_encode = ['thal', 'slope', 'chest_pain_type', 'restecg'] 98 | encoded = encode_features(data, features_to_encode) 99 | processed_data = normalize_data(encoded) 100 | return processed_data # Please note this is fabricated inference data, so just taking a small sample size 101 | 102 | processed_inference_data = apply_pre_processing(inference_data) 103 | processed_inference_data 104 | 105 | 106 | # ### Load Saved Model 107 | 108 | # %% 109 | 110 | 111 | model = joblib.load('aditya_model1_adaboost.joblib') 112 | model 113 | 114 | 115 | # ### Prediction on inference data 116 | 117 | # %% 118 | 119 | 120 | model.predict(processed_inference_data) 121 | 122 | 123 | # ### Scoring check on prediction 124 | 125 | # %% 126 | 127 | 128 | from sklearn.metrics import 
accuracy_score 129 | accuracy_score(labels[-20:], model.predict(processed_inference_data)) 130 | 131 | 132 | # %% 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /code documentation/Heart Disease Inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # %% 4 | 5 | # ## Heart Disease Classification 6 | # 7 | # In this notebook, we will try to look at just the inference part of the heart disease classification solution 8 | 9 | # ### Import Modules 10 | 11 | # %% 12 | 13 | 14 | import pandas as pd 15 | import numpy as np 16 | from sklearn import preprocessing 17 | import joblib 18 | 19 | 20 | # ### Get Inference Data 21 | 22 | # %% 23 | 24 | 25 | # in real-time use cases, this method should be replaced with live flowing data 26 | def get_inference_data(): 27 | data = pd.read_csv("Data/heart.csv") 28 | data.drop_duplicates(subset=None, inplace=True) 29 | data.duplicated().any() 30 | inference_df = data.sample(frac=1, random_state = 2) 31 | inference_df = inference_df.tail(20) 32 | return inference_df[inference_df.columns.drop('target')], inference_df['target'] 33 | 34 | inference_data, labels = get_inference_data() 35 | 36 | 37 | # %% 38 | 39 | 40 | inference_data.columns 41 | 42 | 43 | # %% 44 | 45 | 46 | inference_data.head() 47 | 48 | 49 | # ### Apply Same Pre-processing 50 | 51 | # %% 52 | 53 | 54 | # apply same pre-processing and feature engineering techniques as applied during the training process 55 | def encode_features(df, features): 56 | ''' 57 | Method for one-hot encoding all selected categorical fields 58 | ''' 59 | # Implement these steps to prevent dimension mismatch during inference 60 | encoded_df = pd.DataFrame(columns= ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 61 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 62 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 63 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 64 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2']) 65 | placeholder_df = pd.DataFrame() 66 | 67 | # One-Hot Encoding using get_dummies for the specified categorical features 68 | for f in features: 69 | if(f in df.columns): 70 | encoded = pd.get_dummies(df[f]) 71 | encoded = encoded.add_prefix(f + '_') 72 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 73 | else: 74 | print('Feature not found') 75 | return df 76 | 77 | # Implement these steps to prevent dimension mismatch during inference 78 | for feature in encoded_df.columns: 79 | if feature in df.columns: 80 | encoded_df[feature] = df[feature] 81 | if feature in placeholder_df.columns: 82 | encoded_df[feature] = placeholder_df[feature] 83 | # fill all null values 84 | encoded_df.fillna(0, inplace=True) 85 | 86 | return encoded_df 87 | 88 | def normalize_data(df): 89 | val = df.values 90 | min_max_normalizer = preprocessing.MinMaxScaler() 91 | norm_val = min_max_normalizer.fit_transform(val) 92 | df2 = pd.DataFrame(norm_val) 93 | 94 | return df2 95 | 96 | def apply_pre_processing(data): 97 | features_to_encode = ['thal', 'slope', 'chest_pain_type', 'restecg'] 98 | encoded = encode_features(data, features_to_encode) 99 | processed_data = normalize_data(encoded) 100 | return processed_data # Please note this is fabricated inference data, so just taking a small sample size 101 | 102 | processed_inference_data = apply_pre_processing(inference_data) 103 | 
processed_inference_data 104 | 105 | 106 | # ### Load Saved Model 107 | 108 | # %% 109 | 110 | 111 | model = joblib.load('aditya_model1_adaboost.joblib') 112 | model 113 | 114 | 115 | # ### Prediction on inference data 116 | 117 | # %% 118 | 119 | 120 | model.predict(processed_inference_data) 121 | 122 | 123 | # ### Scoring check on prediction 124 | 125 | # %% 126 | 127 | 128 | from sklearn.metrics import accuracy_score 129 | accuracy_score(labels[-20:], model.predict(processed_inference_data)) 130 | 131 | 132 | # %% 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /code linting/Heart Disease Inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # ## Heart Disease Classification 5 | # 6 | # In this notebook, we will try to look at just the inference part of the heart disease classification solution 7 | 8 | # ### Import Modules 9 | 10 | # In[20]: 11 | 12 | 13 | import pandas as pd 14 | import numpy as np 15 | from sklearn import preprocessing 16 | import joblib 17 | 18 | 19 | # ### Get Inference Data 20 | 21 | # In[79]: 22 | 23 | 24 | # in real-time use cases, this method should be replaced with live flowing data 25 | def get_inference_data(): 26 | data = pd.read_csv("Data/heart.csv") 27 | data.drop_duplicates(subset=None, inplace=True) 28 | data.duplicated().any() 29 | inference_df = data.sample(frac=1, random_state = 2) 30 | inference_df = inference_df.tail(20) 31 | return inference_df[inference_df.columns.drop('target')], inference_df['target'] 32 | 33 | inference_data, labels = get_inference_data() 34 | 35 | 36 | # In[84]: 37 | 38 | 39 | inference_data.columns 40 | 41 | 42 | # In[85]: 43 | 44 | 45 | inference_data.head() 46 | 47 | 48 | # ### Apply Same Pre-processing 49 | 50 | # In[80]: 51 | 52 | 53 | # apply same pre-processing and feature engineering techniques as applied during the training process 54 | def encode_features(df, features): 55 | ''' 56 | Method for one-hot encoding all selected categorical fields 57 | ''' 58 | # Implement these steps to prevent dimension mismatch during inference 59 | encoded_df = pd.DataFrame(columns= ['age', 'sex', 'resting_bp', 'cholestoral', 'fasting_blood_sugar', 60 | 'max_hr', 'exang', 'oldpeak', 'num_major_vessels', 'thal_0', 'thal_1', 61 | 'thal_2', 'thal_3', 'slope_0', 'slope_1', 'slope_2', 62 | 'chest_pain_type_0', 'chest_pain_type_1', 'chest_pain_type_2', 63 | 'chest_pain_type_3', 'restecg_0', 'restecg_1', 'restecg_2']) 64 | placeholder_df = pd.DataFrame() 65 | 66 | # One-Hot Encoding using get_dummies for the specified categorical features 67 | for f in features: 68 | if(f in df.columns): 69 | encoded = pd.get_dummies(df[f]) 70 | encoded = encoded.add_prefix(f + '_') 71 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 72 | else: 73 | print('Feature not found') 74 | return df 75 | 76 | # Implement these steps to prevent dimension mismatch during inference 77 | for feature in encoded_df.columns: 78 | if feature in df.columns: 79 | encoded_df[feature] = df[feature] 80 | if feature in placeholder_df.columns: 81 | encoded_df[feature] = placeholder_df[feature] 82 | # fill all null values 83 | encoded_df.fillna(0, inplace=True) 84 | 85 | return encoded_df 86 | 87 | def normalize_data(df): 88 | val = df.values 89 | min_max_normalizer = preprocessing.MinMaxScaler() 90 | norm_val = min_max_normalizer.fit_transform(val) 91 | df2 = pd.DataFrame(norm_val) 92 | 93 | return df2 94 | 95 | 
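# convenience wrapper: one-hot encode the selected categorical fields, then normalize the result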
def apply_pre_processing(data): 96 | features_to_encode = ['thal', 'slope', 'chest_pain_type', 'restecg'] 97 | encoded = encode_features(data, features_to_encode) 98 | processed_data = normalize_data(encoded) 99 | return processed_data # Please note this is fabricated inference data, so just taking a small sample size 100 | 101 | processed_inference_data = apply_pre_processing(inference_data) 102 | processed_inference_data 103 | 104 | 105 | # ### Load Saved Model 106 | 107 | # In[81]: 108 | 109 | 110 | model = joblib.load('aditya_model1_adaboost.joblib') 111 | model 112 | 113 | 114 | # ### Prediction on inference data 115 | 116 | # In[82]: 117 | 118 | 119 | model.predict(processed_inference_data) 120 | 121 | 122 | # ### Scoring check on prediction 123 | 124 | # In[83]: 125 | 126 | 127 | from sklearn.metrics import accuracy_score 128 | accuracy_score(labels[-20:], model.predict(processed_inference_data)) 129 | 130 | 131 | # In[ ]: 132 | 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /uci_heart_disease/Heart Disease Inference Modularized.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Heart Disease Inference Modularized\n", 8 | "\n", 9 | "In this notebook, we will try to look at just the inference part of the heart disease classification solution\n", 10 | "\n", 11 | "The process of applying the same transformations like data pre-processing, feature engineering etc. which was applied during the training process, and then applying the trained ML model to generate predictions is termed as the inference process." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Import Modules" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import warnings\n", 28 | "warnings.filterwarnings(\"ignore\")\n", 29 | "import pandas as pd\n", 30 | "import numpy as np\n", 31 | "import joblib\n", 32 | "from sklearn.metrics import accuracy_score\n", 33 | "from utils import *\n", 34 | "from constants import *" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Get Inference Data" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# in real-time use cases, this code should be replaced with live flowing data\n", 51 | "inference_data, labels = get_inference_data()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "### Apply Same Pre-processing" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "processed_inference_data = apply_pre_processing(inference_data)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### Load Saved Model" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "model = joblib.load(MODEL_NAME)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Prediction on inference data" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 
101 | "array([1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1.,\n", 102 | " 1., 1., 0.])" 103 | ] 104 | }, 105 | "execution_count": 6, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "model.predict(processed_inference_data)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Scoring check on prediction" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "0.85" 130 | ] 131 | }, 132 | "execution_count": 7, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "accuracy_score(labels, model.predict(processed_inference_data))" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.7.7" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 5 170 | } 171 | -------------------------------------------------------------------------------- /code documentation/Heart Disease Inference Modularized.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Heart Disease Inference Modularized\n", 8 | "\n", 9 | "In this notebook, we will try to look at just the inference part of the heart disease classification solution\n", 10 | "\n", 11 | "The process of applying the same transformations like data pre-processing, feature engineering etc. which was applied during the training process, and then applying the trained ML model to generate predictions is termed as the inference process." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Import Modules" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 12, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import warnings\n", 28 | "warnings.filterwarnings(\"ignore\")\n", 29 | "import pandas as pd\n", 30 | "import numpy as np\n", 31 | "import joblib\n", 32 | "from sklearn.metrics import accuracy_score\n", 33 | "from utils import *\n", 34 | "from constants import *" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Get Inference Data" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# in real-time use cases, this code should be replaced with live flowing data\n", 51 | "inference_data, labels = get_inference_data()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "### Apply Same Pre-processing" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 7, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "processed_inference_data = apply_pre_processing(inference_data)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### Load Saved Model" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 10, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "model = joblib.load(MODEL_NAME)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "### Prediction on inference data" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 11, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "array([1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1.,\n", 102 | " 1., 1., 0.])" 103 | ] 104 | }, 105 | "execution_count": 11, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "model.predict(processed_inference_data)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Scoring check on prediction" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 13, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "0.85" 130 | ] 131 | }, 132 | "execution_count": 13, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "accuracy_score(labels, model.predict(processed_inference_data))" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.7.7" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 5 170 | } 171 | -------------------------------------------------------------------------------- /unit testing/Heart Disease Inference Modularized.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Heart Disease Inference Modularized\n", 8 | "\n", 9 | "In this notebook, we will try to look at just the inference part of the heart disease classification solution\n", 10 | "\n", 11 | "The process of applying the same transformations like data pre-processing, feature engineering etc. which was applied during the training process, and then applying the trained ML model to generate predictions is termed as the inference process." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Import Modules" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import warnings\n", 28 | "warnings.filterwarnings(\"ignore\")\n", 29 | "import pandas as pd\n", 30 | "import numpy as np\n", 31 | "import joblib\n", 32 | "from sklearn.metrics import accuracy_score\n", 33 | "from utils import *\n", 34 | "from constants import *" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Get Inference Data" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# in real-time use cases, this code should be replaced with live flowing data\n", 51 | "# use get_inference_data() from utils.py to fetch inference data\n", 52 | "inference_data, labels = get_inference_data()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### Apply Same Pre-processing" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# use apply_pre_processing() from utils.py \n", 69 | "# to apply necessary preprocessing as applied for training data\n", 70 | "processed_inference_data = apply_pre_processing(inference_data)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### Load Saved Model" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# use MODEL_NAME from constants.py\n", 87 | "model = joblib.load(MODEL_NAME)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Prediction on inference data" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 11, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1.,\n", 106 | " 1., 1., 0.])" 107 | ] 108 | }, 109 | "execution_count": 11, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "# perform model prediction on inference data\n", 116 | "model.predict(processed_inference_data)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### Scoring check on prediction" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 13, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "0.85" 135 | ] 136 | }, 137 | "execution_count": 13, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# check inference accuracy\n", 144 | "accuracy_score(labels, model.predict(processed_inference_data))" 145 | ] 146 | }, 147 | { 148 | 
"cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 3", 158 | "language": "python", 159 | "name": "python3" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython3", 171 | "version": "3.7.7" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 5 176 | } 177 | -------------------------------------------------------------------------------- /python scripting/Heart Disease Inference Modularized.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Heart Disease Inference Modularized\n", 8 | "\n", 9 | "In this notebook, we will try to look at just the inference part of the heart disease classification solution\n", 10 | "\n", 11 | "The process of applying the same transformations like data pre-processing, feature engineering etc. which was applied during the training process, and then applying the trained ML model to generate predictions is termed as the inference process." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Import Modules" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import warnings\n", 28 | "warnings.filterwarnings(\"ignore\")\n", 29 | "import pandas as pd\n", 30 | "import numpy as np\n", 31 | "import joblib\n", 32 | "from sklearn.metrics import accuracy_score\n", 33 | "from utils import *\n", 34 | "from constants import *" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Get Inference Data" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# in real-time use cases, this code should be replaced with live flowing data\n", 51 | "# use get_inference_data() from utils.py to fetch inference data\n", 52 | "inference_data, labels = get_inference_data()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### Apply Same Pre-processing" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# use apply_pre_processing() from utils.py \n", 69 | "# to apply necessary preprocessing as applied for training data\n", 70 | "processed_inference_data = apply_pre_processing(inference_data)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### Load Saved Model" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# use MODEL_NAME from constants.py\n", 87 | "model = joblib.load(MODEL_NAME)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Prediction on inference data" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 11, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 
1.,\n", 106 | " 1., 1., 0.])" 107 | ] 108 | }, 109 | "execution_count": 11, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "# perform model prediction on inference data\n", 116 | "model.predict(processed_inference_data)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### Scoring check on prediction" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 13, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "0.85" 135 | ] 136 | }, 137 | "execution_count": 13, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# check inference accuracy\n", 144 | "accuracy_score(labels, model.predict(processed_inference_data))" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 3", 158 | "language": "python", 159 | "name": "python3" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython3", 171 | "version": "3.7.7" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 5 176 | } 177 | -------------------------------------------------------------------------------- /code documentation/Corrected/Heart Disease Inference Modularized.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Heart Disease Inference Modularized\n", 8 | "\n", 9 | "In this notebook, we will try to look at just the inference part of the heart disease classification solution\n", 10 | "\n", 11 | "The process of applying the same transformations like data pre-processing, feature engineering etc. which was applied during the training process, and then applying the trained ML model to generate predictions is termed as the inference process." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Import Modules" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 12, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import warnings\n", 28 | "warnings.filterwarnings(\"ignore\")\n", 29 | "import pandas as pd\n", 30 | "import numpy as np\n", 31 | "import joblib\n", 32 | "from sklearn.metrics import accuracy_score\n", 33 | "from utils import *\n", 34 | "from constants import *" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Get Inference Data" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# in real-time use cases, this code should be replaced with live flowing data\n", 51 | "# use get_inference_data() from utils.py to fetch inference data\n", 52 | "inference_data, labels = get_inference_data()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### Apply Same Pre-processing" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 7, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# use apply_pre_processing() from utils.py \n", 69 | "# to apply necessary preprocessing as applied for training data\n", 70 | "processed_inference_data = apply_pre_processing(inference_data)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### Load Saved Model" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# use MODEL_NAME from constants.py\n", 87 | "model = joblib.load(MODEL_NAME)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Prediction on inference data" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 11, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1.,\n", 106 | " 1., 1., 0.])" 107 | ] 108 | }, 109 | "execution_count": 11, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "# perform model prediction on inference data\n", 116 | "model.predict(processed_inference_data)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### Scoring check on prediction" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 13, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "0.85" 135 | ] 136 | }, 137 | "execution_count": 13, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# check inference accuracy\n", 144 | "accuracy_score(labels, model.predict(processed_inference_data))" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 3", 158 | "language": "python", 159 | "name": "python3" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": 
"ipython3", 171 | "version": "3.7.7" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 5 176 | } 177 | -------------------------------------------------------------------------------- /exception handling and logging/Heart Disease Inference Modularized.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Heart Disease Inference Modularized\n", 8 | "\n", 9 | "In this notebook, we will try to look at just the inference part of the heart disease classification solution\n", 10 | "\n", 11 | "The process of applying the same transformations like data pre-processing, feature engineering etc. which was applied during the training process, and then applying the trained ML model to generate predictions is termed as the inference process." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Import Modules" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import warnings\n", 28 | "warnings.filterwarnings(\"ignore\")\n", 29 | "import pandas as pd\n", 30 | "import numpy as np\n", 31 | "import joblib\n", 32 | "from sklearn.metrics import accuracy_score\n", 33 | "from utils import *\n", 34 | "from constants import *" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Get Inference Data" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# in real-time use cases, this code should be replaced with live flowing data\n", 51 | "# use get_inference_data() from utils.py to fetch inference data\n", 52 | "inference_data, labels = get_inference_data()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### Apply Same Pre-processing" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# use apply_pre_processing() from utils.py \n", 69 | "# to apply necessary preprocessing as applied for training data\n", 70 | "processed_inference_data = apply_pre_processing(inference_data)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### Load Saved Model" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 10, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# use MODEL_NAME from constants.py\n", 87 | "model = joblib.load(MODEL_NAME)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "### Prediction on inference data" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 11, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "array([1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1.,\n", 106 | " 1., 1., 0.])" 107 | ] 108 | }, 109 | "execution_count": 11, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "# perform model prediction on inference data\n", 116 | "model.predict(processed_inference_data)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### Scoring check on prediction" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 13, 129 | "metadata": {}, 
130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "0.85" 135 | ] 136 | }, 137 | "execution_count": 13, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# check inference accuracy\n", 144 | "accuracy_score(labels, model.predict(processed_inference_data))" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python 3", 158 | "language": "python", 159 | "name": "python3" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython3", 171 | "version": "3.7.7" 172 | } 173 | }, 174 | "nbformat": 4, 175 | "nbformat_minor": 5 176 | } 177 | -------------------------------------------------------------------------------- /code linting/Corrected/Heart Disease Inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # ## Heart Disease Classification 5 | # In this script, we will try to look at 6 | # the inference part of the heart disease classification solution 7 | 8 | # ### Import Modules 9 | import pandas as pd 10 | import numpy as np 11 | from sklearn import preprocessing 12 | import joblib 13 | from sklearn.metrics import accuracy_score 14 | 15 | 16 | # ### Get Inference Data 17 | # in real-time use cases, this method should be replaced with live flowing data 18 | def get_inference_data(): 19 | ''' 20 | Method for loading inference data 21 | Example usage: inference_data, labels = get_inference_data() 22 | ''' 23 | data = pd.read_csv("Data/heart.csv") 24 | data.drop_duplicates(subset=None, inplace=True) 25 | data.duplicated().any() 26 | df = data.sample(frac=1, random_state=2) 27 | # Taking last 20 records as an example only 28 | df = df.tail(20) 29 | return df[df.columns.drop('target')], df['target'] 30 | 31 | 32 | inference_data, labels = get_inference_data() 33 | 34 | # ### Apply Same Pre-processing 35 | 36 | # apply same pre-processing and feature engineering techniques 37 | # as applied during the training process 38 | 39 | 40 | def encode_features(df, features): 41 | ''' 42 | Method for one-hot encoding all selected categorical fields 43 | Input: The method takes pandas dataframe and 44 | list of the feature names as input 45 | Output: Returns a dataframe with one-hot encoded features 46 | Example usage: 47 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 48 | ''' 49 | # Implement these steps to prevent dimension mismatch during inference 50 | encoded_df = pd.DataFrame(columns=['age', 'sex', 'resting_bp', 51 | 'cholestoral', 'fasting_blood_sugar', 52 | 'max_hr', 'exang', 'oldpeak', 53 | 'num_major_vessels', 'thal_0', 'thal_1', 54 | 'thal_2', 'thal_3', 'slope_0', 55 | 'slope_1', 'slope_2', 56 | 'chest_pain_type_0', 57 | 'chest_pain_type_1', 58 | 'chest_pain_type_2', 59 | 'chest_pain_type_3', 'restecg_0', 60 | 'restecg_1', 'restecg_2']) 61 | placeholder_df = pd.DataFrame() 62 | 63 | # One-Hot Encoding using get_dummies for the specified categorical features 64 | for f in features: 65 | if(f in df.columns): 66 | encoded = pd.get_dummies(df[f]) 67 | encoded = encoded.add_prefix(f + '_') 68 | placeholder_df = 
pd.concat([placeholder_df, encoded], axis=1) 69 | else: 70 | print('Feature not found') 71 | return df 72 | 73 | # Implement these steps to prevent dimension mismatch during inference 74 | for feature in encoded_df.columns: 75 | if feature in df.columns: 76 | encoded_df[feature] = df[feature] 77 | if feature in placeholder_df.columns: 78 | encoded_df[feature] = placeholder_df[feature] 79 | # fill all null values 80 | encoded_df.fillna(0, inplace=True) 81 | 82 | return encoded_df 83 | 84 | 85 | def normalize_data(df): 86 | ''' 87 | Normalize data using Min-Max Scaler 88 | Input: The method takes pandas dataframe as input 89 | Output: Returns a dataframe with normalized features 90 | Example usage: 91 | normalized_df = normalize_data(df) 92 | ''' 93 | val = df.values 94 | min_max_normalizer = preprocessing.MinMaxScaler() 95 | norm_val = min_max_normalizer.fit_transform(val) 96 | df2 = pd.DataFrame(norm_val) 97 | return df2 98 | 99 | 100 | def apply_pre_processing(data): 101 | ''' 102 | Apply the same pre-processing steps as used during training 103 | Input: The method takes pandas dataframe as input 104 | Output: Returns a dataframe with encoded and normalized features 105 | Example usage: 106 | processed_df = apply_pre_processing(df) 107 | ''' 108 | features_to_encode = ['thal', 'slope', 'chest_pain_type', 'restecg'] 109 | encoded = encode_features(data, features_to_encode) 110 | processed_data = normalize_data(encoded) 111 | # Please note this is fabricated inference data, 112 | # so just taking a small sample size 113 | return processed_data 114 | 115 | 116 | processed_inference_data = apply_pre_processing(inference_data) 117 | 118 | 119 | # ### Load Saved Model 120 | model = joblib.load('aditya_model1_adaboost.joblib') 121 | 122 | 123 | # ### Prediction on inference data 124 | model.predict(processed_inference_data) 125 | 126 | 127 | # ### Scoring check on prediction 128 | accuracy_score(labels[-20:], model.predict(processed_inference_data)) 129 | -------------------------------------------------------------------------------- /code documentation/Corrected/Heart Disease Inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # ## Heart Disease Classification 5 | # In this script, we will look at 6 | # the inference part of the heart disease classification solution 7 | 8 | # ### Import Modules 9 | import pandas as pd 10 | import numpy as np 11 | from sklearn import preprocessing 12 | import joblib 13 | from sklearn.metrics import accuracy_score 14 | 15 | 16 | # ### Get Inference Data 17 | # in real-time use cases, this method should be replaced with live flowing data 18 | def get_inference_data(): 19 | ''' 20 | Method for loading inference data 21 | Example usage: inference_data, labels = get_inference_data() 22 | ''' 23 | data = pd.read_csv("Data/heart.csv") 24 | data.drop_duplicates(subset=None, inplace=True) 25 | data.duplicated().any() 26 | df = data.sample(frac=1, random_state=2) 27 | # Taking last 20 records as an example only 28 | df = df.tail(20) 29 | return df[df.columns.drop('target')], df['target'] 30 | 31 | 32 | inference_data, labels = get_inference_data() 33 | 34 | # ### Apply Same Pre-processing 35 | 36 | # apply same pre-processing and feature engineering techniques 37 | # as applied during the training process 38 | 39 | 40 | def encode_features(df, features): 41 | ''' 42 | Method for one-hot encoding all selected categorical fields 43 | Input: The method takes pandas dataframe and 44 | list of the feature names as
input 45 | Output: Returns a dataframe with one-hot encoded features 46 | Example usage: 47 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 48 | ''' 49 | # Implement these steps to prevent dimension mismatch during inference 50 | encoded_df = pd.DataFrame(columns=['age', 'sex', 'resting_bp', 51 | 'cholestoral', 'fasting_blood_sugar', 52 | 'max_hr', 'exang', 'oldpeak', 53 | 'num_major_vessels', 'thal_0', 'thal_1', 54 | 'thal_2', 'thal_3', 'slope_0', 55 | 'slope_1', 'slope_2', 56 | 'chest_pain_type_0', 57 | 'chest_pain_type_1', 58 | 'chest_pain_type_2', 59 | 'chest_pain_type_3', 'restecg_0', 60 | 'restecg_1', 'restecg_2']) 61 | placeholder_df = pd.DataFrame() 62 | 63 | # One-Hot Encoding using get_dummies for the specified categorical features 64 | for f in features: 65 | if f in df.columns: 66 | encoded = pd.get_dummies(df[f]) 67 | encoded = encoded.add_prefix(f + '_') 68 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 69 | else: 70 | print('Feature not found') 71 | return df 72 | 73 | # Implement these steps to prevent dimension mismatch during inference 74 | for feature in encoded_df.columns: 75 | if feature in df.columns: 76 | encoded_df[feature] = df[feature] 77 | if feature in placeholder_df.columns: 78 | encoded_df[feature] = placeholder_df[feature] 79 | # fill all null values 80 | encoded_df.fillna(0, inplace=True) 81 | 82 | return encoded_df 83 | 84 | 85 | def normalize_data(df): 86 | ''' 87 | Normalize data using Min-Max Scaler 88 | Input: The method takes pandas dataframe as input 89 | Output: Returns a dataframe with normalized features 90 | Example usage: 91 | normalized_df = normalize_data(df) 92 | ''' 93 | val = df.values 94 | min_max_normalizer = preprocessing.MinMaxScaler() 95 | norm_val = min_max_normalizer.fit_transform(val) 96 | df2 = pd.DataFrame(norm_val) 97 | return df2 98 | 99 | 100 | def apply_pre_processing(data): 101 | ''' 102 | Apply the same pre-processing steps as used during training 103 | Input: The method takes pandas dataframe as input 104 | Output: Returns a dataframe with encoded and normalized features 105 | Example usage: 106 | processed_df = apply_pre_processing(df) 107 | ''' 108 | features_to_encode = ['thal', 'slope', 'chest_pain_type', 'restecg'] 109 | encoded = encode_features(data, features_to_encode) 110 | processed_data = normalize_data(encoded) 111 | # Please note this is fabricated inference data, 112 | # so just taking a small sample size 113 | return processed_data 114 | 115 | 116 | processed_inference_data = apply_pre_processing(inference_data) 117 | 118 | 119 | # ### Load Saved Model 120 | model = joblib.load('aditya_model1_adaboost.joblib') 121 | 122 | 123 | # ### Prediction on inference data 124 | model.predict(processed_inference_data) 125 | 126 | 127 | # ### Scoring check on prediction 128 | accuracy_score(labels[-20:], model.predict(processed_inference_data)) 129 | -------------------------------------------------------------------------------- /unit testing/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # ## Heart Disease Classification 5 | # In this script, we will look at 6 | # the inference part of the heart disease classification solution 7 | 8 | # ### Import Modules 9 | import warnings 10 | warnings.filterwarnings("ignore") 11 | import pandas as pd 12 | import numpy as np 13 | from sklearn import preprocessing 14 | import joblib 15 | from sklearn.metrics import accuracy_score 16 | import argparse 17 | 18 | 19 | # ### Get
Inference Data 20 | # in real-time use cases, this method should be replaced with live flowing data 21 | def get_inference_data(): 22 | ''' 23 | Method for loading inference data 24 | Example usage: inference_data, labels = get_inference_data() 25 | ''' 26 | data = pd.read_csv("Data/heart.csv") 27 | data.drop_duplicates(subset=None, inplace=True) 28 | data.duplicated().any() 29 | df = data.sample(frac=1, random_state=2) 30 | # Taking last 20 records as an example only 31 | df = df.tail(20) 32 | return df[df.columns.drop('target')], df['target'] 33 | 34 | 35 | # ### Apply Same Pre-processing 36 | 37 | # apply same pre-processing and feature engineering techniques 38 | # as applied during the training process 39 | 40 | 41 | def encode_features(df, features): 42 | ''' 43 | Method for one-hot encoding all selected categorical fields 44 | Input: The method takes pandas dataframe and 45 | list of the feature names as input 46 | Output: Returns a dataframe with one-hot encoded features 47 | Example usage: 48 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 49 | ''' 50 | # Implement these steps to prevent dimension mismatch during inference 51 | encoded_df = pd.DataFrame(columns=['age', 'sex', 'resting_bp', 52 | 'cholestoral', 'fasting_blood_sugar', 53 | 'max_hr', 'exang', 'oldpeak', 54 | 'num_major_vessels', 'thal_0', 'thal_1', 55 | 'thal_2', 'thal_3', 'slope_0', 56 | 'slope_1', 'slope_2', 57 | 'chest_pain_type_0', 58 | 'chest_pain_type_1', 59 | 'chest_pain_type_2', 60 | 'chest_pain_type_3', 'restecg_0', 61 | 'restecg_1', 'restecg_2']) 62 | placeholder_df = pd.DataFrame() 63 | 64 | # One-Hot Encoding using get_dummies for the specified categorical features 65 | for f in features: 66 | if f in df.columns: 67 | encoded = pd.get_dummies(df[f]) 68 | encoded = encoded.add_prefix(f + '_') 69 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 70 | else: 71 | print('Feature not found') 72 | return df 73 | 74 | # Implement these steps to prevent dimension mismatch during inference 75 | for feature in encoded_df.columns: 76 | if feature in df.columns: 77 | encoded_df[feature] = df[feature] 78 | if feature in placeholder_df.columns: 79 | encoded_df[feature] = placeholder_df[feature] 80 | # fill all null values 81 | encoded_df.fillna(0, inplace=True) 82 | 83 | return encoded_df 84 | 85 | 86 | def normalize_data(df): 87 | ''' 88 | Normalize data using Min-Max Scaler 89 | Input: The method takes pandas dataframe as input 90 | Output: Returns a dataframe with normalized features 91 | Example usage: 92 | normalized_df = normalize_data(df) 93 | ''' 94 | val = df.values 95 | min_max_normalizer = preprocessing.MinMaxScaler() 96 | norm_val = min_max_normalizer.fit_transform(val) 97 | df2 = pd.DataFrame(norm_val) 98 | return df2 99 | 100 | 101 | def apply_pre_processing(data): 102 | ''' 103 | Apply the same pre-processing steps as used during training 104 | Input: The method takes pandas dataframe as input 105 | Output: Returns a dataframe with encoded and normalized features 106 | Example usage: 107 | processed_df = apply_pre_processing(df) 108 | ''' 109 | features_to_encode = ['thal', 'slope', 'chest_pain_type', 'restecg'] 110 | encoded = encode_features(data, features_to_encode) 111 | processed_data = normalize_data(encoded) 112 | # Please note this is fabricated inference data, 113 | # so just taking a small sample size 114 | return processed_data 115 | 116 | 117 | # main function - starting point of the code 118 | def main(model_name): 119 | ''' 120 | main function - starting point of the code 121 | ''' 122 |
print("Starting execution of the inference code...") 123 | inference_data, labels = get_inference_data() 124 | processed_inference_data = apply_pre_processing(inference_data) 125 | # ### Load Saved Model 126 | model = joblib.load(model_name) 127 | # ### Prediction on inference data 128 | model.predict(processed_inference_data) 129 | # ### Scoring check on prediction 130 | print("Checking inference accuracy:") 131 | print(accuracy_score(labels[-20:], model.predict(processed_inference_data))) 132 | 133 | 134 | if __name__ == "__main__": 135 | parser = argparse.ArgumentParser(description='Running inference pipeline') 136 | parser.add_argument('--model', 137 | default='adaboost', 138 | help='select algorithm: svm or adaboost') 139 | args = parser.parse_args() 140 | if(args.model == 'svm'): 141 | model_name = 'aditya_model2_svm.joblib' 142 | else: 143 | model_name = 'aditya_model1_adaboost.joblib' 144 | main(model_name) 145 | -------------------------------------------------------------------------------- /python scripting/Heart Disease Inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # ## Heart Disease Classification 5 | # In this script, we will try to look at 6 | # the inference part of the heart disease classification solution 7 | 8 | # ### Import Modules 9 | import warnings 10 | warnings.filterwarnings("ignore") 11 | import pandas as pd 12 | import numpy as np 13 | from sklearn import preprocessing 14 | import joblib 15 | from sklearn.metrics import accuracy_score 16 | import argparse 17 | 18 | 19 | # ### Get Inference Data 20 | # in real-time use cases, this method should be replaced with live flowing data 21 | def get_inference_data(): 22 | ''' 23 | Method for loading inference data 24 | Example usage: inference_data, labels = get_inference_data() 25 | ''' 26 | data = pd.read_csv("Data/heart.csv") 27 | data.drop_duplicates(subset=None, inplace=True) 28 | data.duplicated().any() 29 | df = data.sample(frac=1, random_state=2) 30 | # Taking last 20 records as an example only 31 | df = df.tail(20) 32 | return df[df.columns.drop('target')], df['target'] 33 | 34 | 35 | # ### Apply Same Pre-processing 36 | 37 | # apply same pre-processing and feature engineering techniques 38 | # as applied during the training process 39 | 40 | 41 | def encode_features(df, features): 42 | ''' 43 | Method for one-hot encoding all selected categorical fields 44 | Input: The method takes pandas dataframe and 45 | list of the feature names as input 46 | Output: Returns a dataframe with one-hot encoded features 47 | Example usage: 48 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 49 | ''' 50 | # Implement these steps to prevent dimension mismatch during inference 51 | encoded_df = pd.DataFrame(columns=['age', 'sex', 'resting_bp', 52 | 'cholestoral', 'fasting_blood_sugar', 53 | 'max_hr', 'exang', 'oldpeak', 54 | 'num_major_vessels', 'thal_0', 'thal_1', 55 | 'thal_2', 'thal_3', 'slope_0', 56 | 'slope_1', 'slope_2', 57 | 'chest_pain_type_0', 58 | 'chest_pain_type_1', 59 | 'chest_pain_type_2', 60 | 'chest_pain_type_3', 'restecg_0', 61 | 'restecg_1', 'restecg_2']) 62 | placeholder_df = pd.DataFrame() 63 | 64 | # One-Hot Encoding using get_dummies for the specified categorical features 65 | for f in features: 66 | if(f in df.columns): 67 | encoded = pd.get_dummies(df[f]) 68 | encoded = encoded.add_prefix(f + '_') 69 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 70 | else: 71 | 
print('Feature not found') 72 | return df 73 | 74 | # Implement these steps to prevent dimension mismatch during inference 75 | for feature in encoded_df.columns: 76 | if feature in df.columns: 77 | encoded_df[feature] = df[feature] 78 | if feature in placeholder_df.columns: 79 | encoded_df[feature] = placeholder_df[feature] 80 | # fill all null values 81 | encoded_df.fillna(0, inplace=True) 82 | 83 | return encoded_df 84 | 85 | 86 | def normalize_data(df): 87 | ''' 88 | Normalize data using Min-Max Scaler 89 | Input: The method takes pandas dataframe as input 90 | Output: Returns a dataframe with normalized features 91 | Example usage: 92 | normalized_df = normalize_data(df) 93 | ''' 94 | val = df.values 95 | min_max_normalizer = preprocessing.MinMaxScaler() 96 | norm_val = min_max_normalizer.fit_transform(val) 97 | df2 = pd.DataFrame(norm_val) 98 | return df2 99 | 100 | 101 | def apply_pre_processing(data): 102 | ''' 103 | Apply the same pre-processing steps as used during training 104 | Input: The method takes pandas dataframe as input 105 | Output: Returns a dataframe with encoded and normalized features 106 | Example usage: 107 | processed_df = apply_pre_processing(df) 108 | ''' 109 | features_to_encode = ['thal', 'slope', 'chest_pain_type', 'restecg'] 110 | encoded = encode_features(data, features_to_encode) 111 | processed_data = normalize_data(encoded) 112 | # Please note this is fabricated inference data, 113 | # so just taking a small sample size 114 | return processed_data 115 | 116 | 117 | # main function - starting point of the code 118 | def main(model_name): 119 | ''' 120 | main function - starting point of the code 121 | ''' 122 | print("Starting execution of the inference code...") 123 | inference_data, labels = get_inference_data() 124 | processed_inference_data = apply_pre_processing(inference_data) 125 | # ### Load Saved Model 126 | model = joblib.load(model_name) 127 | # ### Prediction on inference data 128 | model.predict(processed_inference_data) 129 | # ### Scoring check on prediction 130 | print("Checking inference accuracy:") 131 | print(accuracy_score(labels[-20:], model.predict(processed_inference_data))) 132 | 133 | 134 | if __name__ == "__main__": 135 | parser = argparse.ArgumentParser(description='Running inference pipeline') 136 | parser.add_argument('--model', 137 | default='adaboost', 138 | help='select algorithm: svm or adaboost') 139 | args = parser.parse_args() 140 | if args.model == 'svm': 141 | model_name = 'aditya_model2_svm.joblib' 142 | else: 143 | model_name = 'aditya_model1_adaboost.joblib' 144 | main(model_name) -------------------------------------------------------------------------------- /exception handling and logging/main_old.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # ## Heart Disease Classification 5 | # In this script, we will look at 6 | # the inference part of the heart disease classification solution 7 | 8 | # ### Import Modules 9 | import warnings 10 | warnings.filterwarnings("ignore") 11 | import pandas as pd 12 | import numpy as np 13 | from sklearn import preprocessing 14 | import joblib 15 | from sklearn.metrics import accuracy_score 16 | import argparse 17 | import logging 18 | 19 | 20 | # ### Get Inference Data 21 | # in real-time use cases, this method should be replaced with live flowing data 22 | def get_inference_data(): 23 | ''' 24 | Method for loading inference data 25 | Example usage: inference_data, labels = get_inference_data() 26 | ''' 27 | data =
pd.read_csv("Data/heart.csv") 28 | data.drop_duplicates(subset=None, inplace=True) 29 | data.duplicated().any() 30 | df = data.sample(frac=1, random_state=2) 31 | # Taking last 20 records as an example only 32 | df = df.tail(20) 33 | return df[df.columns.drop('target')], df['target'] 34 | 35 | 36 | # ### Apply Same Pre-processing 37 | 38 | # apply same pre-processing and feature engineering techniques 39 | # as applied during the training process 40 | 41 | 42 | def encode_features(df, features): 43 | ''' 44 | Method for one-hot encoding all selected categorical fields 45 | Input: The method takes pandas dataframe and 46 | list of the feature names as input 47 | Output: Returns a dataframe with one-hot encoded features 48 | Example usage: 49 | one_hot_encoded_df = encode_features(dataframe, list_features_to_encode) 50 | ''' 51 | # Implement these steps to prevent dimension mismatch during inference 52 | encoded_df = pd.DataFrame(columns=['age', 'sex', 'resting_bp', 53 | 'cholestoral', 'fasting_blood_sugar', 54 | 'max_hr', 'exang', 'oldpeak', 55 | 'num_major_vessels', 'thal_0', 'thal_1', 56 | 'thal_2', 'thal_3', 'slope_0', 57 | 'slope_1', 'slope_2', 58 | 'chest_pain_type_0', 59 | 'chest_pain_type_1', 60 | 'chest_pain_type_2', 61 | 'chest_pain_type_3', 'restecg_0', 62 | 'restecg_1', 'restecg_2']) 63 | placeholder_df = pd.DataFrame() 64 | 65 | # One-Hot Encoding using get_dummies for the specified categorical features 66 | for f in features: 67 | if(f in df.columns): 68 | encoded = pd.get_dummies(df[f]) 69 | encoded = encoded.add_prefix(f + '_') 70 | placeholder_df = pd.concat([placeholder_df, encoded], axis=1) 71 | else: 72 | print('Feature not found') 73 | return df 74 | 75 | # Implement these steps to prevent dimension mismatch during inference 76 | for feature in encoded_df.columns: 77 | if feature in df.columns: 78 | encoded_df[feature] = df[feature] 79 | if feature in placeholder_df.columns: 80 | encoded_df[feature] = placeholder_df[feature] 81 | # fill all null values 82 | encoded_df.fillna(0, inplace=True) 83 | 84 | return encoded_df 85 | 86 | 87 | def normalize_data(df): 88 | ''' 89 | Normalize data using Min-Max Scaler 90 | Input: The method takes pandas dataframe as input 91 | Output: Returns a dataframe with normalized features 92 | Example usage: 93 | normalized_df = normalize_data(df) 94 | ''' 95 | val = df.values 96 | min_max_normalizer = preprocessing.MinMaxScaler() 97 | norm_val = min_max_normalizer.fit_transform(val) 98 | df2 = pd.DataFrame(norm_val) 99 | return df2 100 | 101 | 102 | def apply_pre_processing(data): 103 | ''' 104 | Normalize data using Min-Max Scaler 105 | Input: The method takes pandas dataframe as input 106 | Output: Returns a dataframe with normalized features 107 | Example usage: 108 | normalized_df = normalize_data(df) 109 | ''' 110 | features_to_encode = ['thal', 'slope', 'chest_pain_type', 'restecg'] 111 | encoded = encode_features(data, features_to_encode) 112 | processed_data = normalize_data(encoded) 113 | # Please note this is fabricated inference data, 114 | # so just taking a small sample size 115 | return processed_data 116 | 117 | 118 | # main function - starting point of the code 119 | def main(model_name, logger): 120 | ''' 121 | main function - starting point of the code 122 | ''' 123 | try: 124 | print("Starting execution of the inference code...") 125 | logger.info("Started execution. Fetching data now ...") 126 | inference_data, labels = get_inference_data() 127 | logger.info("Data fetched. 
Applying pre-processing now ...") 128 | processed_inference_data = apply_pre_processing(inference_data) 129 | # ### Load Saved Model 130 | logger.info("Pre-processing is completed. Loading trained model now ...") 131 | model = joblib.load(model_name) 132 | logger.info("Trained model is loaded. Executing trained model on inference data ...") 133 | # ### Prediction on inference data 134 | model.predict(processed_inference_data) 135 | # ### Scoring check on prediction 136 | print("Checking inference accuracy:") 137 | print(accuracy_score(labels[-20:], model.predict(processed_inference_data))) 138 | logger.info("Execution is complete.") 139 | except Exception as e: 140 | print("--------Error!!!--------") 141 | logger.error("Encountered error. Please check.") 142 | logger.error(e) 143 | print(e) 144 | 145 | 146 | 147 | if __name__ == "__main__": 148 | # Create and configure logger 149 | logging.basicConfig(filename="inference_pipe_exec.log", 150 | format='%(asctime)s %(message)s', 151 | filemode='a') 152 | 153 | # Creating an object 154 | logger = logging.getLogger() 155 | 156 | # Setting the threshold of logger to DEBUG 157 | logger.setLevel(logging.DEBUG) 158 | 159 | parser = argparse.ArgumentParser(description='Running inference pipeline') 160 | parser.add_argument('--model', 161 | default='adaboost', 162 | help='select algorithm: svm or adaboost') 163 | args = parser.parse_args() 164 | if args.model == 'svm': 165 | model_name = 'aditya_model2_svm.joblib' 166 | else: 167 | model_name = 'aditya_model1_adaboost.joblib' 168 | main(model_name, logger) -------------------------------------------------------------------------------- /unit testing/Data/heart.csv: -------------------------------------------------------------------------------- 1 | age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target 2 | 63,1,3,145,233,1,0,150,0,2.3,0,0,1,1 3 | 37,1,2,130,250,0,1,187,0,3.5,0,0,2,1 4 | 41,0,1,130,204,0,0,172,0,1.4,2,0,2,1 5 | 56,1,1,120,236,0,1,178,0,0.8,2,0,2,1 6 | 57,0,0,120,354,0,1,163,1,0.6,2,0,2,1 7 | 57,1,0,140,192,0,1,148,0,0.4,1,0,1,1 8 | 56,0,1,140,294,0,0,153,0,1.3,1,0,2,1 9 | 44,1,1,120,263,0,1,173,0,0,2,0,3,1 10 | 52,1,2,172,199,1,1,162,0,0.5,2,0,3,1 11 | 57,1,2,150,168,0,1,174,0,1.6,2,0,2,1 12 | 54,1,0,140,239,0,1,160,0,1.2,2,0,2,1 13 | 48,0,2,130,275,0,1,139,0,0.2,2,0,2,1 14 | 49,1,1,130,266,0,1,171,0,0.6,2,0,2,1 15 | 64,1,3,110,211,0,0,144,1,1.8,1,0,2,1 16 | 58,0,3,150,283,1,0,162,0,1,2,0,2,1 17 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 18 | 58,0,2,120,340,0,1,172,0,0,2,0,2,1 19 | 66,0,3,150,226,0,1,114,0,2.6,0,0,2,1 20 | 43,1,0,150,247,0,1,171,0,1.5,2,0,2,1 21 | 69,0,3,140,239,0,1,151,0,1.8,2,2,2,1 22 | 59,1,0,135,234,0,1,161,0,0.5,1,0,3,1 23 | 44,1,2,130,233,0,1,179,1,0.4,2,0,2,1 24 | 42,1,0,140,226,0,1,178,0,0,2,0,2,1 25 | 61,1,2,150,243,1,1,137,1,1,1,0,2,1 26 | 40,1,3,140,199,0,1,178,1,1.4,2,0,3,1 27 | 71,0,1,160,302,0,1,162,0,0.4,2,2,2,1 28 | 59,1,2,150,212,1,1,157,0,1.6,2,0,2,1 29 | 51,1,2,110,175,0,1,123,0,0.6,2,0,2,1 30 | 65,0,2,140,417,1,0,157,0,0.8,2,1,2,1 31 | 53,1,2,130,197,1,0,152,0,1.2,0,0,2,1 32 | 41,0,1,105,198,0,1,168,0,0,2,1,2,1 33 | 65,1,0,120,177,0,1,140,0,0.4,2,0,3,1 34 | 44,1,1,130,219,0,0,188,0,0,2,0,2,1 35 | 54,1,2,125,273,0,0,152,0,0.5,0,1,2,1 36 | 51,1,3,125,213,0,0,125,1,1.4,2,1,2,1 37 | 46,0,2,142,177,0,0,160,1,1.4,0,0,2,1 38 | 54,0,2,135,304,1,1,170,0,0,2,0,2,1 39 | 54,1,2,150,232,0,0,165,0,1.6,2,0,3,1 40 | 65,0,2,155,269,0,1,148,0,0.8,2,0,2,1 41 |
65,0,2,160,360,0,0,151,0,0.8,2,0,2,1 42 | 51,0,2,140,308,0,0,142,0,1.5,2,1,2,1 43 | 48,1,1,130,245,0,0,180,0,0.2,1,0,2,1 44 | 45,1,0,104,208,0,0,148,1,3,1,0,2,1 45 | 53,0,0,130,264,0,0,143,0,0.4,1,0,2,1 46 | 39,1,2,140,321,0,0,182,0,0,2,0,2,1 47 | 52,1,1,120,325,0,1,172,0,0.2,2,0,2,1 48 | 44,1,2,140,235,0,0,180,0,0,2,0,2,1 49 | 47,1,2,138,257,0,0,156,0,0,2,0,2,1 50 | 53,0,2,128,216,0,0,115,0,0,2,0,0,1 51 | 53,0,0,138,234,0,0,160,0,0,2,0,2,1 52 | 51,0,2,130,256,0,0,149,0,0.5,2,0,2,1 53 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 54 | 62,1,2,130,231,0,1,146,0,1.8,1,3,3,1 55 | 44,0,2,108,141,0,1,175,0,0.6,1,0,2,1 56 | 63,0,2,135,252,0,0,172,0,0,2,0,2,1 57 | 52,1,1,134,201,0,1,158,0,0.8,2,1,2,1 58 | 48,1,0,122,222,0,0,186,0,0,2,0,2,1 59 | 45,1,0,115,260,0,0,185,0,0,2,0,2,1 60 | 34,1,3,118,182,0,0,174,0,0,2,0,2,1 61 | 57,0,0,128,303,0,0,159,0,0,2,1,2,1 62 | 71,0,2,110,265,1,0,130,0,0,2,1,2,1 63 | 54,1,1,108,309,0,1,156,0,0,2,0,3,1 64 | 52,1,3,118,186,0,0,190,0,0,1,0,1,1 65 | 41,1,1,135,203,0,1,132,0,0,1,0,1,1 66 | 58,1,2,140,211,1,0,165,0,0,2,0,2,1 67 | 35,0,0,138,183,0,1,182,0,1.4,2,0,2,1 68 | 51,1,2,100,222,0,1,143,1,1.2,1,0,2,1 69 | 45,0,1,130,234,0,0,175,0,0.6,1,0,2,1 70 | 44,1,1,120,220,0,1,170,0,0,2,0,2,1 71 | 62,0,0,124,209,0,1,163,0,0,2,0,2,1 72 | 54,1,2,120,258,0,0,147,0,0.4,1,0,3,1 73 | 51,1,2,94,227,0,1,154,1,0,2,1,3,1 74 | 29,1,1,130,204,0,0,202,0,0,2,0,2,1 75 | 51,1,0,140,261,0,0,186,1,0,2,0,2,1 76 | 43,0,2,122,213,0,1,165,0,0.2,1,0,2,1 77 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 78 | 51,1,2,125,245,1,0,166,0,2.4,1,0,2,1 79 | 59,1,1,140,221,0,1,164,1,0,2,0,2,1 80 | 52,1,1,128,205,1,1,184,0,0,2,0,2,1 81 | 58,1,2,105,240,0,0,154,1,0.6,1,0,3,1 82 | 41,1,2,112,250,0,1,179,0,0,2,0,2,1 83 | 45,1,1,128,308,0,0,170,0,0,2,0,2,1 84 | 60,0,2,102,318,0,1,160,0,0,2,1,2,1 85 | 52,1,3,152,298,1,1,178,0,1.2,1,0,3,1 86 | 42,0,0,102,265,0,0,122,0,0.6,1,0,2,1 87 | 67,0,2,115,564,0,0,160,0,1.6,1,0,3,1 88 | 68,1,2,118,277,0,1,151,0,1,2,1,3,1 89 | 46,1,1,101,197,1,1,156,0,0,2,0,3,1 90 | 54,0,2,110,214,0,1,158,0,1.6,1,0,2,1 91 | 58,0,0,100,248,0,0,122,0,1,1,0,2,1 92 | 48,1,2,124,255,1,1,175,0,0,2,2,2,1 93 | 57,1,0,132,207,0,1,168,1,0,2,0,3,1 94 | 52,1,2,138,223,0,1,169,0,0,2,4,2,1 95 | 54,0,1,132,288,1,0,159,1,0,2,1,2,1 96 | 45,0,1,112,160,0,1,138,0,0,1,0,2,1 97 | 53,1,0,142,226,0,0,111,1,0,2,0,3,1 98 | 62,0,0,140,394,0,0,157,0,1.2,1,0,2,1 99 | 52,1,0,108,233,1,1,147,0,0.1,2,3,3,1 100 | 43,1,2,130,315,0,1,162,0,1.9,2,1,2,1 101 | 53,1,2,130,246,1,0,173,0,0,2,3,2,1 102 | 42,1,3,148,244,0,0,178,0,0.8,2,2,2,1 103 | 59,1,3,178,270,0,0,145,0,4.2,0,0,3,1 104 | 63,0,1,140,195,0,1,179,0,0,2,2,2,1 105 | 42,1,2,120,240,1,1,194,0,0.8,0,0,3,1 106 | 50,1,2,129,196,0,1,163,0,0,2,0,2,1 107 | 68,0,2,120,211,0,0,115,0,1.5,1,0,2,1 108 | 69,1,3,160,234,1,0,131,0,0.1,1,1,2,1 109 | 45,0,0,138,236,0,0,152,1,0.2,1,0,2,1 110 | 50,0,1,120,244,0,1,162,0,1.1,2,0,2,1 111 | 50,0,0,110,254,0,0,159,0,0,2,0,2,1 112 | 64,0,0,180,325,0,1,154,1,0,2,0,2,1 113 | 57,1,2,150,126,1,1,173,0,0.2,2,1,3,1 114 | 64,0,2,140,313,0,1,133,0,0.2,2,0,3,1 115 | 43,1,0,110,211,0,1,161,0,0,2,0,3,1 116 | 55,1,1,130,262,0,1,155,0,0,2,0,2,1 117 | 37,0,2,120,215,0,1,170,0,0,2,0,2,1 118 | 41,1,2,130,214,0,0,168,0,2,1,0,2,1 119 | 56,1,3,120,193,0,0,162,0,1.9,1,0,3,1 120 | 46,0,1,105,204,0,1,172,0,0,2,0,2,1 121 | 46,0,0,138,243,0,0,152,1,0,1,0,2,1 122 | 64,0,0,130,303,0,1,122,0,2,1,2,2,1 123 | 59,1,0,138,271,0,0,182,0,0,2,0,2,1 124 | 41,0,2,112,268,0,0,172,1,0,2,0,2,1 125 | 54,0,2,108,267,0,0,167,0,0,2,0,2,1 126 | 39,0,2,94,199,0,1,179,0,0,2,0,2,1 127 | 
34,0,1,118,210,0,1,192,0,0.7,2,0,2,1 128 | 47,1,0,112,204,0,1,143,0,0.1,2,0,2,1 129 | 67,0,2,152,277,0,1,172,0,0,2,1,2,1 130 | 52,0,2,136,196,0,0,169,0,0.1,1,0,2,1 131 | 74,0,1,120,269,0,0,121,1,0.2,2,1,2,1 132 | 54,0,2,160,201,0,1,163,0,0,2,1,2,1 133 | 49,0,1,134,271,0,1,162,0,0,1,0,2,1 134 | 42,1,1,120,295,0,1,162,0,0,2,0,2,1 135 | 41,1,1,110,235,0,1,153,0,0,2,0,2,1 136 | 41,0,1,126,306,0,1,163,0,0,2,0,2,1 137 | 49,0,0,130,269,0,1,163,0,0,2,0,2,1 138 | 60,0,2,120,178,1,1,96,0,0,2,0,2,1 139 | 62,1,1,128,208,1,0,140,0,0,2,0,2,1 140 | 57,1,0,110,201,0,1,126,1,1.5,1,0,1,1 141 | 64,1,0,128,263,0,1,105,1,0.2,1,1,3,1 142 | 51,0,2,120,295,0,0,157,0,0.6,2,0,2,1 143 | 43,1,0,115,303,0,1,181,0,1.2,1,0,2,1 144 | 42,0,2,120,209,0,1,173,0,0,1,0,2,1 145 | 67,0,0,106,223,0,1,142,0,0.3,2,2,2,1 146 | 76,0,2,140,197,0,2,116,0,1.1,1,0,2,1 147 | 70,1,1,156,245,0,0,143,0,0,2,0,2,1 148 | 44,0,2,118,242,0,1,149,0,0.3,1,1,2,1 149 | 60,0,3,150,240,0,1,171,0,0.9,2,0,2,1 150 | 44,1,2,120,226,0,1,169,0,0,2,0,2,1 151 | 42,1,2,130,180,0,1,150,0,0,2,0,2,1 152 | 66,1,0,160,228,0,0,138,0,2.3,2,0,1,1 153 | 71,0,0,112,149,0,1,125,0,1.6,1,0,2,1 154 | 64,1,3,170,227,0,0,155,0,0.6,1,0,3,1 155 | 66,0,2,146,278,0,0,152,0,0,1,1,2,1 156 | 39,0,2,138,220,0,1,152,0,0,1,0,2,1 157 | 58,0,0,130,197,0,1,131,0,0.6,1,0,2,1 158 | 47,1,2,130,253,0,1,179,0,0,2,0,2,1 159 | 35,1,1,122,192,0,1,174,0,0,2,0,2,1 160 | 58,1,1,125,220,0,1,144,0,0.4,1,4,3,1 161 | 56,1,1,130,221,0,0,163,0,0,2,0,3,1 162 | 56,1,1,120,240,0,1,169,0,0,0,0,2,1 163 | 55,0,1,132,342,0,1,166,0,1.2,2,0,2,1 164 | 41,1,1,120,157,0,1,182,0,0,2,0,2,1 165 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 166 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 167 | 67,1,0,160,286,0,0,108,1,1.5,1,3,2,0 168 | 67,1,0,120,229,0,0,129,1,2.6,1,2,3,0 169 | 62,0,0,140,268,0,0,160,0,3.6,0,2,2,0 170 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 171 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 172 | 56,1,2,130,256,1,0,142,1,0.6,1,1,1,0 173 | 48,1,1,110,229,0,1,168,0,1,0,0,3,0 174 | 58,1,1,120,284,0,0,160,0,1.8,1,0,2,0 175 | 58,1,2,132,224,0,0,173,0,3.2,2,2,3,0 176 | 60,1,0,130,206,0,0,132,1,2.4,1,2,3,0 177 | 40,1,0,110,167,0,0,114,1,2,1,0,3,0 178 | 60,1,0,117,230,1,1,160,1,1.4,2,2,3,0 179 | 64,1,2,140,335,0,1,158,0,0,2,0,2,0 180 | 43,1,0,120,177,0,0,120,1,2.5,1,0,3,0 181 | 57,1,0,150,276,0,0,112,1,0.6,1,1,1,0 182 | 55,1,0,132,353,0,1,132,1,1.2,1,1,3,0 183 | 65,0,0,150,225,0,0,114,0,1,1,3,3,0 184 | 61,0,0,130,330,0,0,169,0,0,2,0,2,0 185 | 58,1,2,112,230,0,0,165,0,2.5,1,1,3,0 186 | 50,1,0,150,243,0,0,128,0,2.6,1,0,3,0 187 | 44,1,0,112,290,0,0,153,0,0,2,1,2,0 188 | 60,1,0,130,253,0,1,144,1,1.4,2,1,3,0 189 | 54,1,0,124,266,0,0,109,1,2.2,1,1,3,0 190 | 50,1,2,140,233,0,1,163,0,0.6,1,1,3,0 191 | 41,1,0,110,172,0,0,158,0,0,2,0,3,0 192 | 51,0,0,130,305,0,1,142,1,1.2,1,0,3,0 193 | 58,1,0,128,216,0,0,131,1,2.2,1,3,3,0 194 | 54,1,0,120,188,0,1,113,0,1.4,1,1,3,0 195 | 60,1,0,145,282,0,0,142,1,2.8,1,2,3,0 196 | 60,1,2,140,185,0,0,155,0,3,1,0,2,0 197 | 59,1,0,170,326,0,0,140,1,3.4,0,0,3,0 198 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 199 | 67,1,0,125,254,1,1,163,0,0.2,1,2,3,0 200 | 62,1,0,120,267,0,1,99,1,1.8,1,2,3,0 201 | 65,1,0,110,248,0,0,158,0,0.6,2,2,1,0 202 | 44,1,0,110,197,0,0,177,0,0,2,1,2,0 203 | 60,1,0,125,258,0,0,141,1,2.8,1,1,3,0 204 | 58,1,0,150,270,0,0,111,1,0.8,2,0,3,0 205 | 68,1,2,180,274,1,0,150,1,1.6,1,0,3,0 206 | 62,0,0,160,164,0,0,145,0,6.2,0,3,3,0 207 | 52,1,0,128,255,0,1,161,1,0,2,1,3,0 208 | 59,1,0,110,239,0,0,142,1,1.2,1,1,3,0 209 | 60,0,0,150,258,0,0,157,0,2.6,1,2,3,0 210 | 49,1,2,120,188,0,1,139,0,2,1,3,3,0 211 | 
59,1,0,140,177,0,1,162,1,0,2,1,3,0 212 | 57,1,2,128,229,0,0,150,0,0.4,1,1,3,0 213 | 61,1,0,120,260,0,1,140,1,3.6,1,1,3,0 214 | 39,1,0,118,219,0,1,140,0,1.2,1,0,3,0 215 | 61,0,0,145,307,0,0,146,1,1,1,0,3,0 216 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 217 | 43,0,0,132,341,1,0,136,1,3,1,0,3,0 218 | 62,0,2,130,263,0,1,97,0,1.2,1,1,3,0 219 | 63,1,0,130,330,1,0,132,1,1.8,2,3,3,0 220 | 65,1,0,135,254,0,0,127,0,2.8,1,1,3,0 221 | 48,1,0,130,256,1,0,150,1,0,2,2,3,0 222 | 63,0,0,150,407,0,0,154,0,4,1,3,3,0 223 | 55,1,0,140,217,0,1,111,1,5.6,0,0,3,0 224 | 65,1,3,138,282,1,0,174,0,1.4,1,1,2,0 225 | 56,0,0,200,288,1,0,133,1,4,0,2,3,0 226 | 54,1,0,110,239,0,1,126,1,2.8,1,1,3,0 227 | 70,1,0,145,174,0,1,125,1,2.6,0,0,3,0 228 | 62,1,1,120,281,0,0,103,0,1.4,1,1,3,0 229 | 35,1,0,120,198,0,1,130,1,1.6,1,0,3,0 230 | 59,1,3,170,288,0,0,159,0,0.2,1,0,3,0 231 | 64,1,2,125,309,0,1,131,1,1.8,1,0,3,0 232 | 47,1,2,108,243,0,1,152,0,0,2,0,2,0 233 | 57,1,0,165,289,1,0,124,0,1,1,3,3,0 234 | 55,1,0,160,289,0,0,145,1,0.8,1,1,3,0 235 | 64,1,0,120,246,0,0,96,1,2.2,0,1,2,0 236 | 70,1,0,130,322,0,0,109,0,2.4,1,3,2,0 237 | 51,1,0,140,299,0,1,173,1,1.6,2,0,3,0 238 | 58,1,0,125,300,0,0,171,0,0,2,2,3,0 239 | 60,1,0,140,293,0,0,170,0,1.2,1,2,3,0 240 | 77,1,0,125,304,0,0,162,1,0,2,3,2,0 241 | 35,1,0,126,282,0,0,156,1,0,2,0,3,0 242 | 70,1,2,160,269,0,1,112,1,2.9,1,1,3,0 243 | 59,0,0,174,249,0,1,143,1,0,1,0,2,0 244 | 64,1,0,145,212,0,0,132,0,2,1,2,1,0 245 | 57,1,0,152,274,0,1,88,1,1.2,1,1,3,0 246 | 56,1,0,132,184,0,0,105,1,2.1,1,1,1,0 247 | 48,1,0,124,274,0,0,166,0,0.5,1,0,3,0 248 | 56,0,0,134,409,0,0,150,1,1.9,1,2,3,0 249 | 66,1,1,160,246,0,1,120,1,0,1,3,1,0 250 | 54,1,1,192,283,0,0,195,0,0,2,1,3,0 251 | 69,1,2,140,254,0,0,146,0,2,1,3,3,0 252 | 51,1,0,140,298,0,1,122,1,4.2,1,3,3,0 253 | 43,1,0,132,247,1,0,143,1,0.1,1,4,3,0 254 | 62,0,0,138,294,1,1,106,0,1.9,1,3,2,0 255 | 67,1,0,100,299,0,0,125,1,0.9,1,2,2,0 256 | 59,1,3,160,273,0,0,125,0,0,2,0,2,0 257 | 45,1,0,142,309,0,0,147,1,0,1,3,3,0 258 | 58,1,0,128,259,0,0,130,1,3,1,2,3,0 259 | 50,1,0,144,200,0,0,126,1,0.9,1,0,3,0 260 | 62,0,0,150,244,0,1,154,1,1.4,1,0,2,0 261 | 38,1,3,120,231,0,1,182,1,3.8,1,0,3,0 262 | 66,0,0,178,228,1,1,165,1,1,1,2,3,0 263 | 52,1,0,112,230,0,1,160,0,0,2,1,2,0 264 | 53,1,0,123,282,0,1,95,1,2,1,2,3,0 265 | 63,0,0,108,269,0,1,169,1,1.8,1,2,2,0 266 | 54,1,0,110,206,0,0,108,1,0,1,1,2,0 267 | 66,1,0,112,212,0,0,132,1,0.1,2,1,2,0 268 | 55,0,0,180,327,0,2,117,1,3.4,1,0,2,0 269 | 49,1,2,118,149,0,0,126,0,0.8,2,3,2,0 270 | 54,1,0,122,286,0,0,116,1,3.2,1,2,2,0 271 | 56,1,0,130,283,1,0,103,1,1.6,0,0,3,0 272 | 46,1,0,120,249,0,0,144,0,0.8,2,0,3,0 273 | 61,1,3,134,234,0,1,145,0,2.6,1,2,2,0 274 | 67,1,0,120,237,0,1,71,0,1,1,0,2,0 275 | 58,1,0,100,234,0,1,156,0,0.1,2,1,3,0 276 | 47,1,0,110,275,0,0,118,1,1,1,1,2,0 277 | 52,1,0,125,212,0,1,168,0,1,2,2,3,0 278 | 58,1,0,146,218,0,1,105,0,2,1,1,3,0 279 | 57,1,1,124,261,0,1,141,0,0.3,2,0,3,0 280 | 58,0,1,136,319,1,0,152,0,0,2,2,2,0 281 | 61,1,0,138,166,0,0,125,1,3.6,1,1,2,0 282 | 42,1,0,136,315,0,1,125,1,1.8,1,0,1,0 283 | 52,1,0,128,204,1,1,156,1,1,1,0,0,0 284 | 59,1,2,126,218,1,1,134,0,2.2,1,1,1,0 285 | 40,1,0,152,223,0,1,181,0,0,2,0,3,0 286 | 61,1,0,140,207,0,0,138,1,1.9,2,1,3,0 287 | 46,1,0,140,311,0,1,120,1,1.8,1,2,3,0 288 | 59,1,3,134,204,0,1,162,0,0.8,2,2,2,0 289 | 57,1,1,154,232,0,0,164,0,0,2,1,2,0 290 | 57,1,0,110,335,0,1,143,1,3,1,1,3,0 291 | 55,0,0,128,205,0,2,130,1,2,1,1,3,0 292 | 61,1,0,148,203,0,1,161,0,0,2,1,3,0 293 | 58,1,0,114,318,0,2,140,0,4.4,0,3,1,0 294 | 58,0,0,170,225,1,0,146,1,2.8,1,2,1,0 295 | 
67,1,2,152,212,0,0,150,0,0.8,1,0,3,0 296 | 44,1,0,120,169,0,1,144,1,2.8,0,0,1,0 297 | 63,1,0,140,187,0,0,144,1,4,2,2,3,0 298 | 63,0,0,124,197,0,1,136,1,0,1,0,2,0 299 | 59,1,0,164,176,1,0,90,0,1,1,2,1,0 300 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 301 | 45,1,3,110,264,0,1,132,0,1.2,1,0,3,0 302 | 68,1,0,144,193,1,1,141,0,3.4,1,2,3,0 303 | 57,1,0,130,131,0,1,115,1,1.2,1,1,3,0 304 | 57,0,1,130,236,0,0,174,0,0,1,1,2,0 305 | -------------------------------------------------------------------------------- /python scripting/Data/heart.csv: -------------------------------------------------------------------------------- 1 | age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target 2 | 63,1,3,145,233,1,0,150,0,2.3,0,0,1,1 3 | 37,1,2,130,250,0,1,187,0,3.5,0,0,2,1 4 | 41,0,1,130,204,0,0,172,0,1.4,2,0,2,1 5 | 56,1,1,120,236,0,1,178,0,0.8,2,0,2,1 6 | 57,0,0,120,354,0,1,163,1,0.6,2,0,2,1 7 | 57,1,0,140,192,0,1,148,0,0.4,1,0,1,1 8 | 56,0,1,140,294,0,0,153,0,1.3,1,0,2,1 9 | 44,1,1,120,263,0,1,173,0,0,2,0,3,1 10 | 52,1,2,172,199,1,1,162,0,0.5,2,0,3,1 11 | 57,1,2,150,168,0,1,174,0,1.6,2,0,2,1 12 | 54,1,0,140,239,0,1,160,0,1.2,2,0,2,1 13 | 48,0,2,130,275,0,1,139,0,0.2,2,0,2,1 14 | 49,1,1,130,266,0,1,171,0,0.6,2,0,2,1 15 | 64,1,3,110,211,0,0,144,1,1.8,1,0,2,1 16 | 58,0,3,150,283,1,0,162,0,1,2,0,2,1 17 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 18 | 58,0,2,120,340,0,1,172,0,0,2,0,2,1 19 | 66,0,3,150,226,0,1,114,0,2.6,0,0,2,1 20 | 43,1,0,150,247,0,1,171,0,1.5,2,0,2,1 21 | 69,0,3,140,239,0,1,151,0,1.8,2,2,2,1 22 | 59,1,0,135,234,0,1,161,0,0.5,1,0,3,1 23 | 44,1,2,130,233,0,1,179,1,0.4,2,0,2,1 24 | 42,1,0,140,226,0,1,178,0,0,2,0,2,1 25 | 61,1,2,150,243,1,1,137,1,1,1,0,2,1 26 | 40,1,3,140,199,0,1,178,1,1.4,2,0,3,1 27 | 71,0,1,160,302,0,1,162,0,0.4,2,2,2,1 28 | 59,1,2,150,212,1,1,157,0,1.6,2,0,2,1 29 | 51,1,2,110,175,0,1,123,0,0.6,2,0,2,1 30 | 65,0,2,140,417,1,0,157,0,0.8,2,1,2,1 31 | 53,1,2,130,197,1,0,152,0,1.2,0,0,2,1 32 | 41,0,1,105,198,0,1,168,0,0,2,1,2,1 33 | 65,1,0,120,177,0,1,140,0,0.4,2,0,3,1 34 | 44,1,1,130,219,0,0,188,0,0,2,0,2,1 35 | 54,1,2,125,273,0,0,152,0,0.5,0,1,2,1 36 | 51,1,3,125,213,0,0,125,1,1.4,2,1,2,1 37 | 46,0,2,142,177,0,0,160,1,1.4,0,0,2,1 38 | 54,0,2,135,304,1,1,170,0,0,2,0,2,1 39 | 54,1,2,150,232,0,0,165,0,1.6,2,0,3,1 40 | 65,0,2,155,269,0,1,148,0,0.8,2,0,2,1 41 | 65,0,2,160,360,0,0,151,0,0.8,2,0,2,1 42 | 51,0,2,140,308,0,0,142,0,1.5,2,1,2,1 43 | 48,1,1,130,245,0,0,180,0,0.2,1,0,2,1 44 | 45,1,0,104,208,0,0,148,1,3,1,0,2,1 45 | 53,0,0,130,264,0,0,143,0,0.4,1,0,2,1 46 | 39,1,2,140,321,0,0,182,0,0,2,0,2,1 47 | 52,1,1,120,325,0,1,172,0,0.2,2,0,2,1 48 | 44,1,2,140,235,0,0,180,0,0,2,0,2,1 49 | 47,1,2,138,257,0,0,156,0,0,2,0,2,1 50 | 53,0,2,128,216,0,0,115,0,0,2,0,0,1 51 | 53,0,0,138,234,0,0,160,0,0,2,0,2,1 52 | 51,0,2,130,256,0,0,149,0,0.5,2,0,2,1 53 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 54 | 62,1,2,130,231,0,1,146,0,1.8,1,3,3,1 55 | 44,0,2,108,141,0,1,175,0,0.6,1,0,2,1 56 | 63,0,2,135,252,0,0,172,0,0,2,0,2,1 57 | 52,1,1,134,201,0,1,158,0,0.8,2,1,2,1 58 | 48,1,0,122,222,0,0,186,0,0,2,0,2,1 59 | 45,1,0,115,260,0,0,185,0,0,2,0,2,1 60 | 34,1,3,118,182,0,0,174,0,0,2,0,2,1 61 | 57,0,0,128,303,0,0,159,0,0,2,1,2,1 62 | 71,0,2,110,265,1,0,130,0,0,2,1,2,1 63 | 54,1,1,108,309,0,1,156,0,0,2,0,3,1 64 | 52,1,3,118,186,0,0,190,0,0,1,0,1,1 65 | 41,1,1,135,203,0,1,132,0,0,1,0,1,1 66 | 58,1,2,140,211,1,0,165,0,0,2,0,2,1 67 | 35,0,0,138,183,0,1,182,0,1.4,2,0,2,1 68 | 51,1,2,100,222,0,1,143,1,1.2,1,0,2,1 69 | 45,0,1,130,234,0,0,175,0,0.6,1,0,2,1 
70 | 44,1,1,120,220,0,1,170,0,0,2,0,2,1 71 | 62,0,0,124,209,0,1,163,0,0,2,0,2,1 72 | 54,1,2,120,258,0,0,147,0,0.4,1,0,3,1 73 | 51,1,2,94,227,0,1,154,1,0,2,1,3,1 74 | 29,1,1,130,204,0,0,202,0,0,2,0,2,1 75 | 51,1,0,140,261,0,0,186,1,0,2,0,2,1 76 | 43,0,2,122,213,0,1,165,0,0.2,1,0,2,1 77 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 78 | 51,1,2,125,245,1,0,166,0,2.4,1,0,2,1 79 | 59,1,1,140,221,0,1,164,1,0,2,0,2,1 80 | 52,1,1,128,205,1,1,184,0,0,2,0,2,1 81 | 58,1,2,105,240,0,0,154,1,0.6,1,0,3,1 82 | 41,1,2,112,250,0,1,179,0,0,2,0,2,1 83 | 45,1,1,128,308,0,0,170,0,0,2,0,2,1 84 | 60,0,2,102,318,0,1,160,0,0,2,1,2,1 85 | 52,1,3,152,298,1,1,178,0,1.2,1,0,3,1 86 | 42,0,0,102,265,0,0,122,0,0.6,1,0,2,1 87 | 67,0,2,115,564,0,0,160,0,1.6,1,0,3,1 88 | 68,1,2,118,277,0,1,151,0,1,2,1,3,1 89 | 46,1,1,101,197,1,1,156,0,0,2,0,3,1 90 | 54,0,2,110,214,0,1,158,0,1.6,1,0,2,1 91 | 58,0,0,100,248,0,0,122,0,1,1,0,2,1 92 | 48,1,2,124,255,1,1,175,0,0,2,2,2,1 93 | 57,1,0,132,207,0,1,168,1,0,2,0,3,1 94 | 52,1,2,138,223,0,1,169,0,0,2,4,2,1 95 | 54,0,1,132,288,1,0,159,1,0,2,1,2,1 96 | 45,0,1,112,160,0,1,138,0,0,1,0,2,1 97 | 53,1,0,142,226,0,0,111,1,0,2,0,3,1 98 | 62,0,0,140,394,0,0,157,0,1.2,1,0,2,1 99 | 52,1,0,108,233,1,1,147,0,0.1,2,3,3,1 100 | 43,1,2,130,315,0,1,162,0,1.9,2,1,2,1 101 | 53,1,2,130,246,1,0,173,0,0,2,3,2,1 102 | 42,1,3,148,244,0,0,178,0,0.8,2,2,2,1 103 | 59,1,3,178,270,0,0,145,0,4.2,0,0,3,1 104 | 63,0,1,140,195,0,1,179,0,0,2,2,2,1 105 | 42,1,2,120,240,1,1,194,0,0.8,0,0,3,1 106 | 50,1,2,129,196,0,1,163,0,0,2,0,2,1 107 | 68,0,2,120,211,0,0,115,0,1.5,1,0,2,1 108 | 69,1,3,160,234,1,0,131,0,0.1,1,1,2,1 109 | 45,0,0,138,236,0,0,152,1,0.2,1,0,2,1 110 | 50,0,1,120,244,0,1,162,0,1.1,2,0,2,1 111 | 50,0,0,110,254,0,0,159,0,0,2,0,2,1 112 | 64,0,0,180,325,0,1,154,1,0,2,0,2,1 113 | 57,1,2,150,126,1,1,173,0,0.2,2,1,3,1 114 | 64,0,2,140,313,0,1,133,0,0.2,2,0,3,1 115 | 43,1,0,110,211,0,1,161,0,0,2,0,3,1 116 | 55,1,1,130,262,0,1,155,0,0,2,0,2,1 117 | 37,0,2,120,215,0,1,170,0,0,2,0,2,1 118 | 41,1,2,130,214,0,0,168,0,2,1,0,2,1 119 | 56,1,3,120,193,0,0,162,0,1.9,1,0,3,1 120 | 46,0,1,105,204,0,1,172,0,0,2,0,2,1 121 | 46,0,0,138,243,0,0,152,1,0,1,0,2,1 122 | 64,0,0,130,303,0,1,122,0,2,1,2,2,1 123 | 59,1,0,138,271,0,0,182,0,0,2,0,2,1 124 | 41,0,2,112,268,0,0,172,1,0,2,0,2,1 125 | 54,0,2,108,267,0,0,167,0,0,2,0,2,1 126 | 39,0,2,94,199,0,1,179,0,0,2,0,2,1 127 | 34,0,1,118,210,0,1,192,0,0.7,2,0,2,1 128 | 47,1,0,112,204,0,1,143,0,0.1,2,0,2,1 129 | 67,0,2,152,277,0,1,172,0,0,2,1,2,1 130 | 52,0,2,136,196,0,0,169,0,0.1,1,0,2,1 131 | 74,0,1,120,269,0,0,121,1,0.2,2,1,2,1 132 | 54,0,2,160,201,0,1,163,0,0,2,1,2,1 133 | 49,0,1,134,271,0,1,162,0,0,1,0,2,1 134 | 42,1,1,120,295,0,1,162,0,0,2,0,2,1 135 | 41,1,1,110,235,0,1,153,0,0,2,0,2,1 136 | 41,0,1,126,306,0,1,163,0,0,2,0,2,1 137 | 49,0,0,130,269,0,1,163,0,0,2,0,2,1 138 | 60,0,2,120,178,1,1,96,0,0,2,0,2,1 139 | 62,1,1,128,208,1,0,140,0,0,2,0,2,1 140 | 57,1,0,110,201,0,1,126,1,1.5,1,0,1,1 141 | 64,1,0,128,263,0,1,105,1,0.2,1,1,3,1 142 | 51,0,2,120,295,0,0,157,0,0.6,2,0,2,1 143 | 43,1,0,115,303,0,1,181,0,1.2,1,0,2,1 144 | 42,0,2,120,209,0,1,173,0,0,1,0,2,1 145 | 67,0,0,106,223,0,1,142,0,0.3,2,2,2,1 146 | 76,0,2,140,197,0,2,116,0,1.1,1,0,2,1 147 | 70,1,1,156,245,0,0,143,0,0,2,0,2,1 148 | 44,0,2,118,242,0,1,149,0,0.3,1,1,2,1 149 | 60,0,3,150,240,0,1,171,0,0.9,2,0,2,1 150 | 44,1,2,120,226,0,1,169,0,0,2,0,2,1 151 | 42,1,2,130,180,0,1,150,0,0,2,0,2,1 152 | 66,1,0,160,228,0,0,138,0,2.3,2,0,1,1 153 | 71,0,0,112,149,0,1,125,0,1.6,1,0,2,1 154 | 64,1,3,170,227,0,0,155,0,0.6,1,0,3,1 155 | 
66,0,2,146,278,0,0,152,0,0,1,1,2,1 156 | 39,0,2,138,220,0,1,152,0,0,1,0,2,1 157 | 58,0,0,130,197,0,1,131,0,0.6,1,0,2,1 158 | 47,1,2,130,253,0,1,179,0,0,2,0,2,1 159 | 35,1,1,122,192,0,1,174,0,0,2,0,2,1 160 | 58,1,1,125,220,0,1,144,0,0.4,1,4,3,1 161 | 56,1,1,130,221,0,0,163,0,0,2,0,3,1 162 | 56,1,1,120,240,0,1,169,0,0,0,0,2,1 163 | 55,0,1,132,342,0,1,166,0,1.2,2,0,2,1 164 | 41,1,1,120,157,0,1,182,0,0,2,0,2,1 165 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 166 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 167 | 67,1,0,160,286,0,0,108,1,1.5,1,3,2,0 168 | 67,1,0,120,229,0,0,129,1,2.6,1,2,3,0 169 | 62,0,0,140,268,0,0,160,0,3.6,0,2,2,0 170 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 171 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 172 | 56,1,2,130,256,1,0,142,1,0.6,1,1,1,0 173 | 48,1,1,110,229,0,1,168,0,1,0,0,3,0 174 | 58,1,1,120,284,0,0,160,0,1.8,1,0,2,0 175 | 58,1,2,132,224,0,0,173,0,3.2,2,2,3,0 176 | 60,1,0,130,206,0,0,132,1,2.4,1,2,3,0 177 | 40,1,0,110,167,0,0,114,1,2,1,0,3,0 178 | 60,1,0,117,230,1,1,160,1,1.4,2,2,3,0 179 | 64,1,2,140,335,0,1,158,0,0,2,0,2,0 180 | 43,1,0,120,177,0,0,120,1,2.5,1,0,3,0 181 | 57,1,0,150,276,0,0,112,1,0.6,1,1,1,0 182 | 55,1,0,132,353,0,1,132,1,1.2,1,1,3,0 183 | 65,0,0,150,225,0,0,114,0,1,1,3,3,0 184 | 61,0,0,130,330,0,0,169,0,0,2,0,2,0 185 | 58,1,2,112,230,0,0,165,0,2.5,1,1,3,0 186 | 50,1,0,150,243,0,0,128,0,2.6,1,0,3,0 187 | 44,1,0,112,290,0,0,153,0,0,2,1,2,0 188 | 60,1,0,130,253,0,1,144,1,1.4,2,1,3,0 189 | 54,1,0,124,266,0,0,109,1,2.2,1,1,3,0 190 | 50,1,2,140,233,0,1,163,0,0.6,1,1,3,0 191 | 41,1,0,110,172,0,0,158,0,0,2,0,3,0 192 | 51,0,0,130,305,0,1,142,1,1.2,1,0,3,0 193 | 58,1,0,128,216,0,0,131,1,2.2,1,3,3,0 194 | 54,1,0,120,188,0,1,113,0,1.4,1,1,3,0 195 | 60,1,0,145,282,0,0,142,1,2.8,1,2,3,0 196 | 60,1,2,140,185,0,0,155,0,3,1,0,2,0 197 | 59,1,0,170,326,0,0,140,1,3.4,0,0,3,0 198 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 199 | 67,1,0,125,254,1,1,163,0,0.2,1,2,3,0 200 | 62,1,0,120,267,0,1,99,1,1.8,1,2,3,0 201 | 65,1,0,110,248,0,0,158,0,0.6,2,2,1,0 202 | 44,1,0,110,197,0,0,177,0,0,2,1,2,0 203 | 60,1,0,125,258,0,0,141,1,2.8,1,1,3,0 204 | 58,1,0,150,270,0,0,111,1,0.8,2,0,3,0 205 | 68,1,2,180,274,1,0,150,1,1.6,1,0,3,0 206 | 62,0,0,160,164,0,0,145,0,6.2,0,3,3,0 207 | 52,1,0,128,255,0,1,161,1,0,2,1,3,0 208 | 59,1,0,110,239,0,0,142,1,1.2,1,1,3,0 209 | 60,0,0,150,258,0,0,157,0,2.6,1,2,3,0 210 | 49,1,2,120,188,0,1,139,0,2,1,3,3,0 211 | 59,1,0,140,177,0,1,162,1,0,2,1,3,0 212 | 57,1,2,128,229,0,0,150,0,0.4,1,1,3,0 213 | 61,1,0,120,260,0,1,140,1,3.6,1,1,3,0 214 | 39,1,0,118,219,0,1,140,0,1.2,1,0,3,0 215 | 61,0,0,145,307,0,0,146,1,1,1,0,3,0 216 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 217 | 43,0,0,132,341,1,0,136,1,3,1,0,3,0 218 | 62,0,2,130,263,0,1,97,0,1.2,1,1,3,0 219 | 63,1,0,130,330,1,0,132,1,1.8,2,3,3,0 220 | 65,1,0,135,254,0,0,127,0,2.8,1,1,3,0 221 | 48,1,0,130,256,1,0,150,1,0,2,2,3,0 222 | 63,0,0,150,407,0,0,154,0,4,1,3,3,0 223 | 55,1,0,140,217,0,1,111,1,5.6,0,0,3,0 224 | 65,1,3,138,282,1,0,174,0,1.4,1,1,2,0 225 | 56,0,0,200,288,1,0,133,1,4,0,2,3,0 226 | 54,1,0,110,239,0,1,126,1,2.8,1,1,3,0 227 | 70,1,0,145,174,0,1,125,1,2.6,0,0,3,0 228 | 62,1,1,120,281,0,0,103,0,1.4,1,1,3,0 229 | 35,1,0,120,198,0,1,130,1,1.6,1,0,3,0 230 | 59,1,3,170,288,0,0,159,0,0.2,1,0,3,0 231 | 64,1,2,125,309,0,1,131,1,1.8,1,0,3,0 232 | 47,1,2,108,243,0,1,152,0,0,2,0,2,0 233 | 57,1,0,165,289,1,0,124,0,1,1,3,3,0 234 | 55,1,0,160,289,0,0,145,1,0.8,1,1,3,0 235 | 64,1,0,120,246,0,0,96,1,2.2,0,1,2,0 236 | 70,1,0,130,322,0,0,109,0,2.4,1,3,2,0 237 | 51,1,0,140,299,0,1,173,1,1.6,2,0,3,0 238 | 58,1,0,125,300,0,0,171,0,0,2,2,3,0 239 | 
60,1,0,140,293,0,0,170,0,1.2,1,2,3,0 240 | 77,1,0,125,304,0,0,162,1,0,2,3,2,0 241 | 35,1,0,126,282,0,0,156,1,0,2,0,3,0 242 | 70,1,2,160,269,0,1,112,1,2.9,1,1,3,0 243 | 59,0,0,174,249,0,1,143,1,0,1,0,2,0 244 | 64,1,0,145,212,0,0,132,0,2,1,2,1,0 245 | 57,1,0,152,274,0,1,88,1,1.2,1,1,3,0 246 | 56,1,0,132,184,0,0,105,1,2.1,1,1,1,0 247 | 48,1,0,124,274,0,0,166,0,0.5,1,0,3,0 248 | 56,0,0,134,409,0,0,150,1,1.9,1,2,3,0 249 | 66,1,1,160,246,0,1,120,1,0,1,3,1,0 250 | 54,1,1,192,283,0,0,195,0,0,2,1,3,0 251 | 69,1,2,140,254,0,0,146,0,2,1,3,3,0 252 | 51,1,0,140,298,0,1,122,1,4.2,1,3,3,0 253 | 43,1,0,132,247,1,0,143,1,0.1,1,4,3,0 254 | 62,0,0,138,294,1,1,106,0,1.9,1,3,2,0 255 | 67,1,0,100,299,0,0,125,1,0.9,1,2,2,0 256 | 59,1,3,160,273,0,0,125,0,0,2,0,2,0 257 | 45,1,0,142,309,0,0,147,1,0,1,3,3,0 258 | 58,1,0,128,259,0,0,130,1,3,1,2,3,0 259 | 50,1,0,144,200,0,0,126,1,0.9,1,0,3,0 260 | 62,0,0,150,244,0,1,154,1,1.4,1,0,2,0 261 | 38,1,3,120,231,0,1,182,1,3.8,1,0,3,0 262 | 66,0,0,178,228,1,1,165,1,1,1,2,3,0 263 | 52,1,0,112,230,0,1,160,0,0,2,1,2,0 264 | 53,1,0,123,282,0,1,95,1,2,1,2,3,0 265 | 63,0,0,108,269,0,1,169,1,1.8,1,2,2,0 266 | 54,1,0,110,206,0,0,108,1,0,1,1,2,0 267 | 66,1,0,112,212,0,0,132,1,0.1,2,1,2,0 268 | 55,0,0,180,327,0,2,117,1,3.4,1,0,2,0 269 | 49,1,2,118,149,0,0,126,0,0.8,2,3,2,0 270 | 54,1,0,122,286,0,0,116,1,3.2,1,2,2,0 271 | 56,1,0,130,283,1,0,103,1,1.6,0,0,3,0 272 | 46,1,0,120,249,0,0,144,0,0.8,2,0,3,0 273 | 61,1,3,134,234,0,1,145,0,2.6,1,2,2,0 274 | 67,1,0,120,237,0,1,71,0,1,1,0,2,0 275 | 58,1,0,100,234,0,1,156,0,0.1,2,1,3,0 276 | 47,1,0,110,275,0,0,118,1,1,1,1,2,0 277 | 52,1,0,125,212,0,1,168,0,1,2,2,3,0 278 | 58,1,0,146,218,0,1,105,0,2,1,1,3,0 279 | 57,1,1,124,261,0,1,141,0,0.3,2,0,3,0 280 | 58,0,1,136,319,1,0,152,0,0,2,2,2,0 281 | 61,1,0,138,166,0,0,125,1,3.6,1,1,2,0 282 | 42,1,0,136,315,0,1,125,1,1.8,1,0,1,0 283 | 52,1,0,128,204,1,1,156,1,1,1,0,0,0 284 | 59,1,2,126,218,1,1,134,0,2.2,1,1,1,0 285 | 40,1,0,152,223,0,1,181,0,0,2,0,3,0 286 | 61,1,0,140,207,0,0,138,1,1.9,2,1,3,0 287 | 46,1,0,140,311,0,1,120,1,1.8,1,2,3,0 288 | 59,1,3,134,204,0,1,162,0,0.8,2,2,2,0 289 | 57,1,1,154,232,0,0,164,0,0,2,1,2,0 290 | 57,1,0,110,335,0,1,143,1,3,1,1,3,0 291 | 55,0,0,128,205,0,2,130,1,2,1,1,3,0 292 | 61,1,0,148,203,0,1,161,0,0,2,1,3,0 293 | 58,1,0,114,318,0,2,140,0,4.4,0,3,1,0 294 | 58,0,0,170,225,1,0,146,1,2.8,1,2,1,0 295 | 67,1,2,152,212,0,0,150,0,0.8,1,0,3,0 296 | 44,1,0,120,169,0,1,144,1,2.8,0,0,1,0 297 | 63,1,0,140,187,0,0,144,1,4,2,2,3,0 298 | 63,0,0,124,197,0,1,136,1,0,1,0,2,0 299 | 59,1,0,164,176,1,0,90,0,1,1,2,1,0 300 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 301 | 45,1,3,110,264,0,1,132,0,1.2,1,0,3,0 302 | 68,1,0,144,193,1,1,141,0,3.4,1,2,3,0 303 | 57,1,0,130,131,0,1,115,1,1.2,1,1,3,0 304 | 57,0,1,130,236,0,0,174,0,0,1,1,2,0 305 | -------------------------------------------------------------------------------- /uci_heart_disease/Data/heart.csv: -------------------------------------------------------------------------------- 1 | age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target 2 | 63,1,3,145,233,1,0,150,0,2.3,0,0,1,1 3 | 37,1,2,130,250,0,1,187,0,3.5,0,0,2,1 4 | 41,0,1,130,204,0,0,172,0,1.4,2,0,2,1 5 | 56,1,1,120,236,0,1,178,0,0.8,2,0,2,1 6 | 57,0,0,120,354,0,1,163,1,0.6,2,0,2,1 7 | 57,1,0,140,192,0,1,148,0,0.4,1,0,1,1 8 | 56,0,1,140,294,0,0,153,0,1.3,1,0,2,1 9 | 44,1,1,120,263,0,1,173,0,0,2,0,3,1 10 | 52,1,2,172,199,1,1,162,0,0.5,2,0,3,1 11 | 57,1,2,150,168,0,1,174,0,1.6,2,0,2,1 12 | 
54,1,0,140,239,0,1,160,0,1.2,2,0,2,1 13 | 48,0,2,130,275,0,1,139,0,0.2,2,0,2,1 14 | 49,1,1,130,266,0,1,171,0,0.6,2,0,2,1 15 | 64,1,3,110,211,0,0,144,1,1.8,1,0,2,1 16 | 58,0,3,150,283,1,0,162,0,1,2,0,2,1 17 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 18 | 58,0,2,120,340,0,1,172,0,0,2,0,2,1 19 | 66,0,3,150,226,0,1,114,0,2.6,0,0,2,1 20 | 43,1,0,150,247,0,1,171,0,1.5,2,0,2,1 21 | 69,0,3,140,239,0,1,151,0,1.8,2,2,2,1 22 | 59,1,0,135,234,0,1,161,0,0.5,1,0,3,1 23 | 44,1,2,130,233,0,1,179,1,0.4,2,0,2,1 24 | 42,1,0,140,226,0,1,178,0,0,2,0,2,1 25 | 61,1,2,150,243,1,1,137,1,1,1,0,2,1 26 | 40,1,3,140,199,0,1,178,1,1.4,2,0,3,1 27 | 71,0,1,160,302,0,1,162,0,0.4,2,2,2,1 28 | 59,1,2,150,212,1,1,157,0,1.6,2,0,2,1 29 | 51,1,2,110,175,0,1,123,0,0.6,2,0,2,1 30 | 65,0,2,140,417,1,0,157,0,0.8,2,1,2,1 31 | 53,1,2,130,197,1,0,152,0,1.2,0,0,2,1 32 | 41,0,1,105,198,0,1,168,0,0,2,1,2,1 33 | 65,1,0,120,177,0,1,140,0,0.4,2,0,3,1 34 | 44,1,1,130,219,0,0,188,0,0,2,0,2,1 35 | 54,1,2,125,273,0,0,152,0,0.5,0,1,2,1 36 | 51,1,3,125,213,0,0,125,1,1.4,2,1,2,1 37 | 46,0,2,142,177,0,0,160,1,1.4,0,0,2,1 38 | 54,0,2,135,304,1,1,170,0,0,2,0,2,1 39 | 54,1,2,150,232,0,0,165,0,1.6,2,0,3,1 40 | 65,0,2,155,269,0,1,148,0,0.8,2,0,2,1 41 | 65,0,2,160,360,0,0,151,0,0.8,2,0,2,1 42 | 51,0,2,140,308,0,0,142,0,1.5,2,1,2,1 43 | 48,1,1,130,245,0,0,180,0,0.2,1,0,2,1 44 | 45,1,0,104,208,0,0,148,1,3,1,0,2,1 45 | 53,0,0,130,264,0,0,143,0,0.4,1,0,2,1 46 | 39,1,2,140,321,0,0,182,0,0,2,0,2,1 47 | 52,1,1,120,325,0,1,172,0,0.2,2,0,2,1 48 | 44,1,2,140,235,0,0,180,0,0,2,0,2,1 49 | 47,1,2,138,257,0,0,156,0,0,2,0,2,1 50 | 53,0,2,128,216,0,0,115,0,0,2,0,0,1 51 | 53,0,0,138,234,0,0,160,0,0,2,0,2,1 52 | 51,0,2,130,256,0,0,149,0,0.5,2,0,2,1 53 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 54 | 62,1,2,130,231,0,1,146,0,1.8,1,3,3,1 55 | 44,0,2,108,141,0,1,175,0,0.6,1,0,2,1 56 | 63,0,2,135,252,0,0,172,0,0,2,0,2,1 57 | 52,1,1,134,201,0,1,158,0,0.8,2,1,2,1 58 | 48,1,0,122,222,0,0,186,0,0,2,0,2,1 59 | 45,1,0,115,260,0,0,185,0,0,2,0,2,1 60 | 34,1,3,118,182,0,0,174,0,0,2,0,2,1 61 | 57,0,0,128,303,0,0,159,0,0,2,1,2,1 62 | 71,0,2,110,265,1,0,130,0,0,2,1,2,1 63 | 54,1,1,108,309,0,1,156,0,0,2,0,3,1 64 | 52,1,3,118,186,0,0,190,0,0,1,0,1,1 65 | 41,1,1,135,203,0,1,132,0,0,1,0,1,1 66 | 58,1,2,140,211,1,0,165,0,0,2,0,2,1 67 | 35,0,0,138,183,0,1,182,0,1.4,2,0,2,1 68 | 51,1,2,100,222,0,1,143,1,1.2,1,0,2,1 69 | 45,0,1,130,234,0,0,175,0,0.6,1,0,2,1 70 | 44,1,1,120,220,0,1,170,0,0,2,0,2,1 71 | 62,0,0,124,209,0,1,163,0,0,2,0,2,1 72 | 54,1,2,120,258,0,0,147,0,0.4,1,0,3,1 73 | 51,1,2,94,227,0,1,154,1,0,2,1,3,1 74 | 29,1,1,130,204,0,0,202,0,0,2,0,2,1 75 | 51,1,0,140,261,0,0,186,1,0,2,0,2,1 76 | 43,0,2,122,213,0,1,165,0,0.2,1,0,2,1 77 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 78 | 51,1,2,125,245,1,0,166,0,2.4,1,0,2,1 79 | 59,1,1,140,221,0,1,164,1,0,2,0,2,1 80 | 52,1,1,128,205,1,1,184,0,0,2,0,2,1 81 | 58,1,2,105,240,0,0,154,1,0.6,1,0,3,1 82 | 41,1,2,112,250,0,1,179,0,0,2,0,2,1 83 | 45,1,1,128,308,0,0,170,0,0,2,0,2,1 84 | 60,0,2,102,318,0,1,160,0,0,2,1,2,1 85 | 52,1,3,152,298,1,1,178,0,1.2,1,0,3,1 86 | 42,0,0,102,265,0,0,122,0,0.6,1,0,2,1 87 | 67,0,2,115,564,0,0,160,0,1.6,1,0,3,1 88 | 68,1,2,118,277,0,1,151,0,1,2,1,3,1 89 | 46,1,1,101,197,1,1,156,0,0,2,0,3,1 90 | 54,0,2,110,214,0,1,158,0,1.6,1,0,2,1 91 | 58,0,0,100,248,0,0,122,0,1,1,0,2,1 92 | 48,1,2,124,255,1,1,175,0,0,2,2,2,1 93 | 57,1,0,132,207,0,1,168,1,0,2,0,3,1 94 | 52,1,2,138,223,0,1,169,0,0,2,4,2,1 95 | 54,0,1,132,288,1,0,159,1,0,2,1,2,1 96 | 45,0,1,112,160,0,1,138,0,0,1,0,2,1 97 | 53,1,0,142,226,0,0,111,1,0,2,0,3,1 98 | 
62,0,0,140,394,0,0,157,0,1.2,1,0,2,1 99 | 52,1,0,108,233,1,1,147,0,0.1,2,3,3,1 100 | 43,1,2,130,315,0,1,162,0,1.9,2,1,2,1 101 | 53,1,2,130,246,1,0,173,0,0,2,3,2,1 102 | 42,1,3,148,244,0,0,178,0,0.8,2,2,2,1 103 | 59,1,3,178,270,0,0,145,0,4.2,0,0,3,1 104 | 63,0,1,140,195,0,1,179,0,0,2,2,2,1 105 | 42,1,2,120,240,1,1,194,0,0.8,0,0,3,1 106 | 50,1,2,129,196,0,1,163,0,0,2,0,2,1 107 | 68,0,2,120,211,0,0,115,0,1.5,1,0,2,1 108 | 69,1,3,160,234,1,0,131,0,0.1,1,1,2,1 109 | 45,0,0,138,236,0,0,152,1,0.2,1,0,2,1 110 | 50,0,1,120,244,0,1,162,0,1.1,2,0,2,1 111 | 50,0,0,110,254,0,0,159,0,0,2,0,2,1 112 | 64,0,0,180,325,0,1,154,1,0,2,0,2,1 113 | 57,1,2,150,126,1,1,173,0,0.2,2,1,3,1 114 | 64,0,2,140,313,0,1,133,0,0.2,2,0,3,1 115 | 43,1,0,110,211,0,1,161,0,0,2,0,3,1 116 | 55,1,1,130,262,0,1,155,0,0,2,0,2,1 117 | 37,0,2,120,215,0,1,170,0,0,2,0,2,1 118 | 41,1,2,130,214,0,0,168,0,2,1,0,2,1 119 | 56,1,3,120,193,0,0,162,0,1.9,1,0,3,1 120 | 46,0,1,105,204,0,1,172,0,0,2,0,2,1 121 | 46,0,0,138,243,0,0,152,1,0,1,0,2,1 122 | 64,0,0,130,303,0,1,122,0,2,1,2,2,1 123 | 59,1,0,138,271,0,0,182,0,0,2,0,2,1 124 | 41,0,2,112,268,0,0,172,1,0,2,0,2,1 125 | 54,0,2,108,267,0,0,167,0,0,2,0,2,1 126 | 39,0,2,94,199,0,1,179,0,0,2,0,2,1 127 | 34,0,1,118,210,0,1,192,0,0.7,2,0,2,1 128 | 47,1,0,112,204,0,1,143,0,0.1,2,0,2,1 129 | 67,0,2,152,277,0,1,172,0,0,2,1,2,1 130 | 52,0,2,136,196,0,0,169,0,0.1,1,0,2,1 131 | 74,0,1,120,269,0,0,121,1,0.2,2,1,2,1 132 | 54,0,2,160,201,0,1,163,0,0,2,1,2,1 133 | 49,0,1,134,271,0,1,162,0,0,1,0,2,1 134 | 42,1,1,120,295,0,1,162,0,0,2,0,2,1 135 | 41,1,1,110,235,0,1,153,0,0,2,0,2,1 136 | 41,0,1,126,306,0,1,163,0,0,2,0,2,1 137 | 49,0,0,130,269,0,1,163,0,0,2,0,2,1 138 | 60,0,2,120,178,1,1,96,0,0,2,0,2,1 139 | 62,1,1,128,208,1,0,140,0,0,2,0,2,1 140 | 57,1,0,110,201,0,1,126,1,1.5,1,0,1,1 141 | 64,1,0,128,263,0,1,105,1,0.2,1,1,3,1 142 | 51,0,2,120,295,0,0,157,0,0.6,2,0,2,1 143 | 43,1,0,115,303,0,1,181,0,1.2,1,0,2,1 144 | 42,0,2,120,209,0,1,173,0,0,1,0,2,1 145 | 67,0,0,106,223,0,1,142,0,0.3,2,2,2,1 146 | 76,0,2,140,197,0,2,116,0,1.1,1,0,2,1 147 | 70,1,1,156,245,0,0,143,0,0,2,0,2,1 148 | 44,0,2,118,242,0,1,149,0,0.3,1,1,2,1 149 | 60,0,3,150,240,0,1,171,0,0.9,2,0,2,1 150 | 44,1,2,120,226,0,1,169,0,0,2,0,2,1 151 | 42,1,2,130,180,0,1,150,0,0,2,0,2,1 152 | 66,1,0,160,228,0,0,138,0,2.3,2,0,1,1 153 | 71,0,0,112,149,0,1,125,0,1.6,1,0,2,1 154 | 64,1,3,170,227,0,0,155,0,0.6,1,0,3,1 155 | 66,0,2,146,278,0,0,152,0,0,1,1,2,1 156 | 39,0,2,138,220,0,1,152,0,0,1,0,2,1 157 | 58,0,0,130,197,0,1,131,0,0.6,1,0,2,1 158 | 47,1,2,130,253,0,1,179,0,0,2,0,2,1 159 | 35,1,1,122,192,0,1,174,0,0,2,0,2,1 160 | 58,1,1,125,220,0,1,144,0,0.4,1,4,3,1 161 | 56,1,1,130,221,0,0,163,0,0,2,0,3,1 162 | 56,1,1,120,240,0,1,169,0,0,0,0,2,1 163 | 55,0,1,132,342,0,1,166,0,1.2,2,0,2,1 164 | 41,1,1,120,157,0,1,182,0,0,2,0,2,1 165 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 166 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 167 | 67,1,0,160,286,0,0,108,1,1.5,1,3,2,0 168 | 67,1,0,120,229,0,0,129,1,2.6,1,2,3,0 169 | 62,0,0,140,268,0,0,160,0,3.6,0,2,2,0 170 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 171 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 172 | 56,1,2,130,256,1,0,142,1,0.6,1,1,1,0 173 | 48,1,1,110,229,0,1,168,0,1,0,0,3,0 174 | 58,1,1,120,284,0,0,160,0,1.8,1,0,2,0 175 | 58,1,2,132,224,0,0,173,0,3.2,2,2,3,0 176 | 60,1,0,130,206,0,0,132,1,2.4,1,2,3,0 177 | 40,1,0,110,167,0,0,114,1,2,1,0,3,0 178 | 60,1,0,117,230,1,1,160,1,1.4,2,2,3,0 179 | 64,1,2,140,335,0,1,158,0,0,2,0,2,0 180 | 43,1,0,120,177,0,0,120,1,2.5,1,0,3,0 181 | 57,1,0,150,276,0,0,112,1,0.6,1,1,1,0 182 | 
55,1,0,132,353,0,1,132,1,1.2,1,1,3,0 183 | 65,0,0,150,225,0,0,114,0,1,1,3,3,0 184 | 61,0,0,130,330,0,0,169,0,0,2,0,2,0 185 | 58,1,2,112,230,0,0,165,0,2.5,1,1,3,0 186 | 50,1,0,150,243,0,0,128,0,2.6,1,0,3,0 187 | 44,1,0,112,290,0,0,153,0,0,2,1,2,0 188 | 60,1,0,130,253,0,1,144,1,1.4,2,1,3,0 189 | 54,1,0,124,266,0,0,109,1,2.2,1,1,3,0 190 | 50,1,2,140,233,0,1,163,0,0.6,1,1,3,0 191 | 41,1,0,110,172,0,0,158,0,0,2,0,3,0 192 | 51,0,0,130,305,0,1,142,1,1.2,1,0,3,0 193 | 58,1,0,128,216,0,0,131,1,2.2,1,3,3,0 194 | 54,1,0,120,188,0,1,113,0,1.4,1,1,3,0 195 | 60,1,0,145,282,0,0,142,1,2.8,1,2,3,0 196 | 60,1,2,140,185,0,0,155,0,3,1,0,2,0 197 | 59,1,0,170,326,0,0,140,1,3.4,0,0,3,0 198 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 199 | 67,1,0,125,254,1,1,163,0,0.2,1,2,3,0 200 | 62,1,0,120,267,0,1,99,1,1.8,1,2,3,0 201 | 65,1,0,110,248,0,0,158,0,0.6,2,2,1,0 202 | 44,1,0,110,197,0,0,177,0,0,2,1,2,0 203 | 60,1,0,125,258,0,0,141,1,2.8,1,1,3,0 204 | 58,1,0,150,270,0,0,111,1,0.8,2,0,3,0 205 | 68,1,2,180,274,1,0,150,1,1.6,1,0,3,0 206 | 62,0,0,160,164,0,0,145,0,6.2,0,3,3,0 207 | 52,1,0,128,255,0,1,161,1,0,2,1,3,0 208 | 59,1,0,110,239,0,0,142,1,1.2,1,1,3,0 209 | 60,0,0,150,258,0,0,157,0,2.6,1,2,3,0 210 | 49,1,2,120,188,0,1,139,0,2,1,3,3,0 211 | 59,1,0,140,177,0,1,162,1,0,2,1,3,0 212 | 57,1,2,128,229,0,0,150,0,0.4,1,1,3,0 213 | 61,1,0,120,260,0,1,140,1,3.6,1,1,3,0 214 | 39,1,0,118,219,0,1,140,0,1.2,1,0,3,0 215 | 61,0,0,145,307,0,0,146,1,1,1,0,3,0 216 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 217 | 43,0,0,132,341,1,0,136,1,3,1,0,3,0 218 | 62,0,2,130,263,0,1,97,0,1.2,1,1,3,0 219 | 63,1,0,130,330,1,0,132,1,1.8,2,3,3,0 220 | 65,1,0,135,254,0,0,127,0,2.8,1,1,3,0 221 | 48,1,0,130,256,1,0,150,1,0,2,2,3,0 222 | 63,0,0,150,407,0,0,154,0,4,1,3,3,0 223 | 55,1,0,140,217,0,1,111,1,5.6,0,0,3,0 224 | 65,1,3,138,282,1,0,174,0,1.4,1,1,2,0 225 | 56,0,0,200,288,1,0,133,1,4,0,2,3,0 226 | 54,1,0,110,239,0,1,126,1,2.8,1,1,3,0 227 | 70,1,0,145,174,0,1,125,1,2.6,0,0,3,0 228 | 62,1,1,120,281,0,0,103,0,1.4,1,1,3,0 229 | 35,1,0,120,198,0,1,130,1,1.6,1,0,3,0 230 | 59,1,3,170,288,0,0,159,0,0.2,1,0,3,0 231 | 64,1,2,125,309,0,1,131,1,1.8,1,0,3,0 232 | 47,1,2,108,243,0,1,152,0,0,2,0,2,0 233 | 57,1,0,165,289,1,0,124,0,1,1,3,3,0 234 | 55,1,0,160,289,0,0,145,1,0.8,1,1,3,0 235 | 64,1,0,120,246,0,0,96,1,2.2,0,1,2,0 236 | 70,1,0,130,322,0,0,109,0,2.4,1,3,2,0 237 | 51,1,0,140,299,0,1,173,1,1.6,2,0,3,0 238 | 58,1,0,125,300,0,0,171,0,0,2,2,3,0 239 | 60,1,0,140,293,0,0,170,0,1.2,1,2,3,0 240 | 77,1,0,125,304,0,0,162,1,0,2,3,2,0 241 | 35,1,0,126,282,0,0,156,1,0,2,0,3,0 242 | 70,1,2,160,269,0,1,112,1,2.9,1,1,3,0 243 | 59,0,0,174,249,0,1,143,1,0,1,0,2,0 244 | 64,1,0,145,212,0,0,132,0,2,1,2,1,0 245 | 57,1,0,152,274,0,1,88,1,1.2,1,1,3,0 246 | 56,1,0,132,184,0,0,105,1,2.1,1,1,1,0 247 | 48,1,0,124,274,0,0,166,0,0.5,1,0,3,0 248 | 56,0,0,134,409,0,0,150,1,1.9,1,2,3,0 249 | 66,1,1,160,246,0,1,120,1,0,1,3,1,0 250 | 54,1,1,192,283,0,0,195,0,0,2,1,3,0 251 | 69,1,2,140,254,0,0,146,0,2,1,3,3,0 252 | 51,1,0,140,298,0,1,122,1,4.2,1,3,3,0 253 | 43,1,0,132,247,1,0,143,1,0.1,1,4,3,0 254 | 62,0,0,138,294,1,1,106,0,1.9,1,3,2,0 255 | 67,1,0,100,299,0,0,125,1,0.9,1,2,2,0 256 | 59,1,3,160,273,0,0,125,0,0,2,0,2,0 257 | 45,1,0,142,309,0,0,147,1,0,1,3,3,0 258 | 58,1,0,128,259,0,0,130,1,3,1,2,3,0 259 | 50,1,0,144,200,0,0,126,1,0.9,1,0,3,0 260 | 62,0,0,150,244,0,1,154,1,1.4,1,0,2,0 261 | 38,1,3,120,231,0,1,182,1,3.8,1,0,3,0 262 | 66,0,0,178,228,1,1,165,1,1,1,2,3,0 263 | 52,1,0,112,230,0,1,160,0,0,2,1,2,0 264 | 53,1,0,123,282,0,1,95,1,2,1,2,3,0 265 | 63,0,0,108,269,0,1,169,1,1.8,1,2,2,0 266 | 
54,1,0,110,206,0,0,108,1,0,1,1,2,0 267 | 66,1,0,112,212,0,0,132,1,0.1,2,1,2,0 268 | 55,0,0,180,327,0,2,117,1,3.4,1,0,2,0 269 | 49,1,2,118,149,0,0,126,0,0.8,2,3,2,0 270 | 54,1,0,122,286,0,0,116,1,3.2,1,2,2,0 271 | 56,1,0,130,283,1,0,103,1,1.6,0,0,3,0 272 | 46,1,0,120,249,0,0,144,0,0.8,2,0,3,0 273 | 61,1,3,134,234,0,1,145,0,2.6,1,2,2,0 274 | 67,1,0,120,237,0,1,71,0,1,1,0,2,0 275 | 58,1,0,100,234,0,1,156,0,0.1,2,1,3,0 276 | 47,1,0,110,275,0,0,118,1,1,1,1,2,0 277 | 52,1,0,125,212,0,1,168,0,1,2,2,3,0 278 | 58,1,0,146,218,0,1,105,0,2,1,1,3,0 279 | 57,1,1,124,261,0,1,141,0,0.3,2,0,3,0 280 | 58,0,1,136,319,1,0,152,0,0,2,2,2,0 281 | 61,1,0,138,166,0,0,125,1,3.6,1,1,2,0 282 | 42,1,0,136,315,0,1,125,1,1.8,1,0,1,0 283 | 52,1,0,128,204,1,1,156,1,1,1,0,0,0 284 | 59,1,2,126,218,1,1,134,0,2.2,1,1,1,0 285 | 40,1,0,152,223,0,1,181,0,0,2,0,3,0 286 | 61,1,0,140,207,0,0,138,1,1.9,2,1,3,0 287 | 46,1,0,140,311,0,1,120,1,1.8,1,2,3,0 288 | 59,1,3,134,204,0,1,162,0,0.8,2,2,2,0 289 | 57,1,1,154,232,0,0,164,0,0,2,1,2,0 290 | 57,1,0,110,335,0,1,143,1,3,1,1,3,0 291 | 55,0,0,128,205,0,2,130,1,2,1,1,3,0 292 | 61,1,0,148,203,0,1,161,0,0,2,1,3,0 293 | 58,1,0,114,318,0,2,140,0,4.4,0,3,1,0 294 | 58,0,0,170,225,1,0,146,1,2.8,1,2,1,0 295 | 67,1,2,152,212,0,0,150,0,0.8,1,0,3,0 296 | 44,1,0,120,169,0,1,144,1,2.8,0,0,1,0 297 | 63,1,0,140,187,0,0,144,1,4,2,2,3,0 298 | 63,0,0,124,197,0,1,136,1,0,1,0,2,0 299 | 59,1,0,164,176,1,0,90,0,1,1,2,1,0 300 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 301 | 45,1,3,110,264,0,1,132,0,1.2,1,0,3,0 302 | 68,1,0,144,193,1,1,141,0,3.4,1,2,3,0 303 | 57,1,0,130,131,0,1,115,1,1.2,1,1,3,0 304 | 57,0,1,130,236,0,0,174,0,0,1,1,2,0 305 | -------------------------------------------------------------------------------- /exception handling and logging/Data/heart.csv: -------------------------------------------------------------------------------- 1 | age,sex,chest_pain_type,resting_bp,cholestoral,fasting_blood_sugar,restecg,max_hr,exang,oldpeak,slope,num_major_vessels,thal,target 2 | 63,1,3,145,233,1,0,150,0,2.3,0,0,1,1 3 | 37,1,2,130,250,0,1,187,0,3.5,0,0,2,1 4 | 41,0,1,130,204,0,0,172,0,1.4,2,0,2,1 5 | 56,1,1,120,236,0,1,178,0,0.8,2,0,2,1 6 | 57,0,0,120,354,0,1,163,1,0.6,2,0,2,1 7 | 57,1,0,140,192,0,1,148,0,0.4,1,0,1,1 8 | 56,0,1,140,294,0,0,153,0,1.3,1,0,2,1 9 | 44,1,1,120,263,0,1,173,0,0,2,0,3,1 10 | 52,1,2,172,199,1,1,162,0,0.5,2,0,3,1 11 | 57,1,2,150,168,0,1,174,0,1.6,2,0,2,1 12 | 54,1,0,140,239,0,1,160,0,1.2,2,0,2,1 13 | 48,0,2,130,275,0,1,139,0,0.2,2,0,2,1 14 | 49,1,1,130,266,0,1,171,0,0.6,2,0,2,1 15 | 64,1,3,110,211,0,0,144,1,1.8,1,0,2,1 16 | 58,0,3,150,283,1,0,162,0,1,2,0,2,1 17 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 18 | 58,0,2,120,340,0,1,172,0,0,2,0,2,1 19 | 66,0,3,150,226,0,1,114,0,2.6,0,0,2,1 20 | 43,1,0,150,247,0,1,171,0,1.5,2,0,2,1 21 | 69,0,3,140,239,0,1,151,0,1.8,2,2,2,1 22 | 59,1,0,135,234,0,1,161,0,0.5,1,0,3,1 23 | 44,1,2,130,233,0,1,179,1,0.4,2,0,2,1 24 | 42,1,0,140,226,0,1,178,0,0,2,0,2,1 25 | 61,1,2,150,243,1,1,137,1,1,1,0,2,1 26 | 40,1,3,140,199,0,1,178,1,1.4,2,0,3,1 27 | 71,0,1,160,302,0,1,162,0,0.4,2,2,2,1 28 | 59,1,2,150,212,1,1,157,0,1.6,2,0,2,1 29 | 51,1,2,110,175,0,1,123,0,0.6,2,0,2,1 30 | 65,0,2,140,417,1,0,157,0,0.8,2,1,2,1 31 | 53,1,2,130,197,1,0,152,0,1.2,0,0,2,1 32 | 41,0,1,105,198,0,1,168,0,0,2,1,2,1 33 | 65,1,0,120,177,0,1,140,0,0.4,2,0,3,1 34 | 44,1,1,130,219,0,0,188,0,0,2,0,2,1 35 | 54,1,2,125,273,0,0,152,0,0.5,0,1,2,1 36 | 51,1,3,125,213,0,0,125,1,1.4,2,1,2,1 37 | 46,0,2,142,177,0,0,160,1,1.4,0,0,2,1 38 | 54,0,2,135,304,1,1,170,0,0,2,0,2,1 39 | 
54,1,2,150,232,0,0,165,0,1.6,2,0,3,1 40 | 65,0,2,155,269,0,1,148,0,0.8,2,0,2,1 41 | 65,0,2,160,360,0,0,151,0,0.8,2,0,2,1 42 | 51,0,2,140,308,0,0,142,0,1.5,2,1,2,1 43 | 48,1,1,130,245,0,0,180,0,0.2,1,0,2,1 44 | 45,1,0,104,208,0,0,148,1,3,1,0,2,1 45 | 53,0,0,130,264,0,0,143,0,0.4,1,0,2,1 46 | 39,1,2,140,321,0,0,182,0,0,2,0,2,1 47 | 52,1,1,120,325,0,1,172,0,0.2,2,0,2,1 48 | 44,1,2,140,235,0,0,180,0,0,2,0,2,1 49 | 47,1,2,138,257,0,0,156,0,0,2,0,2,1 50 | 53,0,2,128,216,0,0,115,0,0,2,0,0,1 51 | 53,0,0,138,234,0,0,160,0,0,2,0,2,1 52 | 51,0,2,130,256,0,0,149,0,0.5,2,0,2,1 53 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 54 | 62,1,2,130,231,0,1,146,0,1.8,1,3,3,1 55 | 44,0,2,108,141,0,1,175,0,0.6,1,0,2,1 56 | 63,0,2,135,252,0,0,172,0,0,2,0,2,1 57 | 52,1,1,134,201,0,1,158,0,0.8,2,1,2,1 58 | 48,1,0,122,222,0,0,186,0,0,2,0,2,1 59 | 45,1,0,115,260,0,0,185,0,0,2,0,2,1 60 | 34,1,3,118,182,0,0,174,0,0,2,0,2,1 61 | 57,0,0,128,303,0,0,159,0,0,2,1,2,1 62 | 71,0,2,110,265,1,0,130,0,0,2,1,2,1 63 | 54,1,1,108,309,0,1,156,0,0,2,0,3,1 64 | 52,1,3,118,186,0,0,190,0,0,1,0,1,1 65 | 41,1,1,135,203,0,1,132,0,0,1,0,1,1 66 | 58,1,2,140,211,1,0,165,0,0,2,0,2,1 67 | 35,0,0,138,183,0,1,182,0,1.4,2,0,2,1 68 | 51,1,2,100,222,0,1,143,1,1.2,1,0,2,1 69 | 45,0,1,130,234,0,0,175,0,0.6,1,0,2,1 70 | 44,1,1,120,220,0,1,170,0,0,2,0,2,1 71 | 62,0,0,124,209,0,1,163,0,0,2,0,2,1 72 | 54,1,2,120,258,0,0,147,0,0.4,1,0,3,1 73 | 51,1,2,94,227,0,1,154,1,0,2,1,3,1 74 | 29,1,1,130,204,0,0,202,0,0,2,0,2,1 75 | 51,1,0,140,261,0,0,186,1,0,2,0,2,1 76 | 43,0,2,122,213,0,1,165,0,0.2,1,0,2,1 77 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 78 | 51,1,2,125,245,1,0,166,0,2.4,1,0,2,1 79 | 59,1,1,140,221,0,1,164,1,0,2,0,2,1 80 | 52,1,1,128,205,1,1,184,0,0,2,0,2,1 81 | 58,1,2,105,240,0,0,154,1,0.6,1,0,3,1 82 | 41,1,2,112,250,0,1,179,0,0,2,0,2,1 83 | 45,1,1,128,308,0,0,170,0,0,2,0,2,1 84 | 60,0,2,102,318,0,1,160,0,0,2,1,2,1 85 | 52,1,3,152,298,1,1,178,0,1.2,1,0,3,1 86 | 42,0,0,102,265,0,0,122,0,0.6,1,0,2,1 87 | 67,0,2,115,564,0,0,160,0,1.6,1,0,3,1 88 | 68,1,2,118,277,0,1,151,0,1,2,1,3,1 89 | 46,1,1,101,197,1,1,156,0,0,2,0,3,1 90 | 54,0,2,110,214,0,1,158,0,1.6,1,0,2,1 91 | 58,0,0,100,248,0,0,122,0,1,1,0,2,1 92 | 48,1,2,124,255,1,1,175,0,0,2,2,2,1 93 | 57,1,0,132,207,0,1,168,1,0,2,0,3,1 94 | 52,1,2,138,223,0,1,169,0,0,2,4,2,1 95 | 54,0,1,132,288,1,0,159,1,0,2,1,2,1 96 | 45,0,1,112,160,0,1,138,0,0,1,0,2,1 97 | 53,1,0,142,226,0,0,111,1,0,2,0,3,1 98 | 62,0,0,140,394,0,0,157,0,1.2,1,0,2,1 99 | 52,1,0,108,233,1,1,147,0,0.1,2,3,3,1 100 | 43,1,2,130,315,0,1,162,0,1.9,2,1,2,1 101 | 53,1,2,130,246,1,0,173,0,0,2,3,2,1 102 | 42,1,3,148,244,0,0,178,0,0.8,2,2,2,1 103 | 59,1,3,178,270,0,0,145,0,4.2,0,0,3,1 104 | 63,0,1,140,195,0,1,179,0,0,2,2,2,1 105 | 42,1,2,120,240,1,1,194,0,0.8,0,0,3,1 106 | 50,1,2,129,196,0,1,163,0,0,2,0,2,1 107 | 68,0,2,120,211,0,0,115,0,1.5,1,0,2,1 108 | 69,1,3,160,234,1,0,131,0,0.1,1,1,2,1 109 | 45,0,0,138,236,0,0,152,1,0.2,1,0,2,1 110 | 50,0,1,120,244,0,1,162,0,1.1,2,0,2,1 111 | 50,0,0,110,254,0,0,159,0,0,2,0,2,1 112 | 64,0,0,180,325,0,1,154,1,0,2,0,2,1 113 | 57,1,2,150,126,1,1,173,0,0.2,2,1,3,1 114 | 64,0,2,140,313,0,1,133,0,0.2,2,0,3,1 115 | 43,1,0,110,211,0,1,161,0,0,2,0,3,1 116 | 55,1,1,130,262,0,1,155,0,0,2,0,2,1 117 | 37,0,2,120,215,0,1,170,0,0,2,0,2,1 118 | 41,1,2,130,214,0,0,168,0,2,1,0,2,1 119 | 56,1,3,120,193,0,0,162,0,1.9,1,0,3,1 120 | 46,0,1,105,204,0,1,172,0,0,2,0,2,1 121 | 46,0,0,138,243,0,0,152,1,0,1,0,2,1 122 | 64,0,0,130,303,0,1,122,0,2,1,2,2,1 123 | 59,1,0,138,271,0,0,182,0,0,2,0,2,1 124 | 41,0,2,112,268,0,0,172,1,0,2,0,2,1 125 | 
54,0,2,108,267,0,0,167,0,0,2,0,2,1 126 | 39,0,2,94,199,0,1,179,0,0,2,0,2,1 127 | 34,0,1,118,210,0,1,192,0,0.7,2,0,2,1 128 | 47,1,0,112,204,0,1,143,0,0.1,2,0,2,1 129 | 67,0,2,152,277,0,1,172,0,0,2,1,2,1 130 | 52,0,2,136,196,0,0,169,0,0.1,1,0,2,1 131 | 74,0,1,120,269,0,0,121,1,0.2,2,1,2,1 132 | 54,0,2,160,201,0,1,163,0,0,2,1,2,1 133 | 49,0,1,134,271,0,1,162,0,0,1,0,2,1 134 | 42,1,1,120,295,0,1,162,0,0,2,0,2,1 135 | 41,1,1,110,235,0,1,153,0,0,2,0,2,1 136 | 41,0,1,126,306,0,1,163,0,0,2,0,2,1 137 | 49,0,0,130,269,0,1,163,0,0,2,0,2,1 138 | 60,0,2,120,178,1,1,96,0,0,2,0,2,1 139 | 62,1,1,128,208,1,0,140,0,0,2,0,2,1 140 | 57,1,0,110,201,0,1,126,1,1.5,1,0,1,1 141 | 64,1,0,128,263,0,1,105,1,0.2,1,1,3,1 142 | 51,0,2,120,295,0,0,157,0,0.6,2,0,2,1 143 | 43,1,0,115,303,0,1,181,0,1.2,1,0,2,1 144 | 42,0,2,120,209,0,1,173,0,0,1,0,2,1 145 | 67,0,0,106,223,0,1,142,0,0.3,2,2,2,1 146 | 76,0,2,140,197,0,2,116,0,1.1,1,0,2,1 147 | 70,1,1,156,245,0,0,143,0,0,2,0,2,1 148 | 44,0,2,118,242,0,1,149,0,0.3,1,1,2,1 149 | 60,0,3,150,240,0,1,171,0,0.9,2,0,2,1 150 | 44,1,2,120,226,0,1,169,0,0,2,0,2,1 151 | 42,1,2,130,180,0,1,150,0,0,2,0,2,1 152 | 66,1,0,160,228,0,0,138,0,2.3,2,0,1,1 153 | 71,0,0,112,149,0,1,125,0,1.6,1,0,2,1 154 | 64,1,3,170,227,0,0,155,0,0.6,1,0,3,1 155 | 66,0,2,146,278,0,0,152,0,0,1,1,2,1 156 | 39,0,2,138,220,0,1,152,0,0,1,0,2,1 157 | 58,0,0,130,197,0,1,131,0,0.6,1,0,2,1 158 | 47,1,2,130,253,0,1,179,0,0,2,0,2,1 159 | 35,1,1,122,192,0,1,174,0,0,2,0,2,1 160 | 58,1,1,125,220,0,1,144,0,0.4,1,4,3,1 161 | 56,1,1,130,221,0,0,163,0,0,2,0,3,1 162 | 56,1,1,120,240,0,1,169,0,0,0,0,2,1 163 | 55,0,1,132,342,0,1,166,0,1.2,2,0,2,1 164 | 41,1,1,120,157,0,1,182,0,0,2,0,2,1 165 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 166 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 167 | 67,1,0,160,286,0,0,108,1,1.5,1,3,2,0 168 | 67,1,0,120,229,0,0,129,1,2.6,1,2,3,0 169 | 62,0,0,140,268,0,0,160,0,3.6,0,2,2,0 170 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 171 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 172 | 56,1,2,130,256,1,0,142,1,0.6,1,1,1,0 173 | 48,1,1,110,229,0,1,168,0,1,0,0,3,0 174 | 58,1,1,120,284,0,0,160,0,1.8,1,0,2,0 175 | 58,1,2,132,224,0,0,173,0,3.2,2,2,3,0 176 | 60,1,0,130,206,0,0,132,1,2.4,1,2,3,0 177 | 40,1,0,110,167,0,0,114,1,2,1,0,3,0 178 | 60,1,0,117,230,1,1,160,1,1.4,2,2,3,0 179 | 64,1,2,140,335,0,1,158,0,0,2,0,2,0 180 | 43,1,0,120,177,0,0,120,1,2.5,1,0,3,0 181 | 57,1,0,150,276,0,0,112,1,0.6,1,1,1,0 182 | 55,1,0,132,353,0,1,132,1,1.2,1,1,3,0 183 | 65,0,0,150,225,0,0,114,0,1,1,3,3,0 184 | 61,0,0,130,330,0,0,169,0,0,2,0,2,0 185 | 58,1,2,112,230,0,0,165,0,2.5,1,1,3,0 186 | 50,1,0,150,243,0,0,128,0,2.6,1,0,3,0 187 | 44,1,0,112,290,0,0,153,0,0,2,1,2,0 188 | 60,1,0,130,253,0,1,144,1,1.4,2,1,3,0 189 | 54,1,0,124,266,0,0,109,1,2.2,1,1,3,0 190 | 50,1,2,140,233,0,1,163,0,0.6,1,1,3,0 191 | 41,1,0,110,172,0,0,158,0,0,2,0,3,0 192 | 51,0,0,130,305,0,1,142,1,1.2,1,0,3,0 193 | 58,1,0,128,216,0,0,131,1,2.2,1,3,3,0 194 | 54,1,0,120,188,0,1,113,0,1.4,1,1,3,0 195 | 60,1,0,145,282,0,0,142,1,2.8,1,2,3,0 196 | 60,1,2,140,185,0,0,155,0,3,1,0,2,0 197 | 59,1,0,170,326,0,0,140,1,3.4,0,0,3,0 198 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 199 | 67,1,0,125,254,1,1,163,0,0.2,1,2,3,0 200 | 62,1,0,120,267,0,1,99,1,1.8,1,2,3,0 201 | 65,1,0,110,248,0,0,158,0,0.6,2,2,1,0 202 | 44,1,0,110,197,0,0,177,0,0,2,1,2,0 203 | 60,1,0,125,258,0,0,141,1,2.8,1,1,3,0 204 | 58,1,0,150,270,0,0,111,1,0.8,2,0,3,0 205 | 68,1,2,180,274,1,0,150,1,1.6,1,0,3,0 206 | 62,0,0,160,164,0,0,145,0,6.2,0,3,3,0 207 | 52,1,0,128,255,0,1,161,1,0,2,1,3,0 208 | 59,1,0,110,239,0,0,142,1,1.2,1,1,3,0 209 | 
60,0,0,150,258,0,0,157,0,2.6,1,2,3,0 210 | 49,1,2,120,188,0,1,139,0,2,1,3,3,0 211 | 59,1,0,140,177,0,1,162,1,0,2,1,3,0 212 | 57,1,2,128,229,0,0,150,0,0.4,1,1,3,0 213 | 61,1,0,120,260,0,1,140,1,3.6,1,1,3,0 214 | 39,1,0,118,219,0,1,140,0,1.2,1,0,3,0 215 | 61,0,0,145,307,0,0,146,1,1,1,0,3,0 216 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 217 | 43,0,0,132,341,1,0,136,1,3,1,0,3,0 218 | 62,0,2,130,263,0,1,97,0,1.2,1,1,3,0 219 | 63,1,0,130,330,1,0,132,1,1.8,2,3,3,0 220 | 65,1,0,135,254,0,0,127,0,2.8,1,1,3,0 221 | 48,1,0,130,256,1,0,150,1,0,2,2,3,0 222 | 63,0,0,150,407,0,0,154,0,4,1,3,3,0 223 | 55,1,0,140,217,0,1,111,1,5.6,0,0,3,0 224 | 65,1,3,138,282,1,0,174,0,1.4,1,1,2,0 225 | 56,0,0,200,288,1,0,133,1,4,0,2,3,0 226 | 54,1,0,110,239,0,1,126,1,2.8,1,1,3,0 227 | 70,1,0,145,174,0,1,125,1,2.6,0,0,3,0 228 | 62,1,1,120,281,0,0,103,0,1.4,1,1,3,0 229 | 35,1,0,120,198,0,1,130,1,1.6,1,0,3,0 230 | 59,1,3,170,288,0,0,159,0,0.2,1,0,3,0 231 | 64,1,2,125,309,0,1,131,1,1.8,1,0,3,0 232 | 47,1,2,108,243,0,1,152,0,0,2,0,2,0 233 | 57,1,0,165,289,1,0,124,0,1,1,3,3,0 234 | 55,1,0,160,289,0,0,145,1,0.8,1,1,3,0 235 | 64,1,0,120,246,0,0,96,1,2.2,0,1,2,0 236 | 70,1,0,130,322,0,0,109,0,2.4,1,3,2,0 237 | 51,1,0,140,299,0,1,173,1,1.6,2,0,3,0 238 | 58,1,0,125,300,0,0,171,0,0,2,2,3,0 239 | 60,1,0,140,293,0,0,170,0,1.2,1,2,3,0 240 | 77,1,0,125,304,0,0,162,1,0,2,3,2,0 241 | 35,1,0,126,282,0,0,156,1,0,2,0,3,0 242 | 70,1,2,160,269,0,1,112,1,2.9,1,1,3,0 243 | 59,0,0,174,249,0,1,143,1,0,1,0,2,0 244 | 64,1,0,145,212,0,0,132,0,2,1,2,1,0 245 | 57,1,0,152,274,0,1,88,1,1.2,1,1,3,0 246 | 56,1,0,132,184,0,0,105,1,2.1,1,1,1,0 247 | 48,1,0,124,274,0,0,166,0,0.5,1,0,3,0 248 | 56,0,0,134,409,0,0,150,1,1.9,1,2,3,0 249 | 66,1,1,160,246,0,1,120,1,0,1,3,1,0 250 | 54,1,1,192,283,0,0,195,0,0,2,1,3,0 251 | 69,1,2,140,254,0,0,146,0,2,1,3,3,0 252 | 51,1,0,140,298,0,1,122,1,4.2,1,3,3,0 253 | 43,1,0,132,247,1,0,143,1,0.1,1,4,3,0 254 | 62,0,0,138,294,1,1,106,0,1.9,1,3,2,0 255 | 67,1,0,100,299,0,0,125,1,0.9,1,2,2,0 256 | 59,1,3,160,273,0,0,125,0,0,2,0,2,0 257 | 45,1,0,142,309,0,0,147,1,0,1,3,3,0 258 | 58,1,0,128,259,0,0,130,1,3,1,2,3,0 259 | 50,1,0,144,200,0,0,126,1,0.9,1,0,3,0 260 | 62,0,0,150,244,0,1,154,1,1.4,1,0,2,0 261 | 38,1,3,120,231,0,1,182,1,3.8,1,0,3,0 262 | 66,0,0,178,228,1,1,165,1,1,1,2,3,0 263 | 52,1,0,112,230,0,1,160,0,0,2,1,2,0 264 | 53,1,0,123,282,0,1,95,1,2,1,2,3,0 265 | 63,0,0,108,269,0,1,169,1,1.8,1,2,2,0 266 | 54,1,0,110,206,0,0,108,1,0,1,1,2,0 267 | 66,1,0,112,212,0,0,132,1,0.1,2,1,2,0 268 | 55,0,0,180,327,0,2,117,1,3.4,1,0,2,0 269 | 49,1,2,118,149,0,0,126,0,0.8,2,3,2,0 270 | 54,1,0,122,286,0,0,116,1,3.2,1,2,2,0 271 | 56,1,0,130,283,1,0,103,1,1.6,0,0,3,0 272 | 46,1,0,120,249,0,0,144,0,0.8,2,0,3,0 273 | 61,1,3,134,234,0,1,145,0,2.6,1,2,2,0 274 | 67,1,0,120,237,0,1,71,0,1,1,0,2,0 275 | 58,1,0,100,234,0,1,156,0,0.1,2,1,3,0 276 | 47,1,0,110,275,0,0,118,1,1,1,1,2,0 277 | 52,1,0,125,212,0,1,168,0,1,2,2,3,0 278 | 58,1,0,146,218,0,1,105,0,2,1,1,3,0 279 | 57,1,1,124,261,0,1,141,0,0.3,2,0,3,0 280 | 58,0,1,136,319,1,0,152,0,0,2,2,2,0 281 | 61,1,0,138,166,0,0,125,1,3.6,1,1,2,0 282 | 42,1,0,136,315,0,1,125,1,1.8,1,0,1,0 283 | 52,1,0,128,204,1,1,156,1,1,1,0,0,0 284 | 59,1,2,126,218,1,1,134,0,2.2,1,1,1,0 285 | 40,1,0,152,223,0,1,181,0,0,2,0,3,0 286 | 61,1,0,140,207,0,0,138,1,1.9,2,1,3,0 287 | 46,1,0,140,311,0,1,120,1,1.8,1,2,3,0 288 | 59,1,3,134,204,0,1,162,0,0.8,2,2,2,0 289 | 57,1,1,154,232,0,0,164,0,0,2,1,2,0 290 | 57,1,0,110,335,0,1,143,1,3,1,1,3,0 291 | 55,0,0,128,205,0,2,130,1,2,1,1,3,0 292 | 61,1,0,148,203,0,1,161,0,0,2,1,3,0 293 | 
58,1,0,114,318,0,2,140,0,4.4,0,3,1,0 294 | 58,0,0,170,225,1,0,146,1,2.8,1,2,1,0 295 | 67,1,2,152,212,0,0,150,0,0.8,1,0,3,0 296 | 44,1,0,120,169,0,1,144,1,2.8,0,0,1,0 297 | 63,1,0,140,187,0,0,144,1,4,2,2,3,0 298 | 63,0,0,124,197,0,1,136,1,0,1,0,2,0 299 | 59,1,0,164,176,1,0,90,0,1,1,2,1,0 300 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 301 | 45,1,3,110,264,0,1,132,0,1.2,1,0,3,0 302 | 68,1,0,144,193,1,1,141,0,3.4,1,2,3,0 303 | 57,1,0,130,131,0,1,115,1,1.2,1,1,3,0 304 | 57,0,1,130,236,0,0,174,0,0,1,1,2,0 305 | -------------------------------------------------------------------------------- /uci_heart_disease/Heart Disease Inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Heart Disease Inference\n", 8 | "\n", 9 | "In this notebook, we will look at just the inference part of the heart disease classification solution.\n", 10 | "\n", 11 | "Inference is the process of applying the same transformations that were applied during training (data pre-processing, feature engineering, etc.), and then using the trained ML model to generate predictions." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Import Modules" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "### Get Inference Data" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# in real-time use cases, this code should be replaced with live flowing data\n", 45 | "\n", 46 | "data = pd.read_csv(\"Data/inference_heart_disease.csv\") # stands in for a live database connection\n", 47 | "data.drop_duplicates(subset=None, inplace=True)\n", 48 | "data.duplicated().any()\n", 49 | "inference_df = data.copy()\n", 50 | "inference_data, labels = inference_df[inference_df.columns.drop('target')], inference_df['target']" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "Index(['age', 'sex', 'chest_pain_type', 'resting_bp', 'cholestoral',\n", 62 | "       'fasting_blood_sugar', 'restecg', 'max_hr', 'exang', 'oldpeak', 'slope',\n", 63 | "       'num_major_vessels', 'thal'],\n", 64 | "      dtype='object')" 65 | ] 66 | }, 67 | "execution_count": 3, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "inference_data.columns" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 4, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/html": [
[reconstructed output: inference_data.head(), the first five rows of the inference set]

|   | age | sex | chest_pain_type | resting_bp | cholestoral | fasting_blood_sugar | restecg | max_hr | exang | oldpeak | slope | num_major_vessels | thal |
|---|-----|-----|-----------------|------------|-------------|---------------------|---------|--------|-------|---------|-------|-------------------|------|
| 0 | 66  | 1   | 0               | 120        | 302         | 0                   | 0       | 151    | 0     | 0.4     | 1     | 0                 | 2    |
| 1 | 52  | 1   | 0               | 112        | 230         | 0                   | 1       | 160    | 0     | 0.0     | 2     | 1                 | 2    |
| 2 | 63  | 0   | 1               | 140        | 195         | 0                   | 1       | 179    | 0     | 0.0     | 2     | 2                 | 2    |
| 3 | 46  | 1   | 2               | 150        | 231         | 0                   | 1       | 147    | 0     | 3.6     | 1     | 0                 | 2    |
| 4 | 63  | 1   | 0               | 130        | 254         | 0                   | 0       | 147    | 0     | 1.4     | 1     | 1                 | 3    |

[the notebook JSON between this output and the next did not survive extraction; it most likely held this cell's plain-text repr and the pre-processing cells]

[second output, excerpted: the pre-processed inference data, 20 rows × 23 integer-labelled columns, with the numeric features min-max scaled into [0, 1] and the categorical features one-hot encoded (columns 9-22 hold only 0/1 values); the pandas display is truncated to columns 0-9 and 13-22. First two rows, columns 0-9:]

|   | 0        | 1   | 2        | 3        | 4   | 5        | 6   | 7        | 8   | 9   |
|---|----------|-----|----------|----------|-----|----------|-----|----------|-----|-----|
| 0 | 1.000000 | 1.0 | 0.464286 | 0.814607 | 0.0 | 0.457447 | 0.0 | 0.111111 | 0.0 | 0.0 |
| 1 | 0.621622 | 1.0 | 0.321429 | 0.410112 | 0.0 | 0.553191 | 0.0 | 0.000000 | 0.5 | 0.0 |