12 |
90 |
91 |
92 |
93 |
96 |
97 | {% if context['housing_data'] is not none %}
98 |
99 | California Housing Prediction
100 |
101 | Input Feature
102 | Feature Value
103 |
104 |
105 | {% for column,value in context['housing_data'].items() %}
106 |
107 |
108 |
109 |
110 | {{column}}
111 | {{value[0]}}
112 |
113 |
114 | {% endfor %}
115 |
116 |
117 | median_house_value
118 |
119 | {{ context['median_house_value'] }}
120 |
121 |
122 |
123 |
124 | {% else %}
125 |
126 |
Submit Form
127 |
Kindly provide the necessary information to estimate the housing price in California
128 |
129 |
130 |
131 | {% endif %}
132 | Go to
Home
133 |
134 |
135 |
136 |
137 | {% endblock %}
--------------------------------------------------------------------------------
/housing/component/data_ingestion.py:
--------------------------------------------------------------------------------
1 | from housing.entity.config_entity import DataIngestionConfig
2 | import sys,os
3 | from housing.exception import HousingException
4 | from housing.logger import logging
5 | from housing.entity.artifact_entity import DataIngestionArtifact
6 | import tarfile
7 | import numpy as np
8 | from six.moves import urllib
9 | import pandas as pd
10 | from sklearn.model_selection import StratifiedShuffleSplit
11 |
class DataIngestion:
    """Data-ingestion stage of the housing ML pipeline.

    Downloads the dataset archive, extracts it, and produces stratified
    train/test CSV files, returning a DataIngestionArtifact describing them.
    """

    def __init__(self, data_ingestion_config: DataIngestionConfig):
        """
        data_ingestion_config: paths/URLs controlling where data is
            downloaded, extracted, and written.
        """
        try:
            logging.info(f"{'>>'*20}Data Ingestion log started.{'<<'*20} ")
            self.data_ingestion_config = data_ingestion_config
        except Exception as e:
            # Chain the original exception (consistent with the other
            # methods in this class, which all use `from e`).
            raise HousingException(e, sys) from e

    def download_housing_data(self) -> str:
        """Download the dataset archive and return the local .tgz path."""
        try:
            # Remote URL hosting the dataset archive.
            download_url = self.data_ingestion_config.dataset_download_url

            # Local folder that will hold the downloaded archive.
            tgz_download_dir = self.data_ingestion_config.tgz_download_dir
            os.makedirs(tgz_download_dir, exist_ok=True)

            housing_file_name = os.path.basename(download_url)
            tgz_file_path = os.path.join(tgz_download_dir, housing_file_name)

            logging.info(f"Downloading file from :[{download_url}] into :[{tgz_file_path}]")
            urllib.request.urlretrieve(download_url, tgz_file_path)
            logging.info(f"File :[{tgz_file_path}] has been downloaded successfully.")
            return tgz_file_path
        except Exception as e:
            raise HousingException(e, sys) from e

    def extract_tgz_file(self, tgz_file_path: str) -> None:
        """Extract the archive at tgz_file_path into a fresh raw-data dir."""
        try:
            import shutil  # local import: only needed for directory cleanup

            raw_data_dir = self.data_ingestion_config.raw_data_dir

            # BUG FIX: the previous code called os.remove() on a directory,
            # which raises IsADirectoryError. shutil.rmtree correctly clears
            # a prior extraction before re-creating the directory.
            if os.path.exists(raw_data_dir):
                shutil.rmtree(raw_data_dir)
            os.makedirs(raw_data_dir, exist_ok=True)

            logging.info(f"Extracting tgz file: [{tgz_file_path}] into dir: [{raw_data_dir}]")
            with tarfile.open(tgz_file_path) as housing_tgz_file_obj:
                housing_tgz_file_obj.extractall(path=raw_data_dir)
            logging.info(f"Extraction completed")
        except Exception as e:
            raise HousingException(e, sys) from e

    def split_data_as_train_test(self) -> DataIngestionArtifact:
        """Split the extracted CSV into stratified train/test sets.

        Stratifies on an income category bucketed from `median_income` so
        both splits share the same income distribution, exports each split
        as CSV, and returns a DataIngestionArtifact with the file paths.
        """
        try:
            raw_data_dir = self.data_ingestion_config.raw_data_dir

            # Assumes exactly one extracted file (the dataset CSV) lives in
            # raw_data_dir. NOTE(review): an empty dir raises IndexError here.
            file_name = os.listdir(raw_data_dir)[0]
            housing_file_path = os.path.join(raw_data_dir, file_name)

            logging.info(f"Reading csv file: [{housing_file_path}]")
            housing_data_frame = pd.read_csv(housing_file_path)

            # Temporary bucketed income column used only for stratification.
            housing_data_frame["income_cat"] = pd.cut(
                housing_data_frame["median_income"],
                bins=[0.0, 1.5, 3.0, 4.5, 6.0, np.inf],
                labels=[1, 2, 3, 4, 5]
            )

            logging.info(f"Splitting data into train and test")
            strat_train_set = None
            strat_test_set = None

            split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

            # n_splits=1, so this loop body runs exactly once; drop the
            # helper column so it does not leak into the exported datasets.
            for train_index, test_index in split.split(housing_data_frame,
                                                       housing_data_frame["income_cat"]):
                strat_train_set = housing_data_frame.loc[train_index].drop(["income_cat"], axis=1)
                strat_test_set = housing_data_frame.loc[test_index].drop(["income_cat"], axis=1)

            train_file_path = os.path.join(self.data_ingestion_config.ingested_train_dir,
                                           file_name)
            test_file_path = os.path.join(self.data_ingestion_config.ingested_test_dir,
                                          file_name)

            if strat_train_set is not None:
                os.makedirs(self.data_ingestion_config.ingested_train_dir, exist_ok=True)
                logging.info(f"Exporting training dataset to file: [{train_file_path}]")
                strat_train_set.to_csv(train_file_path, index=False)

            if strat_test_set is not None:
                os.makedirs(self.data_ingestion_config.ingested_test_dir, exist_ok=True)
                logging.info(f"Exporting test dataset to file: [{test_file_path}]")
                strat_test_set.to_csv(test_file_path, index=False)

            data_ingestion_artifact = DataIngestionArtifact(train_file_path=train_file_path,
                                                            test_file_path=test_file_path,
                                                            is_ingested=True,
                                                            message=f"Data ingestion completed successfully."
                                                            )
            logging.info(f"Data Ingestion artifact:[{data_ingestion_artifact}]")
            return data_ingestion_artifact
        except Exception as e:
            raise HousingException(e, sys) from e

    def initiate_data_ingestion(self) -> DataIngestionArtifact:
        """Run the full stage: download -> extract -> split; return artifact."""
        try:
            tgz_file_path = self.download_housing_data()
            self.extract_tgz_file(tgz_file_path=tgz_file_path)
            return self.split_data_as_train_test()
        except Exception as e:
            raise HousingException(e, sys) from e

    def __del__(self):
        # Marks stage completion in the log when the object is collected.
        logging.info(f"{'>>'*20}Data Ingestion log completed.{'<<'*20} \n\n")
--------------------------------------------------------------------------------
/housing/component/data_validation.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from housing.logger import logging
4 | from housing.exception import HousingException
5 | from housing.entity.config_entity import DataValidationConfig
6 | from housing.entity.artifact_entity import DataIngestionArtifact,DataValidationArtifact
7 | import os,sys
8 | import pandas as pd
9 | from evidently.model_profile import Profile
10 | from evidently.model_profile.sections import DataDriftProfileSection
11 | from evidently.dashboard import Dashboard
12 | from evidently.dashboard.tabs import DataDriftTab
13 | import json
14 |
class DataValidation:
    """Data-validation stage: checks that the ingested train/test files
    exist, validates the dataset schema (currently a stub), and produces
    a data-drift report/dashboard with evidently."""

    def __init__(self, data_validation_config: DataValidationConfig,
                 data_ingestion_artifact: DataIngestionArtifact):
        """
        data_validation_config: output paths for schema/report files.
        data_ingestion_artifact: locations of the ingested train/test CSVs.
        """
        try:
            # Typo fix in log message: "Valdaition" -> "Validation".
            logging.info(f"{'>>'*30}Data Validation log started.{'<<'*30} \n\n")
            self.data_validation_config = data_validation_config
            self.data_ingestion_artifact = data_ingestion_artifact
        except Exception as e:
            raise HousingException(e, sys) from e

    def get_train_and_test_df(self):
        """Load and return (train_df, test_df) from the ingestion artifact."""
        try:
            train_df = pd.read_csv(self.data_ingestion_artifact.train_file_path)
            test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
            return train_df, test_df
        except Exception as e:
            raise HousingException(e, sys) from e

    def is_train_test_file_exists(self) -> bool:
        """Return True if both ingested files exist; raise if either is missing."""
        try:
            logging.info("Checking if training and test file is available")
            train_file_path = self.data_ingestion_artifact.train_file_path
            test_file_path = self.data_ingestion_artifact.test_file_path

            is_train_file_exist = os.path.exists(train_file_path)
            is_test_file_exist = os.path.exists(test_file_path)
            is_available = is_train_file_exist and is_test_file_exist

            logging.info(f"Is train and test file exists?-> {is_available}")

            if not is_available:
                # BUG FIX: the two-part message previously concatenated with
                # no separator, producing "...{testing_file}is not present".
                message = f"Training file: {train_file_path} or Testing file: {test_file_path} " \
                          "is not present"
                raise Exception(message)

            return is_available
        except Exception as e:
            raise HousingException(e, sys) from e

    def validate_dataset_schema(self) -> bool:
        """Validate train/test data against the schema file.

        TODO: implement the actual checks using the schema file:
          1. number of columns
          2. allowed values of ocean_proximity
             (<1H OCEAN, INLAND, ISLAND, NEAR BAY, NEAR OCEAN)
          3. column names
        Currently always returns True (stub).
        """
        try:
            validation_status = True
            return validation_status
        except Exception as e:
            raise HousingException(e, sys) from e

    def get_and_save_data_drift_report(self):
        """Compute an evidently drift profile, persist it as JSON, return it."""
        try:
            profile = Profile(sections=[DataDriftProfileSection()])

            train_df, test_df = self.get_train_and_test_df()
            profile.calculate(train_df, test_df)

            # Round-trip through JSON to get a plain dict for serialization.
            report = json.loads(profile.json())

            report_file_path = self.data_validation_config.report_file_path
            report_dir = os.path.dirname(report_file_path)
            os.makedirs(report_dir, exist_ok=True)

            with open(report_file_path, "w") as report_file:
                json.dump(report, report_file, indent=6)
            return report
        except Exception as e:
            raise HousingException(e, sys) from e

    def save_data_drift_report_page(self):
        """Render and save the evidently drift dashboard as an HTML page."""
        try:
            dashboard = Dashboard(tabs=[DataDriftTab()])
            train_df, test_df = self.get_train_and_test_df()
            dashboard.calculate(train_df, test_df)

            report_page_file_path = self.data_validation_config.report_page_file_path
            report_page_dir = os.path.dirname(report_page_file_path)
            os.makedirs(report_page_dir, exist_ok=True)

            dashboard.save(report_page_file_path)
        except Exception as e:
            raise HousingException(e, sys) from e

    def is_data_drift_found(self) -> bool:
        """Generate the drift report and dashboard.

        NOTE(review): the computed report is never inspected -- this method
        always returns True regardless of actual drift. Verify whether the
        report's drift flag should drive the return value.
        """
        try:
            report = self.get_and_save_data_drift_report()
            self.save_data_drift_report_page()
            return True
        except Exception as e:
            raise HousingException(e, sys) from e

    def initiate_data_validation(self) -> DataValidationArtifact:
        """Run all validation steps and return a DataValidationArtifact."""
        try:
            self.is_train_test_file_exists()
            self.validate_dataset_schema()
            self.is_data_drift_found()

            data_validation_artifact = DataValidationArtifact(
                schema_file_path=self.data_validation_config.schema_file_path,
                report_file_path=self.data_validation_config.report_file_path,
                report_page_file_path=self.data_validation_config.report_page_file_path,
                is_validated=True,
                # Typo fix: "successully" -> "successfully".
                message="Data Validation performed successfully."
            )
            logging.info(f"Data validation artifact: {data_validation_artifact}")
            return data_validation_artifact
        except Exception as e:
            raise HousingException(e, sys) from e

    def __del__(self):
        # Marks stage completion in the log when the object is collected.
        logging.info(f"{'>>'*30}Data Validation log completed.{'<<'*30} \n\n")
148 |
149 |
150 |
151 |
152 |
--------------------------------------------------------------------------------
/housing/component/model_trainer.py:
--------------------------------------------------------------------------------
1 |
2 | from housing.exception import HousingException
3 | import sys
4 | from housing.logger import logging
5 | from typing import List
6 | from housing.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact
7 | from housing.entity.config_entity import ModelTrainerConfig
8 | from housing.util.util import load_numpy_array_data,save_object,load_object
9 | from housing.entity.model_factory import MetricInfoArtifact, ModelFactory,GridSearchedBestModel
10 | from housing.entity.model_factory import evaluate_regression_model
11 |
12 |
13 |
class HousingEstimatorModel:
    """Bundle a fitted preprocessing pipeline with a trained regressor.

    Lets callers score raw feature rows in one call: inputs are first
    transformed by the stored preprocessor, guaranteeing they arrive at
    the model in the same format it was trained on.
    """

    def __init__(self, preprocessing_object, trained_model_object):
        """
        preprocessing_object: fitted transformer applied to raw inputs.
        trained_model_object: estimator trained on transformed features.
        """
        self.preprocessing_object = preprocessing_object
        self.trained_model_object = trained_model_object

    def predict(self, X):
        """Transform raw inputs with the stored preprocessor, then predict."""
        features = self.preprocessing_object.transform(X)
        return self.trained_model_object.predict(features)

    def _model_name(self):
        # Shared by __repr__/__str__, e.g. "RandomForestRegressor()".
        return f"{type(self.trained_model_object).__name__}()"

    def __repr__(self):
        return self._model_name()

    def __str__(self):
        return self._model_name()
38 |
39 |
40 |
41 |
class ModelTrainer:
    """Model-training stage: selects the best regression model on the
    transformed data, evaluates all candidates on both splits, and saves
    the winner wrapped together with the preprocessing object."""

    def __init__(self, model_trainer_config: ModelTrainerConfig,
                 data_transformation_artifact: DataTransformationArtifact):
        """
        model_trainer_config: model config path, base accuracy threshold,
            and output path for the trained model.
        data_transformation_artifact: transformed train/test array paths
            and the preprocessing object location.
        """
        try:
            logging.info(f"{'>>' * 30}Model trainer log started.{'<<' * 30} ")
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact
        except Exception as e:
            raise HousingException(e, sys) from e

    def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Run model selection, evaluation, and persistence.

        Steps: load transformed train/test arrays, search for the best
        model via ModelFactory, re-evaluate every grid-searched candidate
        on both splits, wrap the winner with the preprocessing object,
        save it, and return a ModelTrainerArtifact with the metrics.
        """
        try:
            logging.info(f"Loading transformed training dataset")
            transformed_train_file_path = self.data_transformation_artifact.transformed_train_file_path
            train_array = load_numpy_array_data(file_path=transformed_train_file_path)

            logging.info(f"Loading transformed testing dataset")
            transformed_test_file_path = self.data_transformation_artifact.transformed_test_file_path
            test_array = load_numpy_array_data(file_path=transformed_test_file_path)

            # Last column of each array is the target; the rest are inputs.
            logging.info(f"Splitting training and testing input and target feature")
            x_train, y_train = train_array[:, :-1], train_array[:, -1]
            x_test, y_test = test_array[:, :-1], test_array[:, -1]

            logging.info(f"Extracting model config file path")
            model_config_file_path = self.model_trainer_config.model_config_file_path

            logging.info(f"Initializing model factory class using above model config file: {model_config_file_path}")
            model_factory = ModelFactory(model_config_path=model_config_file_path)

            base_accuracy = self.model_trainer_config.base_accuracy
            logging.info(f"Expected accuracy: {base_accuracy}")

            # Typo fix in log message: "selecttion" -> "selection".
            logging.info(f"Initiating operation model selection")
            best_model = model_factory.get_best_model(X=x_train, y=y_train, base_accuracy=base_accuracy)
            logging.info(f"Best model found on training dataset: {best_model}")

            logging.info(f"Extracting trained model list.")
            grid_searched_best_model_list: List[GridSearchedBestModel] = model_factory.grid_searched_best_model_list
            model_list = [model.best_model for model in grid_searched_best_model_list]

            # Re-evaluate every candidate on both splits so the final pick
            # is not based on training performance alone.
            logging.info(f"Evaluating all trained models on both training and testing datasets")
            metric_info: MetricInfoArtifact = evaluate_regression_model(model_list=model_list,
                                                                        X_train=x_train, y_train=y_train,
                                                                        X_test=x_test, y_test=y_test,
                                                                        base_accuracy=base_accuracy)
            logging.info(f"Best found model on both training and testing dataset.")

            preprocessing_obj = load_object(file_path=self.data_transformation_artifact.preprocessed_object_file_path)
            model_object = metric_info.model_object

            # Bundle preprocessing + model so inference accepts raw features.
            trained_model_file_path = self.model_trainer_config.trained_model_file_path
            housing_model = HousingEstimatorModel(preprocessing_object=preprocessing_obj,
                                                  trained_model_object=model_object)
            logging.info(f"Saving model at path: {trained_model_file_path}")
            save_object(file_path=trained_model_file_path, obj=housing_model)

            model_trainer_artifact = ModelTrainerArtifact(is_trained=True,
                                                          message="Model Trained successfully",
                                                          trained_model_file_path=trained_model_file_path,
                                                          train_rmse=metric_info.train_rmse,
                                                          test_rmse=metric_info.test_rmse,
                                                          train_accuracy=metric_info.train_accuracy,
                                                          test_accuracy=metric_info.test_accuracy,
                                                          model_accuracy=metric_info.model_accuracy)
            logging.info(f"Model Trainer Artifact: {model_trainer_artifact}")
            return model_trainer_artifact
        except Exception as e:
            raise HousingException(e, sys) from e

    def __del__(self):
        # Marks stage completion in the log when the object is collected.
        logging.info(f"{'>>' * 30}Model trainer log completed.{'<<' * 30} ")
117 |
118 |
119 |
120 | #loading transformed training and testing datset
121 | #reading model config file
122 | #getting best model on training datset
123 | #evaludation models on both training & testing datset -->model object
124 | #loading preprocessing pbject
125 | #custom model object by combining both preprocessing obj and model obj
126 | #saving custom model object
127 | #return model_trainer_artifact
128 |
--------------------------------------------------------------------------------
/templates/header.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
16 |
17 |
19 | {% block head %}{% endblock %}
20 |
21 |
42 |
43 |
44 |
45 |
46 |
53 |
54 |
55 |
84 |
85 |
86 |
87 |
88 |
89 |