├── us_visa
    ├── __init__.py
    ├── entity
    │   ├── __init__.py
    │   ├── artifact_entity.py
    │   ├── estimator.py
    │   ├── s3_estimator.py
    │   └── config_entity.py
    ├── pipline
    │   ├── __init__.py
    │   ├── prediction_pipeline.py
    │   └── training_pipeline.py
    ├── utils
    │   ├── __init__.py
    │   └── main_utils.py
    ├── cloud_storage
    │   ├── __init__.py
    │   └── aws_storage.py
    ├── components
    │   ├── __init__.py
    │   ├── model_pusher.py
    │   ├── model_trainer.py
    │   ├── data_ingestion.py
    │   ├── model_evaluation.py
    │   ├── data_validation.py
    │   └── data_transformation.py
    ├── configuration
    │   ├── __init__.py
    │   ├── mongo_db_connection.py
    │   └── aws_connection.py
    ├── data_access
    │   ├── __init__.py
    │   └── usvisa_data.py
    ├── logger
    │   └── __init__.py
    ├── exception
    │   └── __init__.py
    └── constants
    │   └── __init__.py
├── assignments
    └── tasks.txt
├── flowcharts
    ├── Model Pusher.png
    ├── Model Trainer.png
    ├── Data Ingestion.png
    ├── Data Validation.png
    ├── 1_Folder Structure.png
    ├── Model Evaluation.png
    └── Data Transformation.png
├── demo.py
├── .dockerignore
├── Dockerfile
├── setup.py
├── requirements.txt
├── config
    ├── model.yaml
    └── schema.yaml
├── LICENSE
├── static
    └── css
    │   └── style.css
├── template.py
├── .github
    └── workflows
    │   └── aws.yaml
├── README.md
├── .gitignore
├── app.py
└── templates
    └── usvisa.html


/us_visa/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/us_visa/entity/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/us_visa/pipline/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/us_visa/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/us_visa/cloud_storage/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/us_visa/components/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/us_visa/configuration/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/us_visa/data_access/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/assignments/tasks.txt:
--------------------------------------------------------------------------------
1 | 1. You have to re-create the folder structure flowchart & make the setup ready for the next class


--------------------------------------------------------------------------------
/flowcharts/Model Pusher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Model Pusher.png


--------------------------------------------------------------------------------
/flowcharts/Model Trainer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Model Trainer.png


--------------------------------------------------------------------------------
/flowcharts/Data Ingestion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Data Ingestion.png


--------------------------------------------------------------------------------
/flowcharts/Data Validation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Data Validation.png


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | from us_visa.pipline.training_pipeline import TrainPipeline
2 | 
3 | 
4 | pipline  = TrainPipeline()
5 | pipline.run_pipeline()


--------------------------------------------------------------------------------
/flowcharts/1_Folder Structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/1_Folder Structure.png


--------------------------------------------------------------------------------
/flowcharts/Model Evaluation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Model Evaluation.png


--------------------------------------------------------------------------------
/flowcharts/Data Transformation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Data Transformation.png


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | artifact
 2 | venv
 3 | env
 4 | .gitignore
 5 | logs
 6 | template.py
 7 | demo.py
 8 | README.md
 9 | LICENSE
10 | us_visa.egg.info
11 | notebook


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8.5-slim-buster
2 | 
3 | WORKDIR /app
4 | 
5 | COPY . /app
6 | 
7 | RUN pip install -r requirements.txt
8 | 
9 | CMD ["python3", "app.py"]


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | setup(
4 |     name="us_visa",
5 |     version="0.0.0",
6 |     author="Bappy",
7 |     author_email="entbappy73@gmail.com",
8 |     packages=find_packages()
9 | )


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | ipykernel
 2 | pandas
 3 | numpy
 4 | matplotlib
 5 | plotly
 6 | seaborn
 7 | scipy
 8 | scikit-learn
 9 | imblearn
10 | xgboost
11 | catboost
12 | pymongo
13 | from_root
14 | evidently==0.2.8
15 | dill
16 | PyYAML
17 | neuro_mf
18 | boto3
19 | mypy-boto3-s3
20 | botocore
21 | fastapi
22 | uvicorn
23 | jinja2
24 | python-multipart
25 | -e .


--------------------------------------------------------------------------------
/us_visa/logger/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | 
 4 | from from_root import from_root
 5 | from datetime import datetime
 6 | 
 7 | LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
 8 | 
 9 | log_dir = 'logs'
10 | 
11 | logs_path = os.path.join(from_root(), log_dir, LOG_FILE)
12 | 
13 | os.makedirs(log_dir, exist_ok=True)
14 | 
15 | 
16 | logging.basicConfig(
17 |     filename=logs_path,
18 |     format="[ %(asctime)s ] %(name)s - %(levelname)s - %(message)s",
19 |     level=logging.DEBUG,
20 | )


--------------------------------------------------------------------------------
/us_visa/exception/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | def error_message_detail(error, error_detail:sys):
 5 |     _, _, exc_tb = error_detail.exc_info()
 6 |     file_name = exc_tb.tb_frame.f_code.co_filename
 7 |     error_message = "Error occurred python script name [{0}] line number [{1}] error message [{2}]".format(
 8 |         file_name, exc_tb.tb_lineno, str(error)
 9 |     )
10 | 
11 |     return error_message
12 | 
13 | class USvisaException(Exception):
14 |     def __init__(self, error_message, error_detail):
15 |         """
16 |         :param error_message: error message in string format
17 |         """
18 |         super().__init__(error_message)
19 |         self.error_message = error_message_detail(
20 |             error_message, error_detail=error_detail
21 |         )
22 | 
23 |     def __str__(self):
24 |         return self.error_message


--------------------------------------------------------------------------------
/config/model.yaml:
--------------------------------------------------------------------------------
 1 | grid_search:
 2 |   class: GridSearchCV
 3 |   module: sklearn.model_selection
 4 |   params:
 5 |     cv: 3
 6 |     verbose: 3
 7 | model_selection:
 8 |   module_0:
 9 |     class: KNeighborsClassifier
10 |     module: sklearn.neighbors
11 |     params:
12 |       algorithm: kd_tree
13 |       weights: uniform
14 |       n_neighbors: 3
15 |     search_param_grid:
16 |       algorithm:
17 |       - auto
18 |       - ball_tree
19 |       - kd_tree
20 |       - brute
21 |       weights:
22 |       - uniform
23 |       - distance
24 |       n_neighbors:
25 |       - 3
26 |       - 5
27 |       - 9
28 | 
29 |       
30 |   module_1:
31 |     class: RandomForestClassifier
32 |     module: sklearn.ensemble
33 |     params:
34 |       max_depth: 10
35 |       max_features: sqrt
36 |       n_estimators: 3
37 |     search_param_grid:
38 |       max_depth:
39 |       - 10
40 |       - 15
41 |       - 20
42 |       max_features:
43 |       - sqrt
44 |       - log2
45 |       n_estimators:
46 |       - 3
47 |       - 5
48 |       - 9
49 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 BAPPY AHMED
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/us_visa/entity/artifact_entity.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | 
 4 | @dataclass
 5 | class DataIngestionArtifact:
 6 |     trained_file_path:str 
 7 |     test_file_path:str 
 8 | 
 9 | 
10 | 
11 | @dataclass
12 | class DataValidationArtifact:
13 |     validation_status:bool
14 |     message: str
15 |     drift_report_file_path: str
16 | 
17 | 
18 | @dataclass
19 | class DataTransformationArtifact:
20 |     transformed_object_file_path:str 
21 |     transformed_train_file_path:str
22 |     transformed_test_file_path:str
23 | 
24 | 
25 | @dataclass
26 | class ClassificationMetricArtifact:
27 |     f1_score:float
28 |     precision_score:float
29 |     recall_score:float
30 | 
31 | 
32 | 
33 | @dataclass
34 | class ModelTrainerArtifact:
35 |     trained_model_file_path:str 
36 |     metric_artifact:ClassificationMetricArtifact
37 | 
38 | 
39 | 
40 | @dataclass
41 | class ModelEvaluationArtifact:
42 |     is_model_accepted:bool
43 |     changed_accuracy:float
44 |     s3_model_path:str 
45 |     trained_model_path:str
46 | 
47 | 
48 | 
49 | @dataclass
50 | class ModelPusherArtifact:
51 |     bucket_name:str
52 |     s3_model_path:str
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 |  


--------------------------------------------------------------------------------
/config/schema.yaml:
--------------------------------------------------------------------------------
 1 | columns:
 2 |   - case_id: category
 3 |   - continent: category
 4 |   - education_of_employee: category
 5 |   - has_job_experience: category
 6 |   - requires_job_training: category
 7 |   - no_of_employees: int
 8 |   - yr_of_estab: int
 9 |   - region_of_employment: category
10 |   - prevailing_wage: int
11 |   - unit_of_wage: category
12 |   - full_time_position: category
13 |   - case_status: category
14 | 
15 | numerical_columns:
16 |   - no_of_employees
17 |   - prevailing_wage
18 |   - yr_of_estab
19 | 
20 | categorical_columns:
21 |   - case_id
22 |   - continent
23 |   - education_of_employee
24 |   - has_job_experience
25 |   - requires_job_training
26 |   - region_of_employment
27 |   - unit_of_wage
28 |   - full_time_position
29 |   - case_status
30 | 
31 | drop_columns:
32 |   - case_id
33 |   - yr_of_estab
34 | 
35 | # for data transformation
36 | num_features:
37 |   - no_of_employees
38 |   - prevailing_wage
39 |   - company_age
40 | 
41 | or_columns:
42 |   - has_job_experience
43 |   - requires_job_training
44 |   - full_time_position
45 |   - education_of_employee
46 | 
47 | oh_columns:
48 |   - continent
49 |   - unit_of_wage
50 |   - region_of_employment
51 | 
52 | transform_columns:
53 |   - no_of_employees
54 |   - company_age


--------------------------------------------------------------------------------
/us_visa/configuration/mongo_db_connection.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from us_visa.exception import USvisaException
 4 | from us_visa.logger import logging
 5 | 
 6 | import os
 7 | from us_visa.constants import DATABASE_NAME, MONGODB_URL_KEY
 8 | import pymongo
 9 | import certifi
10 | 
11 | ca = certifi.where()
12 | 
13 | class MongoDBClient:
14 |     """
15 |     Class Name :   export_data_into_feature_store
16 |     Description :   This method exports the dataframe from mongodb feature store as dataframe 
17 |     
18 |     Output      :   connection to mongodb database
19 |     On Failure  :   raises an exception
20 |     """
21 |     client = None
22 | 
23 |     def __init__(self, database_name=DATABASE_NAME) -> None:
24 |         try:
25 |             if MongoDBClient.client is None:
26 |                 mongo_db_url = os.getenv(MONGODB_URL_KEY)
27 |                 if mongo_db_url is None:
28 |                     raise Exception(f"Environment key: {MONGODB_URL_KEY} is not set.")
29 |                 MongoDBClient.client = pymongo.MongoClient(mongo_db_url, tlsCAFile=ca)
30 |             self.client = MongoDBClient.client
31 |             self.database = self.client[database_name]
32 |             self.database_name = database_name
33 |             logging.info("MongoDB connection succesfull")
34 |         except Exception as e:
35 |             raise USvisaException(e,sys)


--------------------------------------------------------------------------------
/us_visa/data_access/usvisa_data.py:
--------------------------------------------------------------------------------
 1 | from us_visa.configuration.mongo_db_connection import MongoDBClient
 2 | from us_visa.constants import DATABASE_NAME
 3 | from us_visa.exception import USvisaException
 4 | import pandas as pd
 5 | import sys
 6 | from typing import Optional
 7 | import numpy as np
 8 | 
 9 | 
10 | 
11 | class USvisaData:
12 |     """
13 |     This class help to export entire mongo db record as pandas dataframe
14 |     """
15 | 
16 |     def __init__(self):
17 |         """
18 |         """
19 |         try:
20 |             self.mongo_client = MongoDBClient(database_name=DATABASE_NAME)
21 |         except Exception as e:
22 |             raise USvisaException(e,sys)
23 |         
24 | 
25 |     def export_collection_as_dataframe(self,collection_name:str,database_name:Optional[str]=None)->pd.DataFrame:
26 |         try:
27 |             """
28 |             export entire collectin as dataframe:
29 |             return pd.DataFrame of collection
30 |             """
31 |             if database_name is None:
32 |                 collection = self.mongo_client.database[collection_name]
33 |             else:
34 |                 collection = self.mongo_client[database_name][collection_name]
35 | 
36 |             df = pd.DataFrame(list(collection.find()))
37 |             if "_id" in df.columns.to_list():
38 |                 df = df.drop(columns=["_id"], axis=1)
39 |             df.replace({"na":np.nan},inplace=True)
40 |             return df
41 |         except Exception as e:
42 |             raise USvisaException(e,sys)


--------------------------------------------------------------------------------
/static/css/style.css:
--------------------------------------------------------------------------------
 1 | .navbar-light .navbar-brand {
 2 |     color: rgba(0, 0, 0, .9);
 3 | }
 4 | 
 5 | .navbar-light .navbar-brand {
 6 |     margin-left: auto;
 7 |     margin-right: auto;
 8 | }
 9 | 
10 | body {
11 |     margin: 0;
12 |     padding: 0;
13 |     font-family: sans-serif;
14 | }
15 | 
16 | .formBox {
17 |     margin-top: 50px;
18 |     padding: 50px;
19 | }
20 | 
21 | h1 {
22 |     margin: 0;
23 |     padding: 0;
24 |     text-align: center;
25 |     margin-bottom: 50px !important;
26 |     text-transform: uppercase;
27 |     font-size: 48px;
28 | }
29 | 
30 | .inputBox {
31 |     position: relative;
32 |     box-sizing: border-box;
33 |     margin-bottom: 40px;
34 | }
35 | 
36 | .button {
37 |     width: 100%;
38 |     background: #00bcd4;
39 |     color: #fff;
40 |     border-radius: 0;
41 |     border: none;
42 |     outline: none;
43 |     height: 50px;
44 |     font-size: 24px;
45 | }
46 | 
47 | .input {
48 |     position: relative;
49 |     width: 100%;
50 |     height: 50px;
51 |     background: transparent;
52 |     border: none;
53 |     outline: none;
54 |     font-size: 24px;
55 |     border-bottom: 2px solid rgba(0, 0, 0, .5);
56 | }
57 | 
58 | .inputText {
59 |     position: absolute;
60 |     line-height: 50px;
61 |     font-size: 24px;
62 |     transition: .5s;
63 |     opacity: 0.5;
64 | }
65 | 
66 | .focus .inputText {
67 |     transform: translateY(-30px);
68 |     font-size: 18px;
69 |     opacity: 1;
70 |     color: #00bcd4;
71 | }
72 | 
73 | .btn_train {
74 |     width: 100%;
75 |     background: red;
76 |     color: #fff;
77 |     border-radius: 0;
78 |     border: none;
79 |     outline: none;
80 |     height: 50px;
81 |     font-size: 24px;
82 | }


--------------------------------------------------------------------------------
/template.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | 
 4 | project_name = "us_visa"
 5 | 
 6 | list_of_files = [
 7 | 
 8 |     f"{project_name}/__init__.py",
 9 |     f"{project_name}/components/__init__.py",
10 |     f"{project_name}/components/data_ingestion.py",  
11 |     f"{project_name}/components/data_validation.py",
12 |     f"{project_name}/components/data_transformation.py",
13 |     f"{project_name}/components/model_trainer.py",
14 |     f"{project_name}/components/model_evaluation.py",
15 |     f"{project_name}/components/model_pusher.py",
16 |     f"{project_name}/configuration/__init__.py",
17 |     f"{project_name}/constants/__init__.py",
18 |     f"{project_name}/entity/__init__.py",
19 |     f"{project_name}/entity/config_entity.py",
20 |     f"{project_name}/entity/artifact_entity.py",
21 |     f"{project_name}/exception/__init__.py",
22 |     f"{project_name}/logger/__init__.py",
23 |     f"{project_name}/pipline/__init__.py",
24 |     f"{project_name}/pipline/training_pipeline.py",
25 |     f"{project_name}/pipline/prediction_pipeline.py",
26 |     f"{project_name}/utils/__init__.py",
27 |     f"{project_name}/utils/main_utils.py",
28 |     "app.py",
29 |     "requirements.txt",
30 |     "Dockerfile",
31 |     ".dockerignore",
32 |     "demo.py",
33 |     "setup.py",
34 |     "config/model.yaml",
35 |     "config/schema.yaml",
36 | 
37 | 
38 | ]
39 | 
40 | 
41 | for filepath in list_of_files:
42 |     filepath = Path(filepath)
43 |     filedir, filename = os.path.split(filepath)
44 |     if filedir != "":
45 |         os.makedirs(filedir, exist_ok=True)
46 |     if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
47 |         with open(filepath, "w") as f:
48 |             pass
49 |     else:
50 |         print(f"file is already present at: {filepath}")


--------------------------------------------------------------------------------
/us_visa/configuration/aws_connection.py:
--------------------------------------------------------------------------------
 1 | import boto3
 2 | import os
 3 | from us_visa.constants import AWS_SECRET_ACCESS_KEY_ENV_KEY, AWS_ACCESS_KEY_ID_ENV_KEY,REGION_NAME
 4 | 
 5 | 
 6 | class S3Client:
 7 | 
 8 |     s3_client=None
 9 |     s3_resource = None
10 |     def __init__(self, region_name=REGION_NAME):
11 |         """ 
12 |         This Class gets aws credentials from env_variable and creates an connection with s3 bucket 
13 |         and raise exception when environment variable is not set
14 |         """
15 | 
16 |         if S3Client.s3_resource==None or S3Client.s3_client==None:
17 |             __access_key_id = os.getenv(AWS_ACCESS_KEY_ID_ENV_KEY, )
18 |             __secret_access_key = os.getenv(AWS_SECRET_ACCESS_KEY_ENV_KEY, )
19 |             if __access_key_id is None:
20 |                 raise Exception(f"Environment variable: {AWS_ACCESS_KEY_ID_ENV_KEY} is not not set.")
21 |             if __secret_access_key is None:
22 |                 raise Exception(f"Environment variable: {AWS_SECRET_ACCESS_KEY_ENV_KEY} is not set.")
23 |         
24 |             S3Client.s3_resource = boto3.resource('s3',
25 |                                             aws_access_key_id=__access_key_id,
26 |                                             aws_secret_access_key=__secret_access_key,
27 |                                             region_name=region_name
28 |                                             )
29 |             S3Client.s3_client = boto3.client('s3',
30 |                                         aws_access_key_id=__access_key_id,
31 |                                         aws_secret_access_key=__secret_access_key,
32 |                                         region_name=region_name
33 |                                         )
34 |         self.s3_resource = S3Client.s3_resource
35 |         self.s3_client = S3Client.s3_client
36 |         


--------------------------------------------------------------------------------
/us_visa/entity/estimator.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from pandas import DataFrame
 4 | from sklearn.pipeline import Pipeline
 5 | 
 6 | from us_visa.exception import USvisaException
 7 | from us_visa.logger import logging
 8 | 
 9 | class TargetValueMapping:
10 |     def __init__(self):
11 |         self.Certified:int = 0
12 |         self.Denied:int = 1
13 |     def _asdict(self):
14 |         return self.__dict__
15 |     def reverse_mapping(self):
16 |         mapping_response = self._asdict()
17 |         return dict(zip(mapping_response.values(),mapping_response.keys()))
18 |     
19 | 
20 | 
21 | 
22 | 
23 | class USvisaModel:
24 |     def __init__(self, preprocessing_object: Pipeline, trained_model_object: object):
25 |         """
26 |         :param preprocessing_object: Input Object of preprocesser
27 |         :param trained_model_object: Input Object of trained model 
28 |         """
29 |         self.preprocessing_object = preprocessing_object
30 |         self.trained_model_object = trained_model_object
31 | 
32 |     def predict(self, dataframe: DataFrame) -> DataFrame:
33 |         """
34 |         Function accepts raw inputs and then transformed raw input using preprocessing_object
35 |         which guarantees that the inputs are in the same format as the training data
36 |         At last it performs prediction on transformed features
37 |         """
38 |         logging.info("Entered predict method of UTruckModel class")
39 | 
40 |         try:
41 |             logging.info("Using the trained model to get predictions")
42 | 
43 |             transformed_feature = self.preprocessing_object.transform(dataframe)
44 | 
45 |             logging.info("Used the trained model to get predictions")
46 |             return self.trained_model_object.predict(transformed_feature)
47 | 
48 |         except Exception as e:
49 |             raise USvisaException(e, sys) from e
50 | 
51 |     def __repr__(self):
52 |         return f"{type(self.trained_model_object).__name__}()"
53 | 
54 |     def __str__(self):
55 |         return f"{type(self.trained_model_object).__name__}()"


--------------------------------------------------------------------------------
/.github/workflows/aws.yaml:
--------------------------------------------------------------------------------
 1 | name: Deploy Application Docker Image to EC2 instance
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 | 
 7 | jobs:
 8 |   Continuous-Integration:
 9 |     runs-on: ubuntu-latest
10 | 
11 |     steps:
12 |       - name: Checkout
13 |         uses: actions/checkout@v2
14 | 
15 |       - name: Configure AWS credentials
16 |         uses: aws-actions/configure-aws-credentials@v1
17 |         with:
18 |           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
19 |           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
20 |           aws-region: ${{ secrets.AWS_DEFAULT_REGION }}
21 | 
22 |       - name: Login to Amazon ECR
23 |         id: login-ecr
24 |         uses: aws-actions/amazon-ecr-login@v1
25 | 
26 |       - name: Build, tag, and push image to Amazon ECR
27 |         id: build-image
28 |         env:
29 |           ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
30 |           ECR_REPOSITORY: ${{ secrets.ECR_REPO }}
31 |           IMAGE_TAG: latest
32 |         run: |
33 |           docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .  
34 |           docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
35 |           echo "::set-output name=image::$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"
36 | 
37 |   Continuous-Deployment:
38 |     needs: Continuous-Integration
39 |     runs-on: self-hosted
40 |     steps:
41 |       - name: Checkout
42 |         uses: actions/checkout@v3
43 | 
44 |       - name: Configure AWS credentials
45 |         uses: aws-actions/configure-aws-credentials@v1
46 |         with:
47 |           aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
48 |           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
49 |           aws-region: ${{ secrets.AWS_DEFAULT_REGION }}
50 | 
51 |       - name: Login to Amazon ECR
52 |         id: login-ecr
53 |         uses: aws-actions/amazon-ecr-login@v1
54 | 
55 |       - name: Run Docker Image to serve users
56 |         run: |
57 |          docker run -d -e AWS_ACCESS_KEY_ID="${{ secrets.AWS_ACCESS_KEY_ID }}" -e AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_SECRET_ACCESS_KEY }}" -e AWS_DEFAULT_REGION="${{ secrets.AWS_DEFAULT_REGION }}" -e MONGODB_URL="${{ secrets.MONGODB_URL }}" -p 8080:8080 "${{ steps.login-ecr.outputs.registry }}"/"${{ secrets.ECR_REPO }}":latest


--------------------------------------------------------------------------------
/us_visa/constants/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from datetime import date
 3 | 
 4 | DATABASE_NAME = "US_VISA"
 5 | 
 6 | COLLECTION_NAME = "visa_data"
 7 | 
 8 | MONGODB_URL_KEY = "MONGODB_URL"
 9 | 
10 | PIPELINE_NAME: str = "usvisa"
11 | ARTIFACT_DIR: str = "artifact"
12 | 
13 | MODEL_FILE_NAME = "model.pkl"
14 | 
15 | TARGET_COLUMN = "case_status"
16 | CURRENT_YEAR = date.today().year
17 | PREPROCSSING_OBJECT_FILE_NAME = "preprocessing.pkl"
18 | 
19 | FILE_NAME: str = "usvisa.csv"
20 | TRAIN_FILE_NAME: str = "train.csv"
21 | TEST_FILE_NAME: str = "test.csv"
22 | SCHEMA_FILE_PATH = os.path.join("config", "schema.yaml")
23 | 
24 | 
25 | AWS_ACCESS_KEY_ID_ENV_KEY = "AWS_ACCESS_KEY_ID"
26 | AWS_SECRET_ACCESS_KEY_ENV_KEY = "AWS_SECRET_ACCESS_KEY"
27 | REGION_NAME = "us-east-1"
28 | 
29 | 
30 | """
31 | Data Ingestion related constant start with DATA_INGESTION VAR NAME
32 | """
33 | DATA_INGESTION_COLLECTION_NAME: str = "visa_data"
34 | DATA_INGESTION_DIR_NAME: str = "data_ingestion"
35 | DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
36 | DATA_INGESTION_INGESTED_DIR: str = "ingested"
37 | DATA_INGESTION_TRAIN_TEST_SPLIT_RATIO: float = 0.2
38 | 
39 | 
40 | 
41 | """
42 | Data Validation realted contant start with DATA_VALIDATION VAR NAME
43 | """
44 | DATA_VALIDATION_DIR_NAME: str = "data_validation"
45 | DATA_VALIDATION_DRIFT_REPORT_DIR: str = "drift_report"
46 | DATA_VALIDATION_DRIFT_REPORT_FILE_NAME: str = "report.yaml"
47 | 
48 | 
49 | 
50 | """
51 | Data Transformation ralated constant start with DATA_TRANSFORMATION VAR NAME
52 | """
53 | DATA_TRANSFORMATION_DIR_NAME: str = "data_transformation"
54 | DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR: str = "transformed"
55 | DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR: str = "transformed_object"
56 | 
57 | 
58 | """
59 | MODEL TRAINER related constant start with MODEL_TRAINER var name
60 | """
61 | MODEL_TRAINER_DIR_NAME: str = "model_trainer"
62 | MODEL_TRAINER_TRAINED_MODEL_DIR: str = "trained_model"
63 | MODEL_TRAINER_TRAINED_MODEL_NAME: str = "model.pkl"
64 | MODEL_TRAINER_EXPECTED_SCORE: float = 0.6
65 | MODEL_TRAINER_MODEL_CONFIG_FILE_PATH: str = os.path.join("config", "model.yaml")
66 | 
67 | 
68 | MODEL_EVALUATION_CHANGED_THRESHOLD_SCORE: float = 0.02
69 | MODEL_BUCKET_NAME = "usvisa-model2024"
70 | MODEL_PUSHER_S3_KEY = "model-registry"
71 | 
72 | 
73 | APP_HOST = "0.0.0.0"
74 | APP_PORT = 8080
75 | 


--------------------------------------------------------------------------------
/us_visa/entity/s3_estimator.py:
--------------------------------------------------------------------------------
 1 | from us_visa.cloud_storage.aws_storage import SimpleStorageService
 2 | from us_visa.exception import USvisaException
 3 | from us_visa.entity.estimator import USvisaModel
 4 | import sys
 5 | from pandas import DataFrame
 6 | 
 7 | 
 8 | class USvisaEstimator:
 9 |     """
10 |     This class is used to save and retrieve us_visas model in s3 bucket and to do prediction
11 |     """
12 | 
13 |     def __init__(self,bucket_name,model_path,):
14 |         """
15 |         :param bucket_name: Name of your model bucket
16 |         :param model_path: Location of your model in bucket
17 |         """
18 |         self.bucket_name = bucket_name
19 |         self.s3 = SimpleStorageService()
20 |         self.model_path = model_path
21 |         self.loaded_model:USvisaModel=None
22 | 
23 | 
24 |     def is_model_present(self,model_path):
25 |         try:
26 |             return self.s3.s3_key_path_available(bucket_name=self.bucket_name, s3_key=model_path)
27 |         except USvisaException as e:
28 |             print(e)
29 |             return False
30 | 
31 |     def load_model(self,)->USvisaModel:
32 |         """
33 |         Load the model from the model_path
34 |         :return:
35 |         """
36 | 
37 |         return self.s3.load_model(self.model_path,bucket_name=self.bucket_name)
38 | 
39 |     def save_model(self,from_file,remove:bool=False)->None:
40 |         """
41 |         Save the model to the model_path
42 |         :param from_file: Your local system model path
43 |         :param remove: By default it is false that mean you will have your model locally available in your system folder
44 |         :return:
45 |         """
46 |         try:
47 |             self.s3.upload_file(from_file,
48 |                                 to_filename=self.model_path,
49 |                                 bucket_name=self.bucket_name,
50 |                                 remove=remove
51 |                                 )
52 |         except Exception as e:
53 |             raise USvisaException(e, sys)
54 | 
55 | 
56 |     def predict(self,dataframe:DataFrame):
57 |         """
58 |         :param dataframe:
59 |         :return:
60 |         """
61 |         try:
62 |             if self.loaded_model is None:
63 |                 self.loaded_model = self.load_model()
64 |             return self.loaded_model.predict(dataframe=dataframe)
65 |         except Exception as e:
66 |             raise USvisaException(e, sys)


--------------------------------------------------------------------------------
/us_visa/components/model_pusher.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from us_visa.cloud_storage.aws_storage import SimpleStorageService
 4 | from us_visa.exception import USvisaException
 5 | from us_visa.logger import logging
 6 | from us_visa.entity.artifact_entity import ModelPusherArtifact, ModelEvaluationArtifact
 7 | from us_visa.entity.config_entity import ModelPusherConfig
 8 | from us_visa.entity.s3_estimator import USvisaEstimator
 9 | 
10 | 
11 | class ModelPusher:
12 |     def __init__(self, model_evaluation_artifact: ModelEvaluationArtifact,
13 |                  model_pusher_config: ModelPusherConfig):
14 |         """
15 |         :param model_evaluation_artifact: Output reference of data evaluation artifact stage
16 |         :param model_pusher_config: Configuration for model pusher
17 |         """
18 |         self.s3 = SimpleStorageService()
19 |         self.model_evaluation_artifact = model_evaluation_artifact
20 |         self.model_pusher_config = model_pusher_config
21 |         self.usvisa_estimator = USvisaEstimator(bucket_name=model_pusher_config.bucket_name,
22 |                                 model_path=model_pusher_config.s3_model_key_path)
23 | 
24 |     def initiate_model_pusher(self) -> ModelPusherArtifact:
25 |         """
26 |         Method Name :   initiate_model_evaluation
27 |         Description :   This function is used to initiate all steps of the model pusher
28 |         
29 |         Output      :   Returns model evaluation artifact
30 |         On Failure  :   Write an exception log and then raise an exception
31 |         """
32 |         logging.info("Entered initiate_model_pusher method of ModelTrainer class")
33 | 
34 |         try:
35 |             logging.info("Uploading artifacts folder to s3 bucket")
36 | 
37 |             self.usvisa_estimator.save_model(from_file=self.model_evaluation_artifact.trained_model_path)
38 | 
39 | 
40 |             model_pusher_artifact = ModelPusherArtifact(bucket_name=self.model_pusher_config.bucket_name,
41 |                                                         s3_model_path=self.model_pusher_config.s3_model_key_path)
42 | 
43 |             logging.info("Uploaded artifacts folder to s3 bucket")
44 |             logging.info(f"Model pusher artifact: [{model_pusher_artifact}]")
45 |             logging.info("Exited initiate_model_pusher method of ModelTrainer class")
46 |             
47 |             return model_pusher_artifact
48 |         except Exception as e:
49 |             raise USvisaException(e, sys) from e


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # US-Visa-Approval-Prediction
  2 | 
  3 | ## Live matarials docs
  4 | 
  5 | [link](https://docs.google.com/document/d/1UFiHnyKRqgx8Lodsvdzu58LbVjdWHNf-uab2WmhE0A4/edit?usp=sharing)
  6 | 
  7 | 
  8 | ## Git commands
  9 | 
 10 | ```bash
 11 | git add .
 12 | 
 13 | git commit -m "Updated"
 14 | 
 15 | git push origin main
 16 | ```
 17 | 
 18 | ## How to run?
 19 | 
 20 | ```bash
 21 | conda create -n visa python=3.8 -y
 22 | ```
 23 | 
 24 | ```bash
 25 | conda activate visa
 26 | ```
 27 | 
 28 | ```bash
 29 | pip install -r requirements.txt
 30 | ```
 31 | 
 32 | ```bash
 33 | python app.py
 34 | ```
 35 | 
 36 | 
 37 | ## Workflow
 38 | 
 39 | 1. constant
 40 | 2. config_entity
 41 | 3. artifact_entity
 42 | 4. conponent
 43 | 5. pipeline
 44 | 6. app.py / demo.py
 45 | 
 46 | 
 47 | ### Export the  environment variable
 48 | ```bash
 49 | 
 50 | 
 51 | export MONGODB_URL="mongodb+srv://<username>:<password>...."
 52 | 
 53 | export AWS_ACCESS_KEY_ID=<AWS_ACCESS_KEY_ID>
 54 | 
 55 | export AWS_SECRET_ACCESS_KEY=<AWS_SECRET_ACCESS_KEY>
 56 | ```
 57 | 
 58 | 
 59 | 
 60 | # AWS-CICD-Deployment-with-Github-Actions
 61 | 
 62 | ## 1. Login to AWS console.
 63 | 
 64 | ## 2. Create IAM user for deployment
 65 | 
 66 | 	#with specific access
 67 | 
 68 | 	1. EC2 access : It is virtual machine
 69 | 
 70 | 	2. ECR: Elastic Container registry to save your docker image in aws
 71 | 
 72 | 
 73 | 	#Description: About the deployment
 74 | 
 75 | 	1. Build docker image of the source code
 76 | 
 77 | 	2. Push your docker image to ECR
 78 | 
 79 | 	3. Launch Your EC2 
 80 | 
 81 | 	4. Pull Your image from ECR in EC2
 82 | 
 83 | 	5. Lauch your docker image in EC2
 84 | 
 85 | 	#Policy:
 86 | 
 87 | 	1. AmazonEC2ContainerRegistryFullAccess
 88 | 
 89 | 	2. AmazonEC2FullAccess
 90 | 
 91 | 	
 92 | ## 3. Create ECR repo to store/save docker image
 93 |     - Save the URI: 136566696263.dkr.ecr.us-east-1.amazonaws.com/mlproject
 94 | 
 95 | 	
 96 | ## 4. Create EC2 machine (Ubuntu) 
 97 | 
 98 | ## 5. Open EC2 and Install docker in EC2 Machine:
 99 | 	
100 | 	
101 | 	#optinal
102 | 
103 | 	sudo apt-get update -y
104 | 
105 | 	sudo apt-get upgrade
106 | 	
107 | 	#required
108 | 
109 | 	curl -fsSL https://get.docker.com -o get-docker.sh
110 | 
111 | 	sudo sh get-docker.sh
112 | 
113 | 	sudo usermod -aG docker ubuntu
114 | 
115 | 	newgrp docker
116 | 	
117 | # 6. Configure EC2 as self-hosted runner:
118 |     setting>actions>runner>new self hosted runner> choose os> then run command one by one
119 | 
120 | 
121 | # 7. Setup github secrets:
122 | 
123 |    - AWS_ACCESS_KEY_ID
124 |    - AWS_SECRET_ACCESS_KEY
125 |    - AWS_DEFAULT_REGION
126 |    - ECR_REPO
127 | 
128 |     
129 | 


--------------------------------------------------------------------------------
/us_visa/entity/config_entity.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from us_visa.constants import *
 3 | from dataclasses import dataclass
 4 | from datetime import datetime
 5 | 
 6 | TIMESTAMP: str = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")
 7 | 
 8 | @dataclass
 9 | class TrainingPipelineConfig:
10 |     pipeline_name: str = PIPELINE_NAME
11 |     artifact_dir: str = os.path.join(ARTIFACT_DIR, TIMESTAMP)
12 |     timestamp: str = TIMESTAMP
13 | 
14 | 
15 | training_pipeline_config: TrainingPipelineConfig = TrainingPipelineConfig()
16 | 
17 | @dataclass
18 | class DataIngestionConfig:
19 |     data_ingestion_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_INGESTION_DIR_NAME)
20 |     feature_store_file_path: str = os.path.join(data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR, FILE_NAME)
21 |     training_file_path: str = os.path.join(data_ingestion_dir, DATA_INGESTION_INGESTED_DIR, TRAIN_FILE_NAME)
22 |     testing_file_path: str = os.path.join(data_ingestion_dir, DATA_INGESTION_INGESTED_DIR, TEST_FILE_NAME)
23 |     train_test_split_ratio: float = DATA_INGESTION_TRAIN_TEST_SPLIT_RATIO
24 |     collection_name:str = DATA_INGESTION_COLLECTION_NAME
25 | 
26 | 
27 | 
28 | @dataclass
29 | class DataValidationConfig:
30 |     data_validation_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_VALIDATION_DIR_NAME)
31 |     drift_report_file_path: str = os.path.join(data_validation_dir, DATA_VALIDATION_DRIFT_REPORT_DIR,
32 |                                                DATA_VALIDATION_DRIFT_REPORT_FILE_NAME)
33 |     
34 | 
35 | 
36 | 
37 | @dataclass
38 | class DataTransformationConfig:
39 |     data_transformation_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_TRANSFORMATION_DIR_NAME)
40 |     transformed_train_file_path: str = os.path.join(data_transformation_dir, DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,
41 |                                                     TRAIN_FILE_NAME.replace("csv", "npy"))
42 |     transformed_test_file_path: str = os.path.join(data_transformation_dir, DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,
43 |                                                    TEST_FILE_NAME.replace("csv", "npy"))
44 |     transformed_object_file_path: str = os.path.join(data_transformation_dir,
45 |                                                      DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR,
46 |                                                      PREPROCSSING_OBJECT_FILE_NAME)
47 |     
48 | 
49 | 
50 | 
51 | @dataclass
52 | class ModelTrainerConfig:
53 |     model_trainer_dir: str = os.path.join(training_pipeline_config.artifact_dir, MODEL_TRAINER_DIR_NAME)
54 |     trained_model_file_path: str = os.path.join(model_trainer_dir, MODEL_TRAINER_TRAINED_MODEL_DIR, MODEL_FILE_NAME)
55 |     expected_accuracy: float = MODEL_TRAINER_EXPECTED_SCORE
56 |     model_config_file_path: str = MODEL_TRAINER_MODEL_CONFIG_FILE_PATH
57 | 
58 | 
59 | 
60 | @dataclass
61 | class ModelEvaluationConfig:
62 |     changed_threshold_score: float = MODEL_EVALUATION_CHANGED_THRESHOLD_SCORE
63 |     bucket_name: str = MODEL_BUCKET_NAME
64 |     s3_model_key_path: str = MODEL_FILE_NAME
65 | 
66 | 
67 | 
68 | @dataclass
69 | class ModelPusherConfig:
70 |     bucket_name: str = MODEL_BUCKET_NAME
71 |     s3_model_key_path: str = MODEL_FILE_NAME
72 | 
73 | 
74 | 
75 | @dataclass
76 | class USvisaPredictorConfig:
77 |     model_file_path: str = MODEL_FILE_NAME
78 |     model_bucket_name: str = MODEL_BUCKET_NAME
79 | 
80 | 


--------------------------------------------------------------------------------
/us_visa/utils/main_utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | 
  4 | import numpy as np
  5 | import dill
  6 | import yaml
  7 | from pandas import DataFrame
  8 | 
  9 | from us_visa.exception import USvisaException
 10 | from us_visa.logger import logging
 11 | 
 12 | 
 13 | def read_yaml_file(file_path: str) -> dict:
 14 |     try:
 15 |         with open(file_path, "rb") as yaml_file:
 16 |             return yaml.safe_load(yaml_file)
 17 | 
 18 |     except Exception as e:
 19 |         raise USvisaException(e, sys) from e
 20 | 
 21 | 
 22 | def write_yaml_file(file_path: str, content: object, replace: bool = False) -> None:
 23 |     try:
 24 |         if replace:
 25 |             if os.path.exists(file_path):
 26 |                 os.remove(file_path)
 27 |         os.makedirs(os.path.dirname(file_path), exist_ok=True)
 28 |         with open(file_path, "w") as file:
 29 |             yaml.dump(content, file)
 30 |     except Exception as e:
 31 |         raise USvisaException(e, sys) from e
 32 | 
 33 | 
 34 | def load_object(file_path: str) -> object:
 35 |     logging.info("Entered the load_object method of utils")
 36 | 
 37 |     try:
 38 | 
 39 |         with open(file_path, "rb") as file_obj:
 40 |             obj = dill.load(file_obj)
 41 | 
 42 |         logging.info("Exited the load_object method of utils")
 43 | 
 44 |         return obj
 45 | 
 46 |     except Exception as e:
 47 |         raise USvisaException(e, sys) from e
 48 | 
 49 | def save_numpy_array_data(file_path: str, array: np.array):
 50 |     """
 51 |     Save numpy array data to file
 52 |     file_path: str location of file to save
 53 |     array: np.array data to save
 54 |     """
 55 |     try:
 56 |         dir_path = os.path.dirname(file_path)
 57 |         os.makedirs(dir_path, exist_ok=True)
 58 |         with open(file_path, 'wb') as file_obj:
 59 |             np.save(file_obj, array)
 60 |     except Exception as e:
 61 |         raise USvisaException(e, sys) from e
 62 | 
 63 | 
 64 | def load_numpy_array_data(file_path: str) -> np.array:
 65 |     """
 66 |     load numpy array data from file
 67 |     file_path: str location of file to load
 68 |     return: np.array data loaded
 69 |     """
 70 |     try:
 71 |         with open(file_path, 'rb') as file_obj:
 72 |             return np.load(file_obj)
 73 |     except Exception as e:
 74 |         raise USvisaException(e, sys) from e
 75 | 
 76 | 
 77 | def save_object(file_path: str, obj: object) -> None:
 78 |     logging.info("Entered the save_object method of utils")
 79 | 
 80 |     try:
 81 |         os.makedirs(os.path.dirname(file_path), exist_ok=True)
 82 |         with open(file_path, "wb") as file_obj:
 83 |             dill.dump(obj, file_obj)
 84 | 
 85 |         logging.info("Exited the save_object method of utils")
 86 | 
 87 |     except Exception as e:
 88 |         raise USvisaException(e, sys) from e
 89 | 
 90 | 
 91 | def drop_columns(df: DataFrame, cols: list)-> DataFrame:
 92 | 
 93 |     """
 94 |     drop the columns form a pandas DataFrame
 95 |     df: pandas DataFrame
 96 |     cols: list of columns to be dropped
 97 |     """
 98 |     logging.info("Entered drop_columns methon of utils")
 99 | 
100 |     try:
101 |         df = df.drop(columns=cols, axis=1)
102 | 
103 |         logging.info("Exited the drop_columns method of utils")
104 |         
105 |         return df
106 |     except Exception as e:
107 |         raise USvisaException(e, sys) from e


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | artifact/*
162 | 


--------------------------------------------------------------------------------
/us_visa/pipline/prediction_pipeline.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | 
  4 | import numpy as np
  5 | import pandas as pd
  6 | from us_visa.entity.config_entity import USvisaPredictorConfig
  7 | from us_visa.entity.s3_estimator import USvisaEstimator
  8 | from us_visa.exception import USvisaException
  9 | from us_visa.logger import logging
 10 | from us_visa.utils.main_utils import read_yaml_file
 11 | from pandas import DataFrame
 12 | 
 13 | 
 14 | class USvisaData:
 15 |     def __init__(self,
 16 |                 continent,
 17 |                 education_of_employee,
 18 |                 has_job_experience,
 19 |                 requires_job_training,
 20 |                 no_of_employees,
 21 |                 region_of_employment,
 22 |                 prevailing_wage,
 23 |                 unit_of_wage,
 24 |                 full_time_position,
 25 |                 company_age
 26 |                 ):
 27 |         """
 28 |         Usvisa Data constructor
 29 |         Input: all features of the trained model for prediction
 30 |         """
 31 |         try:
 32 |             self.continent = continent
 33 |             self.education_of_employee = education_of_employee
 34 |             self.has_job_experience = has_job_experience
 35 |             self.requires_job_training = requires_job_training
 36 |             self.no_of_employees = no_of_employees
 37 |             self.region_of_employment = region_of_employment
 38 |             self.prevailing_wage = prevailing_wage
 39 |             self.unit_of_wage = unit_of_wage
 40 |             self.full_time_position = full_time_position
 41 |             self.company_age = company_age
 42 | 
 43 | 
 44 |         except Exception as e:
 45 |             raise USvisaException(e, sys) from e
 46 | 
 47 |     def get_usvisa_input_data_frame(self)-> DataFrame:
 48 |         """
 49 |         This function returns a DataFrame from USvisaData class input
 50 |         """
 51 |         try:
 52 |             
 53 |             usvisa_input_dict = self.get_usvisa_data_as_dict()
 54 |             return DataFrame(usvisa_input_dict)
 55 |         
 56 |         except Exception as e:
 57 |             raise USvisaException(e, sys) from e
 58 | 
 59 | 
 60 |     def get_usvisa_data_as_dict(self):
 61 |         """
 62 |         This function returns a dictionary from USvisaData class input 
 63 |         """
 64 |         logging.info("Entered get_usvisa_data_as_dict method as USvisaData class")
 65 | 
 66 |         try:
 67 |             input_data = {
 68 |                 "continent": [self.continent],
 69 |                 "education_of_employee": [self.education_of_employee],
 70 |                 "has_job_experience": [self.has_job_experience],
 71 |                 "requires_job_training": [self.requires_job_training],
 72 |                 "no_of_employees": [self.no_of_employees],
 73 |                 "region_of_employment": [self.region_of_employment],
 74 |                 "prevailing_wage": [self.prevailing_wage],
 75 |                 "unit_of_wage": [self.unit_of_wage],
 76 |                 "full_time_position": [self.full_time_position],
 77 |                 "company_age": [self.company_age],
 78 |             }
 79 | 
 80 |             logging.info("Created usvisa data dict")
 81 | 
 82 |             logging.info("Exited get_usvisa_data_as_dict method as USvisaData class")
 83 | 
 84 |             return input_data
 85 | 
 86 |         except Exception as e:
 87 |             raise USvisaException(e, sys) from e
 88 | 
 89 | class USvisaClassifier:
 90 |     def __init__(self,prediction_pipeline_config: USvisaPredictorConfig = USvisaPredictorConfig(),) -> None:
 91 |         """
 92 |         :param prediction_pipeline_config: Configuration for prediction the value
 93 |         """
 94 |         try:
 95 |             # self.schema_config = read_yaml_file(SCHEMA_FILE_PATH)
 96 |             self.prediction_pipeline_config = prediction_pipeline_config
 97 |         except Exception as e:
 98 |             raise USvisaException(e, sys)
 99 | 
100 |     def predict(self, dataframe) -> str:
101 |         """
102 |         This is the method of USvisaClassifier
103 |         Returns: Prediction in string format
104 |         """
105 |         try:
106 |             logging.info("Entered predict method of USvisaClassifier class")
107 |             model = USvisaEstimator(
108 |                 bucket_name=self.prediction_pipeline_config.model_bucket_name,
109 |                 model_path=self.prediction_pipeline_config.model_file_path,
110 |             )
111 |             result =  model.predict(dataframe)
112 |             
113 |             return result
114 |         
115 |         except Exception as e:
116 |             raise USvisaException(e, sys)


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | 
  2 | from fastapi import FastAPI, Request
  3 | from fastapi.middleware.cors import CORSMiddleware
  4 | from fastapi.responses import Response
  5 | from fastapi.staticfiles import StaticFiles
  6 | from fastapi.templating import Jinja2Templates
  7 | from starlette.responses import HTMLResponse, RedirectResponse
  8 | from uvicorn import run as app_run
  9 | 
 10 | from typing import Optional
 11 | 
 12 | from us_visa.constants import APP_HOST, APP_PORT
 13 | from us_visa.pipline.prediction_pipeline import USvisaData, USvisaClassifier
 14 | from us_visa.pipline.training_pipeline import TrainPipeline
 15 | 
 16 | app = FastAPI()
 17 | 
 18 | app.mount("/static", StaticFiles(directory="static"), name="static")
 19 | 
 20 | templates = Jinja2Templates(directory='templates')
 21 | 
 22 | origins = ["*"]
 23 | 
 24 | app.add_middleware(
 25 |     CORSMiddleware,
 26 |     allow_origins=origins,
 27 |     allow_credentials=True,
 28 |     allow_methods=["*"],
 29 |     allow_headers=["*"],
 30 | )
 31 | 
 32 | class DataForm:
 33 |     def __init__(self, request: Request):
 34 |         self.request: Request = request
 35 |         self.continent: Optional[str] = None
 36 |         self.education_of_employee: Optional[str] = None
 37 |         self.has_job_experience: Optional[str] = None
 38 |         self.requires_job_training: Optional[str] = None
 39 |         self.no_of_employees: Optional[str] = None
 40 |         self.company_age: Optional[str] = None
 41 |         self.region_of_employment: Optional[str] = None
 42 |         self.prevailing_wage: Optional[str] = None
 43 |         self.unit_of_wage: Optional[str] = None
 44 |         self.full_time_position: Optional[str] = None
 45 |         
 46 | 
 47 |     async def get_usvisa_data(self):
 48 |         form = await self.request.form()
 49 |         self.continent = form.get("continent")
 50 |         self.education_of_employee = form.get("education_of_employee")
 51 |         self.has_job_experience = form.get("has_job_experience")
 52 |         self.requires_job_training = form.get("requires_job_training")
 53 |         self.no_of_employees = form.get("no_of_employees")
 54 |         self.company_age = form.get("company_age")
 55 |         self.region_of_employment = form.get("region_of_employment")
 56 |         self.prevailing_wage = form.get("prevailing_wage")
 57 |         self.unit_of_wage = form.get("unit_of_wage")
 58 |         self.full_time_position = form.get("full_time_position")
 59 | 
 60 | @app.get("/", tags=["authentication"])
 61 | async def index(request: Request):
 62 | 
 63 |     return templates.TemplateResponse(
 64 |             "usvisa.html",{"request": request, "context": "Rendering"})
 65 | 
 66 | 
 67 | @app.get("/train")
 68 | async def trainRouteClient():
 69 |     try:
 70 |         train_pipeline = TrainPipeline()
 71 | 
 72 |         train_pipeline.run_pipeline()
 73 | 
 74 |         return Response("Training successful !!")
 75 | 
 76 |     except Exception as e:
 77 |         return Response(f"Error Occurred! {e}")
 78 | 
 79 | 
 80 | @app.post("/")
 81 | async def predictRouteClient(request: Request):
 82 |     try:
 83 |         form = DataForm(request)
 84 |         await form.get_usvisa_data()
 85 |         
 86 |         usvisa_data = USvisaData(
 87 |                                 continent= form.continent,
 88 |                                 education_of_employee = form.education_of_employee,
 89 |                                 has_job_experience = form.has_job_experience,
 90 |                                 requires_job_training = form.requires_job_training,
 91 |                                 no_of_employees= form.no_of_employees,
 92 |                                 company_age= form.company_age,
 93 |                                 region_of_employment = form.region_of_employment,
 94 |                                 prevailing_wage= form.prevailing_wage,
 95 |                                 unit_of_wage= form.unit_of_wage,
 96 |                                 full_time_position= form.full_time_position,
 97 |                                 )
 98 |         
 99 |         usvisa_df = usvisa_data.get_usvisa_input_data_frame()
100 | 
101 |         model_predictor = USvisaClassifier()
102 | 
103 |         value = model_predictor.predict(dataframe=usvisa_df)[0]
104 | 
105 |         status = None
106 |         if value == 1:
107 |             status = "Visa-approved"
108 |         else:
109 |             status = "Visa Not-Approved"
110 | 
111 |         return templates.TemplateResponse(
112 |             "usvisa.html",
113 |             {"request": request, "context": status},
114 |         )
115 |         
116 |     except Exception as e:
117 |         return {"status": False, "error": f"{e}"}
118 | 
119 | 
120 | if __name__ == "__main__":
121 |     app_run(app, host=APP_HOST, port=APP_PORT)


--------------------------------------------------------------------------------
/us_visa/components/model_trainer.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from typing import Tuple
 3 | 
 4 | import numpy as np
 5 | import pandas as pd
 6 | from pandas import DataFrame
 7 | from sklearn.pipeline import Pipeline
 8 | from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
 9 | from neuro_mf  import ModelFactory
10 | 
11 | from us_visa.exception import USvisaException
12 | from us_visa.logger import logging
13 | from us_visa.utils.main_utils import load_numpy_array_data, read_yaml_file, load_object, save_object
14 | from us_visa.entity.config_entity import ModelTrainerConfig
15 | from us_visa.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact, ClassificationMetricArtifact
16 | from us_visa.entity.estimator import USvisaModel
17 | 
18 | class ModelTrainer:
19 |     def __init__(self, data_transformation_artifact: DataTransformationArtifact,
20 |                  model_trainer_config: ModelTrainerConfig):
21 |         """
22 |         :param data_ingestion_artifact: Output reference of data ingestion artifact stage
23 |         :param data_transformation_config: Configuration for data transformation
24 |         """
25 |         self.data_transformation_artifact = data_transformation_artifact
26 |         self.model_trainer_config = model_trainer_config
27 | 
28 |     def get_model_object_and_report(self, train: np.array, test: np.array) -> Tuple[object, object]:
29 |         """
30 |         Method Name :   get_model_object_and_report
31 |         Description :   This function uses neuro_mf to get the best model object and report of the best model
32 |         
33 |         Output      :   Returns metric artifact object and best model object
34 |         On Failure  :   Write an exception log and then raise an exception
35 |         """
36 |         try:
37 |             logging.info("Using neuro_mf to get best model object and report")
38 |             model_factory = ModelFactory(model_config_path=self.model_trainer_config.model_config_file_path)
39 |             
40 |             x_train, y_train, x_test, y_test = train[:, :-1], train[:, -1], test[:, :-1], test[:, -1]
41 | 
42 |             best_model_detail = model_factory.get_best_model(
43 |                 X=x_train,y=y_train,base_accuracy=self.model_trainer_config.expected_accuracy
44 |             )
45 |             model_obj = best_model_detail.best_model
46 | 
47 |             y_pred = model_obj.predict(x_test)
48 |             
49 |             accuracy = accuracy_score(y_test, y_pred) 
50 |             f1 = f1_score(y_test, y_pred)  
51 |             precision = precision_score(y_test, y_pred)  
52 |             recall = recall_score(y_test, y_pred)
53 |             metric_artifact = ClassificationMetricArtifact(f1_score=f1, precision_score=precision, recall_score=recall)
54 |             
55 |             return best_model_detail, metric_artifact
56 |         
57 |         except Exception as e:
58 |             raise USvisaException(e, sys) from e
59 |         
60 | 
61 |     def initiate_model_trainer(self, ) -> ModelTrainerArtifact:
62 |         logging.info("Entered initiate_model_trainer method of ModelTrainer class")
63 |         """
64 |         Method Name :   initiate_model_trainer
65 |         Description :   This function initiates a model trainer steps
66 |         
67 |         Output      :   Returns model trainer artifact
68 |         On Failure  :   Write an exception log and then raise an exception
69 |         """
70 |         try:
71 |             train_arr = load_numpy_array_data(file_path=self.data_transformation_artifact.transformed_train_file_path)
72 |             test_arr = load_numpy_array_data(file_path=self.data_transformation_artifact.transformed_test_file_path)
73 |             
74 |             best_model_detail ,metric_artifact = self.get_model_object_and_report(train=train_arr, test=test_arr)
75 |             
76 |             preprocessing_obj = load_object(file_path=self.data_transformation_artifact.transformed_object_file_path)
77 | 
78 | 
79 |             if best_model_detail.best_score < self.model_trainer_config.expected_accuracy:
80 |                 logging.info("No best model found with score more than base score")
81 |                 raise Exception("No best model found with score more than base score")
82 | 
83 |             usvisa_model = USvisaModel(preprocessing_object=preprocessing_obj,
84 |                                        trained_model_object=best_model_detail.best_model)
85 |             logging.info("Created usvisa model object with preprocessor and model")
86 |             logging.info("Created best model file path.")
87 |             save_object(self.model_trainer_config.trained_model_file_path, usvisa_model)
88 | 
89 |             model_trainer_artifact = ModelTrainerArtifact(
90 |                 trained_model_file_path=self.model_trainer_config.trained_model_file_path,
91 |                 metric_artifact=metric_artifact,
92 |             )
93 |             logging.info(f"Model trainer artifact: {model_trainer_artifact}")
94 |             return model_trainer_artifact
95 |         except Exception as e:
96 |             raise USvisaException(e, sys) from e


--------------------------------------------------------------------------------
/us_visa/components/data_ingestion.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | 
  4 | from pandas import DataFrame
  5 | from sklearn.model_selection import train_test_split
  6 | 
  7 | from us_visa.entity.config_entity import DataIngestionConfig
  8 | from us_visa.entity.artifact_entity import DataIngestionArtifact
  9 | from us_visa.exception import USvisaException
 10 | from us_visa.logger import logging
 11 | from us_visa.data_access.usvisa_data import USvisaData
 12 | 
 13 | class DataIngestion:
 14 |     def __init__(self,data_ingestion_config:DataIngestionConfig=DataIngestionConfig()):
 15 |         """
 16 |         :param data_ingestion_config: configuration for data ingestion
 17 |         """
 18 |         try:
 19 |             self.data_ingestion_config = data_ingestion_config
 20 |         except Exception as e:
 21 |             raise USvisaException(e,sys)
 22 | 
 23 |     def export_data_into_feature_store(self)->DataFrame:
 24 |         """
 25 |         Method Name :   export_data_into_feature_store
 26 |         Description :   This method exports data from mongodb to csv file
 27 |         
 28 |         Output      :   data is returned as artifact of data ingestion components
 29 |         On Failure  :   Write an exception log and then raise an exception
 30 |         """
 31 |         try:
 32 |             logging.info(f"Exporting data from mongodb")
 33 |             usvisa_data = USvisaData()
 34 |             dataframe = usvisa_data.export_collection_as_dataframe(collection_name=
 35 |                                                                    self.data_ingestion_config.collection_name)
 36 |             logging.info(f"Shape of dataframe: {dataframe.shape}")
 37 |             feature_store_file_path  = self.data_ingestion_config.feature_store_file_path
 38 |             dir_path = os.path.dirname(feature_store_file_path)
 39 |             os.makedirs(dir_path,exist_ok=True)
 40 |             logging.info(f"Saving exported data into feature store file path: {feature_store_file_path}")
 41 |             dataframe.to_csv(feature_store_file_path,index=False,header=True)
 42 |             return dataframe
 43 | 
 44 |         except Exception as e:
 45 |             raise USvisaException(e,sys)
 46 | 
 47 |     def split_data_as_train_test(self,dataframe: DataFrame) ->None:
 48 |         """
 49 |         Method Name :   split_data_as_train_test
 50 |         Description :   This method splits the dataframe into train set and test set based on split ratio 
 51 |         
 52 |         Output      :   Folder is created in s3 bucket
 53 |         On Failure  :   Write an exception log and then raise an exception
 54 |         """
 55 |         logging.info("Entered split_data_as_train_test method of Data_Ingestion class")
 56 | 
 57 |         try:
 58 |             train_set, test_set = train_test_split(dataframe, test_size=self.data_ingestion_config.train_test_split_ratio)
 59 |             logging.info("Performed train test split on the dataframe")
 60 |             logging.info(
 61 |                 "Exited split_data_as_train_test method of Data_Ingestion class"
 62 |             )
 63 |             dir_path = os.path.dirname(self.data_ingestion_config.training_file_path)
 64 |             os.makedirs(dir_path,exist_ok=True)
 65 |             
 66 |             logging.info(f"Exporting train and test file path.")
 67 |             train_set.to_csv(self.data_ingestion_config.training_file_path,index=False,header=True)
 68 |             test_set.to_csv(self.data_ingestion_config.testing_file_path,index=False,header=True)
 69 | 
 70 |             logging.info(f"Exported train and test file path.")
 71 |         except Exception as e:
 72 |             raise USvisaException(e, sys) from e
 73 | 
 74 |     def initiate_data_ingestion(self) ->DataIngestionArtifact:
 75 |         """
 76 |         Method Name :   initiate_data_ingestion
 77 |         Description :   This method initiates the data ingestion components of training pipeline 
 78 |         
 79 |         Output      :   train set and test set are returned as the artifacts of data ingestion components
 80 |         On Failure  :   Write an exception log and then raise an exception
 81 |         """
 82 |         logging.info("Entered initiate_data_ingestion method of Data_Ingestion class")
 83 | 
 84 |         try:
 85 |             dataframe = self.export_data_into_feature_store()
 86 | 
 87 |             logging.info("Got the data from mongodb")
 88 | 
 89 |             self.split_data_as_train_test(dataframe)
 90 | 
 91 |             logging.info("Performed train test split on the dataset")
 92 | 
 93 |             logging.info(
 94 |                 "Exited initiate_data_ingestion method of Data_Ingestion class"
 95 |             )
 96 | 
 97 |             data_ingestion_artifact = DataIngestionArtifact(trained_file_path=self.data_ingestion_config.training_file_path,
 98 |             test_file_path=self.data_ingestion_config.testing_file_path)
 99 |             
100 |             logging.info(f"Data ingestion artifact: {data_ingestion_artifact}")
101 |             return data_ingestion_artifact
102 |         except Exception as e:
103 |             raise USvisaException(e, sys) from e


--------------------------------------------------------------------------------
/us_visa/components/model_evaluation.py:
--------------------------------------------------------------------------------
  1 | from us_visa.entity.config_entity import ModelEvaluationConfig
  2 | from us_visa.entity.artifact_entity import ModelTrainerArtifact, DataIngestionArtifact, ModelEvaluationArtifact
  3 | from sklearn.metrics import f1_score
  4 | from us_visa.exception import USvisaException
  5 | from us_visa.constants import TARGET_COLUMN, CURRENT_YEAR
  6 | from us_visa.logger import logging
  7 | import sys
  8 | import pandas as pd
  9 | from typing import Optional
 10 | from us_visa.entity.s3_estimator import USvisaEstimator
 11 | from dataclasses import dataclass
 12 | from us_visa.entity.estimator import USvisaModel
 13 | from us_visa.entity.estimator import TargetValueMapping
 14 | 
 15 | @dataclass
 16 | class EvaluateModelResponse:
 17 |     trained_model_f1_score: float
 18 |     best_model_f1_score: float
 19 |     is_model_accepted: bool
 20 |     difference: float
 21 | 
 22 | 
 23 | class ModelEvaluation:
 24 | 
 25 |     def __init__(self, model_eval_config: ModelEvaluationConfig, data_ingestion_artifact: DataIngestionArtifact,
 26 |                  model_trainer_artifact: ModelTrainerArtifact):
 27 |         try:
 28 |             self.model_eval_config = model_eval_config
 29 |             self.data_ingestion_artifact = data_ingestion_artifact
 30 |             self.model_trainer_artifact = model_trainer_artifact
 31 |         except Exception as e:
 32 |             raise USvisaException(e, sys) from e
 33 | 
 34 |     def get_best_model(self) -> Optional[USvisaEstimator]:
 35 |         """
 36 |         Method Name :   get_best_model
 37 |         Description :   This function is used to get model in production
 38 |         
 39 |         Output      :   Returns model object if available in s3 storage
 40 |         On Failure  :   Write an exception log and then raise an exception
 41 |         """
 42 |         try:
 43 |             bucket_name = self.model_eval_config.bucket_name
 44 |             model_path=self.model_eval_config.s3_model_key_path
 45 |             usvisa_estimator = USvisaEstimator(bucket_name=bucket_name,
 46 |                                                model_path=model_path)
 47 | 
 48 |             if usvisa_estimator.is_model_present(model_path=model_path):
 49 |                 return usvisa_estimator
 50 |             return None
 51 |         except Exception as e:
 52 |             raise  USvisaException(e,sys)
 53 | 
 54 |     def evaluate_model(self) -> EvaluateModelResponse:
 55 |         """
 56 |         Method Name :   evaluate_model
 57 |         Description :   This function is used to evaluate trained model 
 58 |                         with production model and choose best model 
 59 |         
 60 |         Output      :   Returns bool value based on validation results
 61 |         On Failure  :   Write an exception log and then raise an exception
 62 |         """
 63 |         try:
 64 |             test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
 65 |             test_df['company_age'] = CURRENT_YEAR-test_df['yr_of_estab']
 66 | 
 67 |             x, y = test_df.drop(TARGET_COLUMN, axis=1), test_df[TARGET_COLUMN]
 68 |             y = y.replace(
 69 |                 TargetValueMapping()._asdict()
 70 |             )
 71 | 
 72 |             # trained_model = load_object(file_path=self.model_trainer_artifact.trained_model_file_path)
 73 |             trained_model_f1_score = self.model_trainer_artifact.metric_artifact.f1_score
 74 | 
 75 |             best_model_f1_score=None
 76 |             best_model = self.get_best_model()
 77 |             if best_model is not None:
 78 |                 y_hat_best_model = best_model.predict(x)
 79 |                 best_model_f1_score = f1_score(y, y_hat_best_model)
 80 |             
 81 |             tmp_best_model_score = 0 if best_model_f1_score is None else best_model_f1_score
 82 |             result = EvaluateModelResponse(trained_model_f1_score=trained_model_f1_score,
 83 |                                            best_model_f1_score=best_model_f1_score,
 84 |                                            is_model_accepted=trained_model_f1_score > tmp_best_model_score,
 85 |                                            difference=trained_model_f1_score - tmp_best_model_score
 86 |                                            )
 87 |             logging.info(f"Result: {result}")
 88 |             return result
 89 | 
 90 |         except Exception as e:
 91 |             raise USvisaException(e, sys)
 92 | 
 93 |     def initiate_model_evaluation(self) -> ModelEvaluationArtifact:
 94 |         """
 95 |         Method Name :   initiate_model_evaluation
 96 |         Description :   This function is used to initiate all steps of the model evaluation
 97 |         
 98 |         Output      :   Returns model evaluation artifact
 99 |         On Failure  :   Write an exception log and then raise an exception
100 |         """  
101 |         try:
102 |             evaluate_model_response = self.evaluate_model()
103 |             s3_model_path = self.model_eval_config.s3_model_key_path
104 | 
105 |             model_evaluation_artifact = ModelEvaluationArtifact(
106 |                 is_model_accepted=evaluate_model_response.is_model_accepted,
107 |                 s3_model_path=s3_model_path,
108 |                 trained_model_path=self.model_trainer_artifact.trained_model_file_path,
109 |                 changed_accuracy=evaluate_model_response.difference)
110 | 
111 |             logging.info(f"Model evaluation artifact: {model_evaluation_artifact}")
112 |             return model_evaluation_artifact
113 |         except Exception as e:
114 |             raise USvisaException(e, sys) from e


--------------------------------------------------------------------------------
/us_visa/components/data_validation.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import sys
  3 | 
  4 | import pandas as pd
  5 | from evidently.model_profile import Profile
  6 | from evidently.model_profile.sections import DataDriftProfileSection
  7 | 
  8 | from pandas import DataFrame
  9 | 
 10 | from us_visa.exception import USvisaException
 11 | from us_visa.logger import logging
 12 | from us_visa.utils.main_utils import read_yaml_file, write_yaml_file
 13 | from us_visa.entity.artifact_entity import DataIngestionArtifact, DataValidationArtifact
 14 | from us_visa.entity.config_entity import DataValidationConfig
 15 | from us_visa.constants import SCHEMA_FILE_PATH
 16 | 
 17 | 
 18 | class DataValidation:
 19 |     def __init__(self, data_ingestion_artifact: DataIngestionArtifact, data_validation_config: DataValidationConfig):
 20 |         """
 21 |         :param data_ingestion_artifact: Output reference of data ingestion artifact stage
 22 |         :param data_validation_config: configuration for data validation
 23 |         """
 24 |         try:
 25 |             self.data_ingestion_artifact = data_ingestion_artifact
 26 |             self.data_validation_config = data_validation_config
 27 |             self._schema_config =read_yaml_file(file_path=SCHEMA_FILE_PATH)
 28 |         except Exception as e:
 29 |             raise USvisaException(e,sys)
 30 | 
 31 |     def validate_number_of_columns(self, dataframe: DataFrame) -> bool:
 32 |         """
 33 |         Method Name :   validate_number_of_columns
 34 |         Description :   This method validates the number of columns
 35 |         
 36 |         Output      :   Returns bool value based on validation results
 37 |         On Failure  :   Write an exception log and then raise an exception
 38 |         """
 39 |         try:
 40 |             status = len(dataframe.columns) == len(self._schema_config["columns"])
 41 |             logging.info(f"Is required column present: [{status}]")
 42 |             return status
 43 |         except Exception as e:
 44 |             raise USvisaException(e, sys)
 45 | 
 46 |     def is_column_exist(self, df: DataFrame) -> bool:
 47 |         """
 48 |         Method Name :   is_column_exist
 49 |         Description :   This method validates the existence of a numerical and categorical columns
 50 |         
 51 |         Output      :   Returns bool value based on validation results
 52 |         On Failure  :   Write an exception log and then raise an exception
 53 |         """
 54 |         try:
 55 |             dataframe_columns = df.columns
 56 |             missing_numerical_columns = []
 57 |             missing_categorical_columns = []
 58 |             for column in self._schema_config["numerical_columns"]:
 59 |                 if column not in dataframe_columns:
 60 |                     missing_numerical_columns.append(column)
 61 | 
 62 |             if len(missing_numerical_columns)>0:
 63 |                 logging.info(f"Missing numerical column: {missing_numerical_columns}")
 64 | 
 65 | 
 66 |             for column in self._schema_config["categorical_columns"]:
 67 |                 if column not in dataframe_columns:
 68 |                     missing_categorical_columns.append(column)
 69 | 
 70 |             if len(missing_categorical_columns)>0:
 71 |                 logging.info(f"Missing categorical column: {missing_categorical_columns}")
 72 | 
 73 |             return False if len(missing_categorical_columns)>0 or len(missing_numerical_columns)>0 else True
 74 |         except Exception as e:
 75 |             raise USvisaException(e, sys) from e
 76 | 
 77 |     @staticmethod
 78 |     def read_data(file_path) -> DataFrame:
 79 |         try:
 80 |             return pd.read_csv(file_path)
 81 |         except Exception as e:
 82 |             raise USvisaException(e, sys)
 83 | 
 84 |     def detect_dataset_drift(self, reference_df: DataFrame, current_df: DataFrame, ) -> bool:
 85 |         """
 86 |         Method Name :   detect_dataset_drift
 87 |         Description :   This method validates if drift is detected
 88 |         
 89 |         Output      :   Returns bool value based on validation results
 90 |         On Failure  :   Write an exception log and then raise an exception
 91 |         """
 92 |         try:
 93 |             data_drift_profile = Profile(sections=[DataDriftProfileSection()])
 94 | 
 95 |             data_drift_profile.calculate(reference_df, current_df)
 96 | 
 97 |             report = data_drift_profile.json()
 98 |             json_report = json.loads(report)
 99 | 
100 |             write_yaml_file(file_path=self.data_validation_config.drift_report_file_path, content=json_report)
101 | 
102 |             n_features = json_report["data_drift"]["data"]["metrics"]["n_features"]
103 |             n_drifted_features = json_report["data_drift"]["data"]["metrics"]["n_drifted_features"]
104 | 
105 |             logging.info(f"{n_drifted_features}/{n_features} drift detected.")
106 |             drift_status = json_report["data_drift"]["data"]["metrics"]["dataset_drift"]
107 |             return drift_status
108 |         except Exception as e:
109 |             raise USvisaException(e, sys) from e
110 | 
111 |     def initiate_data_validation(self) -> DataValidationArtifact:
112 |         """
113 |         Method Name :   initiate_data_validation
114 |         Description :   This method initiates the data validation component for the pipeline
115 |         
116 |         Output      :   Returns bool value based on validation results
117 |         On Failure  :   Write an exception log and then raise an exception
118 |         """
119 | 
120 |         try:
121 |             validation_error_msg = ""
122 |             logging.info("Starting data validation")
123 |             train_df, test_df = (DataValidation.read_data(file_path=self.data_ingestion_artifact.trained_file_path),
124 |                                  DataValidation.read_data(file_path=self.data_ingestion_artifact.test_file_path))
125 | 
126 |             status = self.validate_number_of_columns(dataframe=train_df)
127 |             logging.info(f"All required columns present in training dataframe: {status}")
128 |             if not status:
129 |                 validation_error_msg += f"Columns are missing in training dataframe."
130 |             status = self.validate_number_of_columns(dataframe=test_df)
131 | 
132 |             logging.info(f"All required columns present in testing dataframe: {status}")
133 |             if not status:
134 |                 validation_error_msg += f"Columns are missing in test dataframe."
135 | 
136 |             status = self.is_column_exist(df=train_df)
137 | 
138 |             if not status:
139 |                 validation_error_msg += f"Columns are missing in training dataframe."
140 |             status = self.is_column_exist(df=test_df)
141 | 
142 |             if not status:
143 |                 validation_error_msg += f"columns are missing in test dataframe."
144 | 
145 |             validation_status = len(validation_error_msg) == 0
146 | 
147 |             if validation_status:
148 |                 drift_status = self.detect_dataset_drift(train_df, test_df)
149 |                 if drift_status:
150 |                     logging.info(f"Drift detected.")
151 |                     validation_error_msg = "Drift detected"
152 |                 else:
153 |                     validation_error_msg = "Drift not detected"
154 |             else:
155 |                 logging.info(f"Validation_error: {validation_error_msg}")
156 |                 
157 | 
158 |             data_validation_artifact = DataValidationArtifact(
159 |                 validation_status=validation_status,
160 |                 message=validation_error_msg,
161 |                 drift_report_file_path=self.data_validation_config.drift_report_file_path
162 |             )
163 | 
164 |             logging.info(f"Data validation artifact: {data_validation_artifact}")
165 |             return data_validation_artifact
166 |         except Exception as e:
167 |             raise USvisaException(e, sys) from e


--------------------------------------------------------------------------------
/us_visa/pipline/training_pipeline.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | from us_visa.exception import USvisaException
  3 | from us_visa.logger import logging
  4 | 
  5 | from us_visa.components.data_ingestion import DataIngestion
  6 | from us_visa.components.data_validation import DataValidation
  7 | from us_visa.components.data_transformation import DataTransformation
  8 | from us_visa.components.model_trainer import ModelTrainer
  9 | from us_visa.components.model_evaluation import ModelEvaluation
 10 | from us_visa.components.model_pusher import ModelPusher
 11 | 
 12 | from us_visa.entity.config_entity import (DataIngestionConfig,
 13 |                                           DataValidationConfig,
 14 |                                           DataTransformationConfig,
 15 |                                           ModelTrainerConfig,
 16 |                                           ModelEvaluationConfig,
 17 |                                           ModelPusherConfig)
 18 |                                           
 19 | 
 20 | from us_visa.entity.artifact_entity import (DataIngestionArtifact,
 21 |                                             DataValidationArtifact,
 22 |                                             DataTransformationArtifact,
 23 |                                             ModelTrainerArtifact,
 24 |                                             ModelEvaluationArtifact,
 25 |                                             ModelPusherArtifact)
 26 | 
 27 | 
 28 | 
 29 | class TrainPipeline:
 30 |     def __init__(self):
 31 |         self.data_ingestion_config = DataIngestionConfig()
 32 |         self.data_validation_config = DataValidationConfig()
 33 |         self.data_transformation_config = DataTransformationConfig()
 34 |         self.model_trainer_config = ModelTrainerConfig()
 35 |         self.model_evaluation_config = ModelEvaluationConfig()
 36 |         self.model_pusher_config = ModelPusherConfig()
 37 | 
 38 | 
 39 |     
 40 |     def start_data_ingestion(self) -> DataIngestionArtifact:
 41 |         """
 42 |         This method of TrainPipeline class is responsible for starting data ingestion component
 43 |         """
 44 |         try:
 45 |             logging.info("Entered the start_data_ingestion method of TrainPipeline class")
 46 |             logging.info("Getting the data from mongodb")
 47 |             data_ingestion = DataIngestion(data_ingestion_config=self.data_ingestion_config)
 48 |             data_ingestion_artifact = data_ingestion.initiate_data_ingestion()
 49 |             logging.info("Got the train_set and test_set from mongodb")
 50 |             logging.info(
 51 |                 "Exited the start_data_ingestion method of TrainPipeline class"
 52 |             )
 53 |             return data_ingestion_artifact
 54 |         except Exception as e:
 55 |             raise USvisaException(e, sys) from e
 56 |         
 57 |     
 58 | 
 59 |     def start_data_validation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataValidationArtifact:
 60 |         """
 61 |         This method of TrainPipeline class is responsible for starting data validation component
 62 |         """
 63 |         logging.info("Entered the start_data_validation method of TrainPipeline class")
 64 | 
 65 |         try:
 66 |             data_validation = DataValidation(data_ingestion_artifact=data_ingestion_artifact,
 67 |                                              data_validation_config=self.data_validation_config
 68 |                                              )
 69 | 
 70 |             data_validation_artifact = data_validation.initiate_data_validation()
 71 | 
 72 |             logging.info("Performed the data validation operation")
 73 | 
 74 |             logging.info(
 75 |                 "Exited the start_data_validation method of TrainPipeline class"
 76 |             )
 77 | 
 78 |             return data_validation_artifact
 79 | 
 80 |         except Exception as e:
 81 |             raise USvisaException(e, sys) from e
 82 |         
 83 | 
 84 | 
 85 |     
 86 | 
 87 |     def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact, data_validation_artifact: DataValidationArtifact) -> DataTransformationArtifact:
 88 |         """
 89 |         This method of TrainPipeline class is responsible for starting data transformation component
 90 |         """
 91 |         try:
 92 |             data_transformation = DataTransformation(data_ingestion_artifact=data_ingestion_artifact,
 93 |                                                      data_transformation_config=self.data_transformation_config,
 94 |                                                      data_validation_artifact=data_validation_artifact)
 95 |             data_transformation_artifact = data_transformation.initiate_data_transformation()
 96 |             return data_transformation_artifact
 97 |         except Exception as e:
 98 |             raise USvisaException(e, sys)
 99 |         
100 | 
101 |     
102 |     def start_model_trainer(self, data_transformation_artifact: DataTransformationArtifact) -> ModelTrainerArtifact:
103 |         """
104 |         This method of TrainPipeline class is responsible for starting model training
105 |         """
106 |         try:
107 |             model_trainer = ModelTrainer(data_transformation_artifact=data_transformation_artifact,
108 |                                          model_trainer_config=self.model_trainer_config
109 |                                          )
110 |             model_trainer_artifact = model_trainer.initiate_model_trainer()
111 |             return model_trainer_artifact
112 | 
113 |         except Exception as e:
114 |             raise USvisaException(e, sys)
115 |         
116 |     
117 | 
118 |     def start_model_evaluation(self, data_ingestion_artifact: DataIngestionArtifact,
119 |                                model_trainer_artifact: ModelTrainerArtifact) -> ModelEvaluationArtifact:
120 |         """
121 |         This method of TrainPipeline class is responsible for starting modle evaluation
122 |         """
123 |         try:
124 |             model_evaluation = ModelEvaluation(model_eval_config=self.model_evaluation_config,
125 |                                                data_ingestion_artifact=data_ingestion_artifact,
126 |                                                model_trainer_artifact=model_trainer_artifact)
127 |             model_evaluation_artifact = model_evaluation.initiate_model_evaluation()
128 |             return model_evaluation_artifact
129 |         except Exception as e:
130 |             raise USvisaException(e, sys)
131 |         
132 | 
133 |     
134 | 
135 |     def start_model_pusher(self, model_evaluation_artifact: ModelEvaluationArtifact) -> ModelPusherArtifact:
136 |         """
137 |         This method of TrainPipeline class is responsible for starting model pushing
138 |         """
139 |         try:
140 |             model_pusher = ModelPusher(model_evaluation_artifact=model_evaluation_artifact,
141 |                                        model_pusher_config=self.model_pusher_config
142 |                                        )
143 |             model_pusher_artifact = model_pusher.initiate_model_pusher()
144 |             return model_pusher_artifact
145 |         except Exception as e:
146 |             raise USvisaException(e, sys)
147 | 
148 |         
149 | 
150 |     
151 | 
152 |         
153 | 
154 |     
155 |     def run_pipeline(self, ) -> None:
156 |         """
157 |         This method of TrainPipeline class is responsible for running complete pipeline
158 |         """
159 |         try:
160 |             data_ingestion_artifact = self.start_data_ingestion()
161 |             data_validation_artifact = self.start_data_validation(data_ingestion_artifact=data_ingestion_artifact)
162 |             data_transformation_artifact = self.start_data_transformation(
163 |                 data_ingestion_artifact=data_ingestion_artifact, data_validation_artifact=data_validation_artifact)
164 |             model_trainer_artifact = self.start_model_trainer(data_transformation_artifact=data_transformation_artifact)
165 |             model_evaluation_artifact = self.start_model_evaluation(data_ingestion_artifact=data_ingestion_artifact,
166 |                                                                     model_trainer_artifact=model_trainer_artifact)
167 |             
168 |             if not model_evaluation_artifact.is_model_accepted:
169 |                 logging.info(f"Model not accepted.")
170 |                 return None
171 |             model_pusher_artifact = self.start_model_pusher(model_evaluation_artifact=model_evaluation_artifact)
172 | 
173 | 
174 |         
175 |         except Exception as e:
176 |             raise USvisaException(e, sys)


--------------------------------------------------------------------------------
/templates/usvisa.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 |   <head>
  4 |     <meta charset="utf-8">
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1">
  6 |     <title>US Visa Prediction</title>
  7 |     <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css')}}">
  8 |     <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-iYQeCzEYFbKjA/T2uDLTpkwGzCiq6soy8tYaI1GyVh/UjpbCx/TYkiZhlZB6+fzT" crossorigin="anonymous">
  9 |   </head>
 10 |   <body>
 11 |     <nav class="navbar navbar-light bg-light static-top">
 12 |         <div class="container-fluid">
 13 |            <a class="navbar-brand" href="https://ineuron.ai"><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAIkAAAAlCAMAAABBJDpuAAAAVFBMVEX/fgD/iQA7NzIyMjMnNYInNYEnNYInNYEzMzInNYQnNYETJzsnNYIwN1wxMjNoTis4O1YrNm0xOF47PE9DP0dEQEVpTVkAJJ4nNYEOLJHJbiAzMzOmmmHRAAAAFnRSTlOh/vu+986oen9fP2UqEzQkFAIDAAEEILVBJQAAA3FJREFUWMPNmO2WoyAMQG0VBARtdVRs3/89NwkgaF1rd2aOm18tIrnkgwSz7n+R7LcV1ELkeS7E7VySRlyfQXJhTiO55c+liP4cEuH1X8E7gaV+S3Jv2zYZ/Vr+/Sdx2vP6doc/fSPcf/GOpLSFVXFUW8t+giRP47TOd60yk1hbxIhSP0ACKPWrvw7YBIT/lcQY84G3Wk17ugWQfg5UsQJpjflKSLQi7xTWyk0SXTJ4VnB5D1yccxmp4Z+mX5KGNS+SPXVNll0ulyxL0wYmloBB6zL/MpCoAgjAJiUM6w0SboMwPUdRogvNKcNM3smFdRvEcJJkKcflQa+X0pNIfBGW0zKqjyQtw4VLKQlI7ZLgdrRNSZDjMQx2GB4pCkxkCrfGWUTJ0Egan6muCHgJCUwtnCk0mrJ9Q8K5LUqtdSfynkAedhynaRqngVDaXLiJEAzkFkM71IuIlWRYtSQpbXSZDvQ7JPCei9brs+oaABkn74LRIkpFaVxGnzjvIxTYREOoEwn5r01JTDRTcO87Eu3zNUeTPKZpjrIRrNIDCjzALbPk7LJMIokOEQu2K7yKQAKvFDF9pVe0R+LGb7h1MMkw2ijjA/1zhSfSphvEAChXJEhAqwYStIJWXnTpvbdH4hTUwSQ2JRmQpILDLYbB7J41SYiLcNozu5Z3JMo7RxBJahI7WSRByCMkqJt9hySESbVJcvmARJGFExJmdBSl2oM2+T4JLWfgaGFptizlPUm1FydHSdyZ7UnS4ySph2kS0kJrkvr5NJg7dnrJnfxgxPozRPGtkyPNOxtaiDvbIAF9L+5B5zTI2BwlwXmM+U3zReav3o0F8oUEFFZ42McDZYomOUziFmezgWyo251Uc1nyKJol7URCEvIYUEbHYaEGQi9C9egwCdVfHwiaajfjKMUcHcRacCylhWJbJDfq47EEDhYqoIWjHnwDXZPpPiChPQelLU+Cbm4w50FmYgkvFwpiWwCC/Um/SLcNElOC9RVj6qWpCiEMvRUK4zLpdKmPw8rVldy/qxjn60wLrVLWJIPLiZJRr3fsvnOHPvare+lB93tb37T20D82jqOt+zNuXhuNvNhp7X+PpHb3rvV9R5xjE2SpyCN9ffQO+GtWIZr5Xnytz4gTMIRYfys47/uJEXny/aQ/95tSUwmU+uRvSp/IHzQqZYpYrQmxAAAAAElFTkSuQmCC" /></a>
 14 |         </div>
 15 |      </nav>
 16 |     <section>
 17 |         <div class="container-fluid">
 18 |             <div class="container">
 19 |                 <form action="/" method="post">
 20 |                     <legend>US Visa approval Form</legend>
 21 |                     <div class="mb-3">
 22 |                         <label for="continent" >continent</label>
 23 |                         <select class="form-control" id="continent" name="continent" placeholder="Continent" required="required">
 24 |                           <option value="" disabled selected>Select Continent</option>
 25 |                           <option>Asia</option>
 26 |                           <option>Africa</option>
 27 |                           <option>North America</option>
 28 |                           <option>Europe</option>
 29 |                           <option>South America</option>
 30 |                           <option>Oceania</option>
 31 |                         </select>
 32 |                       </div>
 33 |         
 34 |                       <div class="mb-3">
 35 |                         <label for="education_of_employee">Education of employee</label>
 36 |                         <select class="form-control" id="education_of_employee" name="education_of_employee" placeholder="Education" required="required">
 37 |                             <option value="" disabled selected>Select Education</option>
 38 |                             <option>High School</option>
 39 |                             <option>Master's</option>
 40 |                             <option>Bachelor's</option>
 41 |                             <option>Doctorate</option>
 42 |                         </select>
 43 |                       </div>
 44 |         
 45 |                       <div class="mb-3">
 46 |                         <label for="has_job_experience">Has Job Experience</label>
 47 |                         <select class="form-control" id="has_job_experience" name="has_job_experience" placeholder="Has Experience" required="required">
 48 |                             <option value="" disabled selected>Has Experience</option>
 49 |                             <option>N</option>
 50 |                             <option>Y</option>
 51 |                         </select>
 52 |                       </div>
 53 |         
 54 |                       <div class="mb-3">
 55 |                         <label for="requires_job_training">Requires Job Training</label>
 56 |                         <select class="form-control" id="requires_job_training" name="requires_job_training" placeholder="Training required" required="required">
 57 |                             <option value="" disabled selected>Required Training</option>
 58 |                             <option>N</option>
 59 |                             <option>Y</option>
 60 |                         </select>
 61 |                       </div>
 62 |         
 63 |                       <div class="mb-3">
 64 |                         <label for="no_of_employees">Number of Employees</label>
 65 |                         <input type="number" class="form-control" id="no_of_employees" name="no_of_employees" min="14500" max="40000" placeholder="select between 15000 to 40000" required="required">
 66 |                       </div>
 67 |                       
 68 |                       <div class="mb-3">
 69 |                         <label for="region_of_employment">Select Region</label>
 70 |                         <select class="form-control" id="region_of_employment" name="region_of_employment" placeholder="Region" required="required">
 71 |                           <option value="" disabled selected>Region</option>
 72 |                           <option>West</option>
 73 |                           <option>Northeast</option>
 74 |                           <option>South</option>
 75 |                           <option>Midwest</option>
 76 |                           <option>Island</option>
 77 |                         </select>
 78 |                       </div>
 79 |         
 80 |                       <div class="mb-3">
 81 |                         <label for="prevailing_wage">Prevailing Wage</label>
 82 |                         <input type="number" class="form-control" id="prevailing_wage" name="prevailing_wage" min="600" max="70000" placeholder="select between 700 to 7000" required="required">
 83 |                       </div>
 84 | 
 85 | 
 86 |         
 87 |                       <div class="mb-3">
 88 |                         <label for="unit_of_wage">Contract Tenure</label>
 89 |                         <select class="form-control" id="unit_of_wage" name="unit_of_wage" placeholder="Contract tenure" required="required">
 90 |                             <option value="" disabled selected>Contract Tenure</option>
 91 |                             <option>Hour</option>
 92 |                             <option>Year</option>
 93 |                             <option>Week</option>
 94 |                             <option>Month</option>
 95 |                         </select>
 96 |                       </div>
 97 |         
 98 |                       <div class="mb-3">
 99 |                         <label for="full_time_position">Full or Part Time</label>
100 |                         <select class="form-control" id="full_time_position" name="full_time_position" placeholder="Full Time" required="required">
101 |                             <option value="" disabled selected>Full Time</option>
102 |                             <option>Y</option>
103 |                             <option>N</option>
104 |                         </select>
105 |                       </div>
106 |         
107 |                       <div class="mb-3">
108 |                         <label for="company_age">Age of the company</label>
109 |                         <input type="number" class="form-control" id="company_age" name="company_age" min="15" max="180" placeholder="select between 10 to 180" required="required">
110 |                       </div>
111 |         
112 |                         <input class="btn btn-primary" type="submit" value="Predict Visa-Status" required />
113 |                     </div>
114 |                     
115 |                 </form>
116 |             </div>
117 |             <div class="text-center text-black">
118 |                 <h2 class="display-4 fw-bolder">Visa Prediction Status: {{context}} </h2>
119 |             </div>
120 |         </div>
121 |     </section>
122 |     <script src="https://code.jquery.com/jquery-3.2.1.js"></script>
123 |     <script type="text/javascript">
124 |         $(".input").focus(function(){
125 |             $(this).parent().addClass("focus");
126 |         }).blur(function(){
127 |             if($(this).val() === ''){
128 |                 $(this).parent().removeClass("focus");
129 |             }
130 |         })
131 |     </script>
132 |   </body>
133 | </html>


--------------------------------------------------------------------------------
/us_visa/components/data_transformation.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | from imblearn.combine import SMOTEENN
  6 | from sklearn.pipeline import Pipeline
  7 | from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder, PowerTransformer
  8 | from sklearn.compose import ColumnTransformer
  9 | 
 10 | from us_visa.constants import TARGET_COLUMN, SCHEMA_FILE_PATH, CURRENT_YEAR
 11 | from us_visa.entity.config_entity import DataTransformationConfig
 12 | from us_visa.entity.artifact_entity import DataTransformationArtifact, DataIngestionArtifact, DataValidationArtifact
 13 | from us_visa.exception import USvisaException
 14 | from us_visa.logger import logging
 15 | from us_visa.utils.main_utils import save_object, save_numpy_array_data, read_yaml_file, drop_columns
 16 | from us_visa.entity.estimator import TargetValueMapping
 17 | 
 18 | class DataTransformation:
 19 |     def __init__(self, data_ingestion_artifact: DataIngestionArtifact,
 20 |                  data_transformation_config: DataTransformationConfig,
 21 |                  data_validation_artifact: DataValidationArtifact):
 22 |         """
 23 |         :param data_ingestion_artifact: Output reference of data ingestion artifact stage
 24 |         :param data_transformation_config: configuration for data transformation
 25 |         """
 26 |         try:
 27 |             self.data_ingestion_artifact = data_ingestion_artifact
 28 |             self.data_transformation_config = data_transformation_config
 29 |             self.data_validation_artifact = data_validation_artifact
 30 |             self._schema_config = read_yaml_file(file_path=SCHEMA_FILE_PATH)
 31 |         except Exception as e:
 32 |             raise USvisaException(e, sys)
 33 | 
 34 |     @staticmethod
 35 |     def read_data(file_path) -> pd.DataFrame:
 36 |         try:
 37 |             return pd.read_csv(file_path)
 38 |         except Exception as e:
 39 |             raise USvisaException(e, sys)
 40 | 
 41 |     
 42 |     def get_data_transformer_object(self) -> Pipeline:
 43 |         """
 44 |         Method Name :   get_data_transformer_object
 45 |         Description :   This method creates and returns a data transformer object for the data
 46 |         
 47 |         Output      :   data transformer object is created and returned 
 48 |         On Failure  :   Write an exception log and then raise an exception
 49 |         """
 50 |         logging.info(
 51 |             "Entered get_data_transformer_object method of DataTransformation class"
 52 |         )
 53 | 
 54 |         try:
 55 |             logging.info("Got numerical cols from schema config")
 56 | 
 57 |             numeric_transformer = StandardScaler()
 58 |             oh_transformer = OneHotEncoder()
 59 |             ordinal_encoder = OrdinalEncoder()
 60 | 
 61 |             logging.info("Initialized StandardScaler, OneHotEncoder, OrdinalEncoder")
 62 | 
 63 |             oh_columns = self._schema_config['oh_columns']
 64 |             or_columns = self._schema_config['or_columns']
 65 |             transform_columns = self._schema_config['transform_columns']
 66 |             num_features = self._schema_config['num_features']
 67 | 
 68 |             logging.info("Initialize PowerTransformer")
 69 | 
 70 |             transform_pipe = Pipeline(steps=[
 71 |                 ('transformer', PowerTransformer(method='yeo-johnson'))
 72 |             ])
 73 |             preprocessor = ColumnTransformer(
 74 |                 [
 75 |                     ("OneHotEncoder", oh_transformer, oh_columns),
 76 |                     ("Ordinal_Encoder", ordinal_encoder, or_columns),
 77 |                     ("Transformer", transform_pipe, transform_columns),
 78 |                     ("StandardScaler", numeric_transformer, num_features)
 79 |                 ]
 80 |             )
 81 | 
 82 |             logging.info("Created preprocessor object from ColumnTransformer")
 83 | 
 84 |             logging.info(
 85 |                 "Exited get_data_transformer_object method of DataTransformation class"
 86 |             )
 87 |             return preprocessor
 88 | 
 89 |         except Exception as e:
 90 |             raise USvisaException(e, sys) from e
 91 | 
 92 |     def initiate_data_transformation(self, ) -> DataTransformationArtifact:
 93 |         """
 94 |         Method Name :   initiate_data_transformation
 95 |         Description :   This method initiates the data transformation component for the pipeline 
 96 |         
 97 |         Output      :   data transformer steps are performed and preprocessor object is created  
 98 |         On Failure  :   Write an exception log and then raise an exception
 99 |         """
100 |         try:
101 |             if self.data_validation_artifact.validation_status:
102 |                 logging.info("Starting data transformation")
103 |                 preprocessor = self.get_data_transformer_object()
104 |                 logging.info("Got the preprocessor object")
105 | 
106 |                 train_df = DataTransformation.read_data(file_path=self.data_ingestion_artifact.trained_file_path)
107 |                 test_df = DataTransformation.read_data(file_path=self.data_ingestion_artifact.test_file_path)
108 | 
109 |                 input_feature_train_df = train_df.drop(columns=[TARGET_COLUMN], axis=1)
110 |                 target_feature_train_df = train_df[TARGET_COLUMN]
111 | 
112 |                 logging.info("Got train features and test features of Training dataset")
113 | 
114 |                 input_feature_train_df['company_age'] = CURRENT_YEAR-input_feature_train_df['yr_of_estab']
115 | 
116 |                 logging.info("Added company_age column to the Training dataset")
117 | 
118 |                 drop_cols = self._schema_config['drop_columns']
119 | 
120 |                 logging.info("drop the columns in drop_cols of Training dataset")
121 | 
122 |                 input_feature_train_df = drop_columns(df=input_feature_train_df, cols = drop_cols)
123 |                 
124 |                 target_feature_train_df = target_feature_train_df.replace(
125 |                     TargetValueMapping()._asdict()
126 |                 )
127 | 
128 | 
129 |                 input_feature_test_df = test_df.drop(columns=[TARGET_COLUMN], axis=1)
130 | 
131 |                 target_feature_test_df = test_df[TARGET_COLUMN]
132 | 
133 | 
134 |                 input_feature_test_df['company_age'] = CURRENT_YEAR-input_feature_test_df['yr_of_estab']
135 | 
136 |                 logging.info("Added company_age column to the Test dataset")
137 | 
138 |                 input_feature_test_df = drop_columns(df=input_feature_test_df, cols = drop_cols)
139 | 
140 |                 logging.info("drop the columns in drop_cols of Test dataset")
141 | 
142 |                 target_feature_test_df = target_feature_test_df.replace(
143 |                 TargetValueMapping()._asdict()
144 |                 )
145 | 
146 |                 logging.info("Got train features and test features of Testing dataset")
147 | 
148 |                 logging.info(
149 |                     "Applying preprocessing object on training dataframe and testing dataframe"
150 |                 )
151 | 
152 |                 input_feature_train_arr = preprocessor.fit_transform(input_feature_train_df)
153 | 
154 |                 logging.info(
155 |                     "Used the preprocessor object to fit transform the train features"
156 |                 )
157 | 
158 |                 input_feature_test_arr = preprocessor.transform(input_feature_test_df)
159 | 
160 |                 logging.info("Used the preprocessor object to transform the test features")
161 | 
162 |                 logging.info("Applying SMOTEENN on Training dataset")
163 | 
164 |                 smt = SMOTEENN(sampling_strategy="minority")
165 | 
166 |                 input_feature_train_final, target_feature_train_final = smt.fit_resample(
167 |                     input_feature_train_arr, target_feature_train_df
168 |                 )
169 | 
170 |                 logging.info("Applied SMOTEENN on training dataset")
171 | 
172 |                 logging.info("Applying SMOTEENN on testing dataset")
173 | 
174 |                 input_feature_test_final, target_feature_test_final = smt.fit_resample(
175 |                     input_feature_test_arr, target_feature_test_df
176 |                 )
177 | 
178 |                 logging.info("Applied SMOTEENN on testing dataset")
179 | 
180 |                 logging.info("Created train array and test array")
181 | 
182 |                 train_arr = np.c_[
183 |                     input_feature_train_final, np.array(target_feature_train_final)
184 |                 ]
185 | 
186 |                 test_arr = np.c_[
187 |                     input_feature_test_final, np.array(target_feature_test_final)
188 |                 ]
189 | 
190 |                 save_object(self.data_transformation_config.transformed_object_file_path, preprocessor)
191 |                 save_numpy_array_data(self.data_transformation_config.transformed_train_file_path, array=train_arr)
192 |                 save_numpy_array_data(self.data_transformation_config.transformed_test_file_path, array=test_arr)
193 | 
194 |                 logging.info("Saved the preprocessor object")
195 | 
196 |                 logging.info(
197 |                     "Exited initiate_data_transformation method of Data_Transformation class"
198 |                 )
199 | 
200 |                 data_transformation_artifact = DataTransformationArtifact(
201 |                     transformed_object_file_path=self.data_transformation_config.transformed_object_file_path,
202 |                     transformed_train_file_path=self.data_transformation_config.transformed_train_file_path,
203 |                     transformed_test_file_path=self.data_transformation_config.transformed_test_file_path
204 |                 )
205 |                 return data_transformation_artifact
206 |             else:
207 |                 raise Exception(self.data_validation_artifact.message)
208 | 
209 |         except Exception as e:
210 |             raise USvisaException(e, sys) from e


--------------------------------------------------------------------------------
/us_visa/cloud_storage/aws_storage.py:
--------------------------------------------------------------------------------
  1 | import boto3
  2 | from us_visa.configuration.aws_connection import S3Client
  3 | from io import StringIO
  4 | from typing import Union,List
  5 | import os,sys
  6 | from us_visa.logger import logging
  7 | from mypy_boto3_s3.service_resource import Bucket
  8 | from us_visa.exception import USvisaException
  9 | from botocore.exceptions import ClientError
 10 | from pandas import DataFrame,read_csv
 11 | import pickle
 12 | 
 13 | 
 14 | class SimpleStorageService:
 15 | 
 16 |     def __init__(self):
 17 |         s3_client = S3Client()
 18 |         self.s3_resource = s3_client.s3_resource
 19 |         self.s3_client = s3_client.s3_client
 20 | 
 21 |     def s3_key_path_available(self,bucket_name,s3_key)->bool:
 22 |         try:
 23 |             bucket = self.get_bucket(bucket_name)
 24 |             file_objects = [file_object for file_object in bucket.objects.filter(Prefix=s3_key)]
 25 |             if len(file_objects) > 0:
 26 |                 return True
 27 |             else:
 28 |                 return False
 29 |         except Exception as e:
 30 |             raise USvisaException(e,sys)
 31 |         
 32 |         
 33 | 
 34 |     @staticmethod
 35 |     def read_object(object_name: str, decode: bool = True, make_readable: bool = False) -> Union[StringIO, str]:
 36 |         """
 37 |         Method Name :   read_object
 38 |         Description :   This method reads the object_name object with kwargs
 39 | 
 40 |         Output      :   The column name is renamed
 41 |         On Failure  :   Write an exception log and then raise an exception
 42 | 
 43 |         Version     :   1.2
 44 |         Revisions   :   moved setup to cloud
 45 |         """
 46 |         logging.info("Entered the read_object method of S3Operations class")
 47 | 
 48 |         try:
 49 |             func = (
 50 |                 lambda: object_name.get()["Body"].read().decode()
 51 |                 if decode is True
 52 |                 else object_name.get()["Body"].read()
 53 |             )
 54 |             conv_func = lambda: StringIO(func()) if make_readable is True else func()
 55 |             logging.info("Exited the read_object method of S3Operations class")
 56 |             return conv_func()
 57 | 
 58 |         except Exception as e:
 59 |             raise USvisaException(e, sys) from e
 60 | 
 61 |     def get_bucket(self, bucket_name: str) -> Bucket:
 62 |         """
 63 |         Method Name :   get_bucket
 64 |         Description :   This method gets the bucket object based on the bucket_name
 65 | 
 66 |         Output      :   Bucket object is returned based on the bucket name
 67 |         On Failure  :   Write an exception log and then raise an exception
 68 | 
 69 |         Version     :   1.2
 70 |         Revisions   :   moved setup to cloud
 71 |         """
 72 |         logging.info("Entered the get_bucket method of S3Operations class")
 73 | 
 74 |         try:
 75 |             bucket = self.s3_resource.Bucket(bucket_name)
 76 |             logging.info("Exited the get_bucket method of S3Operations class")
 77 |             return bucket
 78 |         except Exception as e:
 79 |             raise USvisaException(e, sys) from e
 80 | 
 81 |     def get_file_object( self, filename: str, bucket_name: str) -> Union[List[object], object]:
 82 |         """
 83 |         Method Name :   get_file_object
 84 |         Description :   This method gets the file object from bucket_name bucket based on filename
 85 | 
 86 |         Output      :   list of objects or object is returned based on filename
 87 |         On Failure  :   Write an exception log and then raise an exception
 88 | 
 89 |         Version     :   1.2
 90 |         Revisions   :   moved setup to cloud
 91 |         """
 92 |         logging.info("Entered the get_file_object method of S3Operations class")
 93 | 
 94 |         try:
 95 |             bucket = self.get_bucket(bucket_name)
 96 | 
 97 |             file_objects = [file_object for file_object in bucket.objects.filter(Prefix=filename)]
 98 | 
 99 |             func = lambda x: x[0] if len(x) == 1 else x
100 | 
101 |             file_objs = func(file_objects)
102 |             logging.info("Exited the get_file_object method of S3Operations class")
103 | 
104 |             return file_objs
105 | 
106 |         except Exception as e:
107 |             raise USvisaException(e, sys) from e
108 | 
109 |     def load_model(self, model_name: str, bucket_name: str, model_dir: str = None) -> object:
110 |         """
111 |         Method Name :   load_model
112 |         Description :   This method loads the model_name model from bucket_name bucket with kwargs
113 | 
114 |         Output      :   list of objects or object is returned based on filename
115 |         On Failure  :   Write an exception log and then raise an exception
116 | 
117 |         Version     :   1.2
118 |         Revisions   :   moved setup to cloud
119 |         """
120 |         logging.info("Entered the load_model method of S3Operations class")
121 | 
122 |         try:
123 |             func = (
124 |                 lambda: model_name
125 |                 if model_dir is None
126 |                 else model_dir + "/" + model_name
127 |             )
128 |             model_file = func()
129 |             file_object = self.get_file_object(model_file, bucket_name)
130 |             model_obj = self.read_object(file_object, decode=False)
131 |             model = pickle.loads(model_obj)
132 |             logging.info("Exited the load_model method of S3Operations class")
133 |             return model
134 | 
135 |         except Exception as e:
136 |             raise USvisaException(e, sys) from e
137 | 
138 |     def create_folder(self, folder_name: str, bucket_name: str) -> None:
139 |         """
140 |         Method Name :   create_folder
141 |         Description :   This method creates a folder_name folder in bucket_name bucket
142 | 
143 |         Output      :   Folder is created in s3 bucket
144 |         On Failure  :   Write an exception log and then raise an exception
145 | 
146 |         Version     :   1.2
147 |         Revisions   :   moved setup to cloud
148 |         """
149 |         logging.info("Entered the create_folder method of S3Operations class")
150 | 
151 |         try:
152 |             self.s3_resource.Object(bucket_name, folder_name).load()
153 | 
154 |         except ClientError as e:
155 |             if e.response["Error"]["Code"] == "404":
156 |                 folder_obj = folder_name + "/"
157 |                 self.s3_client.put_object(Bucket=bucket_name, Key=folder_obj)
158 |             else:
159 |                 pass
160 |             logging.info("Exited the create_folder method of S3Operations class")
161 | 
162 |     def upload_file(self, from_filename: str, to_filename: str,  bucket_name: str,  remove: bool = True):
163 |         """
164 |         Method Name :   upload_file
165 |         Description :   This method uploads the from_filename file to bucket_name bucket with to_filename as bucket filename
166 | 
167 |         Output      :   Folder is created in s3 bucket
168 |         On Failure  :   Write an exception log and then raise an exception
169 | 
170 |         Version     :   1.2
171 |         Revisions   :   moved setup to cloud
172 |         """
173 |         logging.info("Entered the upload_file method of S3Operations class")
174 | 
175 |         try:
176 |             logging.info(
177 |                 f"Uploading {from_filename} file to {to_filename} file in {bucket_name} bucket"
178 |             )
179 | 
180 |             self.s3_resource.meta.client.upload_file(
181 |                 from_filename, bucket_name, to_filename
182 |             )
183 | 
184 |             logging.info(
185 |                 f"Uploaded {from_filename} file to {to_filename} file in {bucket_name} bucket"
186 |             )
187 | 
188 |             if remove is True:
189 |                 os.remove(from_filename)
190 | 
191 |                 logging.info(f"Remove is set to {remove}, deleted the file")
192 | 
193 |             else:
194 |                 logging.info(f"Remove is set to {remove}, not deleted the file")
195 | 
196 |             logging.info("Exited the upload_file method of S3Operations class")
197 | 
198 |         except Exception as e:
199 |             raise USvisaException(e, sys) from e
200 | 
201 |     def upload_df_as_csv(self,data_frame: DataFrame,local_filename: str, bucket_filename: str,bucket_name: str,) -> None:
202 |         """
203 |         Method Name :   upload_df_as_csv
204 |         Description :   This method uploads the dataframe to bucket_filename csv file in bucket_name bucket
205 | 
206 |         Output      :   Folder is created in s3 bucket
207 |         On Failure  :   Write an exception log and then raise an exception
208 | 
209 |         Version     :   1.2
210 |         Revisions   :   moved setup to cloud
211 |         """
212 |         logging.info("Entered the upload_df_as_csv method of S3Operations class")
213 | 
214 |         try:
215 |             data_frame.to_csv(local_filename, index=None, header=True)
216 | 
217 |             self.upload_file(local_filename, bucket_filename, bucket_name)
218 | 
219 |             logging.info("Exited the upload_df_as_csv method of S3Operations class")
220 | 
221 |         except Exception as e:
222 |             raise USvisaException(e, sys) from e
223 | 
224 |     def get_df_from_object(self, object_: object) -> DataFrame:
225 |         """
226 |         Method Name :   get_df_from_object
227 |         Description :   This method gets the dataframe from the object_name object
228 | 
229 |         Output      :   Folder is created in s3 bucket
230 |         On Failure  :   Write an exception log and then raise an exception
231 | 
232 |         Version     :   1.2
233 |         Revisions   :   moved setup to cloud
234 |         """
235 |         logging.info("Entered the get_df_from_object method of S3Operations class")
236 | 
237 |         try:
238 |             content = self.read_object(object_, make_readable=True)
239 |             df = read_csv(content, na_values="na")
240 |             logging.info("Exited the get_df_from_object method of S3Operations class")
241 |             return df
242 |         except Exception as e:
243 |             raise USvisaException(e, sys) from e
244 | 
245 |     def read_csv(self, filename: str, bucket_name: str) -> DataFrame:
246 |         """
247 |         Method Name :   get_df_from_object
248 |         Description :   This method gets the dataframe from the object_name object
249 | 
250 |         Output      :   Folder is created in s3 bucket
251 |         On Failure  :   Write an exception log and then raise an exception
252 | 
253 |         Version     :   1.2
254 |         Revisions   :   moved setup to cloud
255 |         """
256 |         logging.info("Entered the read_csv method of S3Operations class")
257 | 
258 |         try:
259 |             csv_obj = self.get_file_object(filename, bucket_name)
260 |             df = self.get_df_from_object(csv_obj)
261 |             logging.info("Exited the read_csv method of S3Operations class")
262 |             return df
263 |         except Exception as e:
264 |             raise USvisaException(e, sys) from e


--------------------------------------------------------------------------------