├── us_visa
│   ├── __init__.py
│   ├── entity
│   │   ├── __init__.py
│   │   ├── artifact_entity.py
│   │   ├── estimator.py
│   │   ├── s3_estimator.py
│   │   └── config_entity.py
│   ├── pipline
│   │   ├── __init__.py
│   │   ├── prediction_pipeline.py
│   │   └── training_pipeline.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── main_utils.py
│   ├── cloud_storage
│   │   ├── __init__.py
│   │   └── aws_storage.py
│   ├── components
│   │   ├── __init__.py
│   │   ├── model_pusher.py
│   │   ├── model_trainer.py
│   │   ├── data_ingestion.py
│   │   ├── model_evaluation.py
│   │   ├── data_validation.py
│   │   └── data_transformation.py
│   ├── configuration
│   │   ├── __init__.py
│   │   ├── mongo_db_connection.py
│   │   └── aws_connection.py
│   ├── data_access
│   │   ├── __init__.py
│   │   └── usvisa_data.py
│   ├── logger
│   │   └── __init__.py
│   ├── exception
│   │   └── __init__.py
│   └── constants
│       └── __init__.py
├── assignments
│   └── tasks.txt
├── flowcharts
│   ├── Model Pusher.png
│   ├── Model Trainer.png
│   ├── Data Ingestion.png
│   ├── Data Validation.png
│   ├── 1_Folder Structure.png
│   ├── Model Evaluation.png
│   └── Data Transformation.png
├── demo.py
├── .dockerignore
├── Dockerfile
├── setup.py
├── requirements.txt
├── config
│   ├── model.yaml
│   └── schema.yaml
├── LICENSE
├── static
│   └── css
│       └── style.css
├── template.py
├── .github
│   └── workflows
│       └── aws.yaml
├── README.md
├── .gitignore
├── app.py
└── templates
    └── usvisa.html

--------------------------------------------------------------------------------
/us_visa/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/us_visa/entity/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/us_visa/pipline/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/us_visa/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/us_visa/cloud_storage/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/us_visa/components/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/us_visa/configuration/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/us_visa/data_access/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/assignments/tasks.txt:
--------------------------------------------------------------------------------
1. You have to re-create the folder structure flowchart & make the setup ready for the next class

--------------------------------------------------------------------------------
/flowcharts/Model Pusher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Model Pusher.png

--------------------------------------------------------------------------------
/flowcharts/Model Trainer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Model Trainer.png

--------------------------------------------------------------------------------
/flowcharts/Data Ingestion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Data Ingestion.png

--------------------------------------------------------------------------------
/flowcharts/Data Validation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Data Validation.png

--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
from us_visa.pipline.training_pipeline import TrainPipeline


pipeline = TrainPipeline()
pipeline.run_pipeline()

--------------------------------------------------------------------------------
/flowcharts/1_Folder Structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/1_Folder Structure.png

--------------------------------------------------------------------------------
/flowcharts/Model Evaluation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Model Evaluation.png

--------------------------------------------------------------------------------
/flowcharts/Data Transformation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/entbappy/US-Visa-Approval-Prediction/HEAD/flowcharts/Data Transformation.png

--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
artifact
venv
env
.gitignore
logs
template.py
demo.py
README.md
LICENSE
us_visa.egg-info
notebook
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.8.5-slim-buster

WORKDIR /app

COPY . /app

RUN pip install -r requirements.txt

CMD ["python3", "app.py"]

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

setup(
    name="us_visa",
    version="0.0.0",
    author="Bappy",
    author_email="entbappy73@gmail.com",
    packages=find_packages()
)

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
ipykernel
pandas
numpy
matplotlib
plotly
seaborn
scipy
scikit-learn
imblearn
xgboost
catboost
pymongo
from_root
evidently==0.2.8
dill
PyYAML
neuro_mf
boto3
mypy-boto3-s3
botocore
fastapi
uvicorn
jinja2
python-multipart
-e .

--------------------------------------------------------------------------------
/us_visa/logger/__init__.py:
--------------------------------------------------------------------------------
import logging
import os

from from_root import from_root
from datetime import datetime

LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

log_dir = 'logs'

logs_path = os.path.join(from_root(), log_dir, LOG_FILE)

# Create the logs directory next to the project root (not relative to the CWD)
os.makedirs(os.path.dirname(logs_path), exist_ok=True)


logging.basicConfig(
    filename=logs_path,
    format="[ %(asctime)s ] %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)

--------------------------------------------------------------------------------
/us_visa/exception/__init__.py:
--------------------------------------------------------------------------------
import os
import sys

def error_message_detail(error, error_detail: sys):
    _, _, exc_tb = error_detail.exc_info()
    file_name = exc_tb.tb_frame.f_code.co_filename
    error_message = "Error occurred in python script name [{0}] line number [{1}] error message [{2}]".format(
        file_name, exc_tb.tb_lineno, str(error)
    )

    return error_message

class USvisaException(Exception):
    def __init__(self, error_message, error_detail):
        """
        :param error_message: error message in string format
        :param error_detail: the sys module, used to pull the active traceback
        """
        super().__init__(error_message)
        self.error_message = error_message_detail(
            error_message, error_detail=error_detail
        )

    def __str__(self):
        return self.error_message
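
Every module below wraps failures in this exception, so the raise pattern is worth seeing once. A minimal, hypothetical demo (not a file from the repo):

```python
# Hypothetical demo of the project-wide exception pattern
import sys
from us_visa.exception import USvisaException

try:
    result = 1 / 0
except Exception as e:
    # error_message_detail() pulls the script name and line number from the traceback
    raise USvisaException(e, sys) from e
```
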
--------------------------------------------------------------------------------
/config/model.yaml:
--------------------------------------------------------------------------------
grid_search:
  class: GridSearchCV
  module: sklearn.model_selection
  params:
    cv: 3
    verbose: 3
model_selection:
  module_0:
    class: KNeighborsClassifier
    module: sklearn.neighbors
    params:
      algorithm: kd_tree
      weights: uniform
      n_neighbors: 3
    search_param_grid:
      algorithm:
        - auto
        - ball_tree
        - kd_tree
        - brute
      weights:
        - uniform
        - distance
      n_neighbors:
        - 3
        - 5
        - 9


  module_1:
    class: RandomForestClassifier
    module: sklearn.ensemble
    params:
      max_depth: 10
      max_features: sqrt
      n_estimators: 3
    search_param_grid:
      max_depth:
        - 10
        - 15
        - 20
      max_features:
        - sqrt
        - log2
      n_estimators:
        - 3
        - 5
        - 9

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 BAPPY AHMED

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/us_visa/entity/artifact_entity.py:
--------------------------------------------------------------------------------
from dataclasses import dataclass


@dataclass
class DataIngestionArtifact:
    trained_file_path: str
    test_file_path: str



@dataclass
class DataValidationArtifact:
    validation_status: bool
    message: str
    drift_report_file_path: str


@dataclass
class DataTransformationArtifact:
    transformed_object_file_path: str
    transformed_train_file_path: str
    transformed_test_file_path: str


@dataclass
class ClassificationMetricArtifact:
    f1_score: float
    precision_score: float
    recall_score: float



@dataclass
class ModelTrainerArtifact:
    trained_model_file_path: str
    metric_artifact: ClassificationMetricArtifact



@dataclass
class ModelEvaluationArtifact:
    is_model_accepted: bool
    changed_accuracy: float
    s3_model_path: str
    trained_model_path: str



@dataclass
class ModelPusherArtifact:
    bucket_name: str
    s3_model_path: str
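
These dataclasses are plain value objects handed from one pipeline stage to the next. A small illustration of how they chain; all paths and scores here are made up:

```python
# Illustrative only: chaining stage outputs (all values hypothetical)
from us_visa.entity.artifact_entity import (
    ClassificationMetricArtifact,
    DataIngestionArtifact,
    ModelTrainerArtifact,
)

ingestion = DataIngestionArtifact(
    trained_file_path="artifact/01_01_2024_00_00_00/data_ingestion/ingested/train.csv",
    test_file_path="artifact/01_01_2024_00_00_00/data_ingestion/ingested/test.csv",
)
metrics = ClassificationMetricArtifact(f1_score=0.81, precision_score=0.79, recall_score=0.84)
trainer = ModelTrainerArtifact(
    trained_model_file_path="artifact/01_01_2024_00_00_00/model_trainer/trained_model/model.pkl",
    metric_artifact=metrics,
)
```
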
--------------------------------------------------------------------------------
/config/schema.yaml:
--------------------------------------------------------------------------------
columns:
  - case_id: category
  - continent: category
  - education_of_employee: category
  - has_job_experience: category
  - requires_job_training: category
  - no_of_employees: int
  - yr_of_estab: int
  - region_of_employment: category
  - prevailing_wage: int
  - unit_of_wage: category
  - full_time_position: category
  - case_status: category

numerical_columns:
  - no_of_employees
  - prevailing_wage
  - yr_of_estab

categorical_columns:
  - case_id
  - continent
  - education_of_employee
  - has_job_experience
  - requires_job_training
  - region_of_employment
  - unit_of_wage
  - full_time_position
  - case_status

drop_columns:
  - case_id
  - yr_of_estab

# for data transformation
num_features:
  - no_of_employees
  - prevailing_wage
  - company_age

or_columns:
  - has_job_experience
  - requires_job_training
  - full_time_position
  - education_of_employee

oh_columns:
  - continent
  - unit_of_wage
  - region_of_employment

transform_columns:
  - no_of_employees
  - company_age

--------------------------------------------------------------------------------
/us_visa/configuration/mongo_db_connection.py:
--------------------------------------------------------------------------------
import sys

from us_visa.exception import USvisaException
from us_visa.logger import logging

import os
from us_visa.constants import DATABASE_NAME, MONGODB_URL_KEY
import pymongo
import certifi

ca = certifi.where()

class MongoDBClient:
    """
    Class Name  : MongoDBClient
    Description : Creates a shared client connection to the MongoDB database

    Output      : connection to mongodb database
    On Failure  : raises an exception
    """
    client = None

    def __init__(self, database_name=DATABASE_NAME) -> None:
        try:
            if MongoDBClient.client is None:
                mongo_db_url = os.getenv(MONGODB_URL_KEY)
                if mongo_db_url is None:
                    raise Exception(f"Environment key: {MONGODB_URL_KEY} is not set.")
                MongoDBClient.client = pymongo.MongoClient(mongo_db_url, tlsCAFile=ca)
            self.client = MongoDBClient.client
            self.database = self.client[database_name]
            self.database_name = database_name
            logging.info("MongoDB connection successful")
        except Exception as e:
            raise USvisaException(e, sys)

--------------------------------------------------------------------------------
/us_visa/data_access/usvisa_data.py:
--------------------------------------------------------------------------------
from us_visa.configuration.mongo_db_connection import MongoDBClient
from us_visa.constants import DATABASE_NAME
from us_visa.exception import USvisaException
import pandas as pd
import sys
from typing import Optional
import numpy as np



class USvisaData:
    """
    This class helps to export the entire MongoDB record as a pandas dataframe
    """

    def __init__(self):
        try:
            self.mongo_client = MongoDBClient(database_name=DATABASE_NAME)
        except Exception as e:
            raise USvisaException(e, sys)


    def export_collection_as_dataframe(self, collection_name: str, database_name: Optional[str] = None) -> pd.DataFrame:
        """
        Export an entire collection as a dataframe:
        return pd.DataFrame of the collection
        """
        try:
            if database_name is None:
                collection = self.mongo_client.database[collection_name]
            else:
                # use the underlying pymongo client for a non-default database
                collection = self.mongo_client.client[database_name][collection_name]

            df = pd.DataFrame(list(collection.find()))
            if "_id" in df.columns.to_list():
                df = df.drop(columns=["_id"], axis=1)
            df.replace({"na": np.nan}, inplace=True)
            return df
        except Exception as e:
            raise USvisaException(e, sys)
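
A quick, hypothetical smoke test of the exporter, assuming MONGODB_URL is exported as described in the README:

```python
# Assumes the MONGODB_URL environment variable is set
from us_visa.constants import COLLECTION_NAME
from us_visa.data_access.usvisa_data import USvisaData

df = USvisaData().export_collection_as_dataframe(collection_name=COLLECTION_NAME)
print(df.shape)  # full visa_data collection as a DataFrame, "_id" dropped
```
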
--------------------------------------------------------------------------------
/static/css/style.css:
--------------------------------------------------------------------------------
.navbar-light .navbar-brand {
    color: rgba(0, 0, 0, .9);
}

.navbar-light .navbar-brand {
    margin-left: auto;
    margin-right: auto;
}

body {
    margin: 0;
    padding: 0;
    font-family: sans-serif;
}

.formBox {
    margin-top: 50px;
    padding: 50px;
}

h1 {
    margin: 0;
    padding: 0;
    text-align: center;
    margin-bottom: 50px !important;
    text-transform: uppercase;
    font-size: 48px;
}

.inputBox {
    position: relative;
    box-sizing: border-box;
    margin-bottom: 40px;
}

.button {
    width: 100%;
    background: #00bcd4;
    color: #fff;
    border-radius: 0;
    border: none;
    outline: none;
    height: 50px;
    font-size: 24px;
}

.input {
    position: relative;
    width: 100%;
    height: 50px;
    background: transparent;
    border: none;
    outline: none;
    font-size: 24px;
    border-bottom: 2px solid rgba(0, 0, 0, .5);
}

.inputText {
    position: absolute;
    line-height: 50px;
    font-size: 24px;
    transition: .5s;
    opacity: 0.5;
}

.focus .inputText {
    transform: translateY(-30px);
    font-size: 18px;
    opacity: 1;
    color: #00bcd4;
}

.btn_train {
    width: 100%;
    background: red;
    color: #fff;
    border-radius: 0;
    border: none;
    outline: none;
    height: 50px;
    font-size: 24px;
}

--------------------------------------------------------------------------------
/template.py:
--------------------------------------------------------------------------------
import os
from pathlib import Path

project_name = "us_visa"

list_of_files = [

    f"{project_name}/__init__.py",
    f"{project_name}/components/__init__.py",
    f"{project_name}/components/data_ingestion.py",
    f"{project_name}/components/data_validation.py",
    f"{project_name}/components/data_transformation.py",
    f"{project_name}/components/model_trainer.py",
    f"{project_name}/components/model_evaluation.py",
    f"{project_name}/components/model_pusher.py",
    f"{project_name}/configuration/__init__.py",
    f"{project_name}/constants/__init__.py",
    f"{project_name}/entity/__init__.py",
    f"{project_name}/entity/config_entity.py",
    f"{project_name}/entity/artifact_entity.py",
    f"{project_name}/exception/__init__.py",
    f"{project_name}/logger/__init__.py",
    f"{project_name}/pipline/__init__.py",
    f"{project_name}/pipline/training_pipeline.py",
    f"{project_name}/pipline/prediction_pipeline.py",
    f"{project_name}/utils/__init__.py",
    f"{project_name}/utils/main_utils.py",
    "app.py",
    "requirements.txt",
    "Dockerfile",
    ".dockerignore",
    "demo.py",
    "setup.py",
    "config/model.yaml",
    "config/schema.yaml",
]


for filepath in list_of_files:
    filepath = Path(filepath)
    filedir, filename = os.path.split(filepath)
    if filedir != "":
        os.makedirs(filedir, exist_ok=True)
    if (not os.path.exists(filepath)) or (os.path.getsize(filepath) == 0):
        with open(filepath, "w") as f:
            pass
    else:
        print(f"file is already present at: {filepath}")
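
template.py bootstraps the tree shown at the top of this dump; run it once from the repo root. Re-running is harmless, since existing non-empty files are skipped:

```bash
python template.py
```
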
--------------------------------------------------------------------------------
/us_visa/configuration/aws_connection.py:
--------------------------------------------------------------------------------
import boto3
import os
from us_visa.constants import AWS_SECRET_ACCESS_KEY_ENV_KEY, AWS_ACCESS_KEY_ID_ENV_KEY, REGION_NAME


class S3Client:

    s3_client = None
    s3_resource = None

    def __init__(self, region_name=REGION_NAME):
        """
        This class reads aws credentials from environment variables, creates a connection
        with the s3 bucket, and raises an exception when an environment variable is not set
        """

        if S3Client.s3_resource is None or S3Client.s3_client is None:
            __access_key_id = os.getenv(AWS_ACCESS_KEY_ID_ENV_KEY)
            __secret_access_key = os.getenv(AWS_SECRET_ACCESS_KEY_ENV_KEY)
            if __access_key_id is None:
                raise Exception(f"Environment variable: {AWS_ACCESS_KEY_ID_ENV_KEY} is not set.")
            if __secret_access_key is None:
                raise Exception(f"Environment variable: {AWS_SECRET_ACCESS_KEY_ENV_KEY} is not set.")

            S3Client.s3_resource = boto3.resource('s3',
                                                  aws_access_key_id=__access_key_id,
                                                  aws_secret_access_key=__secret_access_key,
                                                  region_name=region_name
                                                  )
            S3Client.s3_client = boto3.client('s3',
                                              aws_access_key_id=__access_key_id,
                                              aws_secret_access_key=__secret_access_key,
                                              region_name=region_name
                                              )
        self.s3_resource = S3Client.s3_resource
        self.s3_client = S3Client.s3_client

--------------------------------------------------------------------------------
/us_visa/entity/estimator.py:
--------------------------------------------------------------------------------
import sys

from pandas import DataFrame
from sklearn.pipeline import Pipeline

from us_visa.exception import USvisaException
from us_visa.logger import logging

class TargetValueMapping:
    def __init__(self):
        self.Certified: int = 0
        self.Denied: int = 1

    def _asdict(self):
        return self.__dict__

    def reverse_mapping(self):
        mapping_response = self._asdict()
        return dict(zip(mapping_response.values(), mapping_response.keys()))




class USvisaModel:
    def __init__(self, preprocessing_object: Pipeline, trained_model_object: object):
        """
        :param preprocessing_object: Input object of the preprocessor
        :param trained_model_object: Input object of the trained model
        """
        self.preprocessing_object = preprocessing_object
        self.trained_model_object = trained_model_object

    def predict(self, dataframe: DataFrame) -> DataFrame:
        """
        Accepts raw inputs and transforms them with preprocessing_object, which guarantees
        that the inputs are in the same format as the training data. Finally, it performs
        prediction on the transformed features.
        """
        logging.info("Entered predict method of USvisaModel class")

        try:
            logging.info("Using the trained model to get predictions")

            transformed_feature = self.preprocessing_object.transform(dataframe)

            logging.info("Used the trained model to get predictions")
            return self.trained_model_object.predict(transformed_feature)

        except Exception as e:
            raise USvisaException(e, sys) from e

    def __repr__(self):
        return f"{type(self.trained_model_object).__name__}()"

    def __str__(self):
        return f"{type(self.trained_model_object).__name__}()"
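
For reference, the label mapping above behaves like this (expected values shown as comments):

```python
from us_visa.entity.estimator import TargetValueMapping

mapping = TargetValueMapping()
print(mapping._asdict())          # {'Certified': 0, 'Denied': 1}
print(mapping.reverse_mapping())  # {0: 'Certified', 1: 'Denied'}
```
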
--------------------------------------------------------------------------------
/.github/workflows/aws.yaml:
--------------------------------------------------------------------------------
name: Deploy Application Docker Image to EC2 instance

on:
  push:
    branches: [main]

jobs:
  Continuous-Integration:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_DEFAULT_REGION }}

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v1

      - name: Build, tag, and push image to Amazon ECR
        id: build-image
        env:
          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          ECR_REPOSITORY: ${{ secrets.ECR_REPO }}
          IMAGE_TAG: latest
        run: |
          docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG .
          docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG
          echo "::set-output name=image::$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG"

  Continuous-Deployment:
    needs: Continuous-Integration
    runs-on: self-hosted
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_DEFAULT_REGION }}

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v1

      - name: Run Docker Image to serve users
        run: |
          docker run -d -e AWS_ACCESS_KEY_ID="${{ secrets.AWS_ACCESS_KEY_ID }}" -e AWS_SECRET_ACCESS_KEY="${{ secrets.AWS_SECRET_ACCESS_KEY }}" -e AWS_DEFAULT_REGION="${{ secrets.AWS_DEFAULT_REGION }}" -e MONGODB_URL="${{ secrets.MONGODB_URL }}" -p 8080:8080 "${{ steps.login-ecr.outputs.registry }}"/"${{ secrets.ECR_REPO }}":latest
--------------------------------------------------------------------------------
/us_visa/constants/__init__.py:
--------------------------------------------------------------------------------
import os
from datetime import date

DATABASE_NAME = "US_VISA"

COLLECTION_NAME = "visa_data"

MONGODB_URL_KEY = "MONGODB_URL"

PIPELINE_NAME: str = "usvisa"
ARTIFACT_DIR: str = "artifact"

MODEL_FILE_NAME = "model.pkl"

TARGET_COLUMN = "case_status"
CURRENT_YEAR = date.today().year
PREPROCSSING_OBJECT_FILE_NAME = "preprocessing.pkl"

FILE_NAME: str = "usvisa.csv"
TRAIN_FILE_NAME: str = "train.csv"
TEST_FILE_NAME: str = "test.csv"
SCHEMA_FILE_PATH = os.path.join("config", "schema.yaml")


AWS_ACCESS_KEY_ID_ENV_KEY = "AWS_ACCESS_KEY_ID"
AWS_SECRET_ACCESS_KEY_ENV_KEY = "AWS_SECRET_ACCESS_KEY"
REGION_NAME = "us-east-1"


"""
Data Ingestion related constants start with DATA_INGESTION VAR NAME
"""
DATA_INGESTION_COLLECTION_NAME: str = "visa_data"
DATA_INGESTION_DIR_NAME: str = "data_ingestion"
DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
DATA_INGESTION_INGESTED_DIR: str = "ingested"
DATA_INGESTION_TRAIN_TEST_SPLIT_RATIO: float = 0.2



"""
Data Validation related constants start with DATA_VALIDATION VAR NAME
"""
DATA_VALIDATION_DIR_NAME: str = "data_validation"
DATA_VALIDATION_DRIFT_REPORT_DIR: str = "drift_report"
DATA_VALIDATION_DRIFT_REPORT_FILE_NAME: str = "report.yaml"



"""
Data Transformation related constants start with DATA_TRANSFORMATION VAR NAME
"""
DATA_TRANSFORMATION_DIR_NAME: str = "data_transformation"
DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR: str = "transformed"
DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR: str = "transformed_object"


"""
MODEL TRAINER related constants start with MODEL_TRAINER var name
"""
MODEL_TRAINER_DIR_NAME: str = "model_trainer"
MODEL_TRAINER_TRAINED_MODEL_DIR: str = "trained_model"
MODEL_TRAINER_TRAINED_MODEL_NAME: str = "model.pkl"
MODEL_TRAINER_EXPECTED_SCORE: float = 0.6
MODEL_TRAINER_MODEL_CONFIG_FILE_PATH: str = os.path.join("config", "model.yaml")


MODEL_EVALUATION_CHANGED_THRESHOLD_SCORE: float = 0.02
MODEL_BUCKET_NAME = "usvisa-model2024"
MODEL_PUSHER_S3_KEY = "model-registry"


APP_HOST = "0.0.0.0"
APP_PORT = 8080

--------------------------------------------------------------------------------
/us_visa/entity/s3_estimator.py:
--------------------------------------------------------------------------------
from us_visa.cloud_storage.aws_storage import SimpleStorageService
from us_visa.exception import USvisaException
from us_visa.entity.estimator import USvisaModel
import sys
from pandas import DataFrame


class USvisaEstimator:
    """
    This class is used to save and retrieve the us_visa model in the s3 bucket and to do prediction
    """

    def __init__(self, bucket_name, model_path):
        """
        :param bucket_name: Name of your model bucket
        :param model_path: Location of your model in the bucket
        """
        self.bucket_name = bucket_name
        self.s3 = SimpleStorageService()
        self.model_path = model_path
        self.loaded_model: USvisaModel = None


    def is_model_present(self, model_path):
        try:
            return self.s3.s3_key_path_available(bucket_name=self.bucket_name, s3_key=model_path)
        except USvisaException as e:
            print(e)
            return False

    def load_model(self) -> USvisaModel:
        """
        Load the model from the model_path
        :return:
        """

        return self.s3.load_model(self.model_path, bucket_name=self.bucket_name)

    def save_model(self, from_file, remove: bool = False) -> None:
        """
        Save the model to the model_path
        :param from_file: Your local system model path
        :param remove: By default it is False, which means the model stays available locally in your system folder
        :return:
        """
        try:
            self.s3.upload_file(from_file,
                                to_filename=self.model_path,
                                bucket_name=self.bucket_name,
                                remove=remove
                                )
        except Exception as e:
            raise USvisaException(e, sys)


    def predict(self, dataframe: DataFrame):
        """
        :param dataframe:
        :return:
        """
        try:
            if self.loaded_model is None:
                self.loaded_model = self.load_model()
            return self.loaded_model.predict(dataframe=dataframe)
        except Exception as e:
            raise USvisaException(e, sys)
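
A hypothetical round-trip with the estimator, assuming the AWS credentials and the bucket named in the constants above are in place:

```python
# Sketch only: requires AWS credentials in the environment and an existing bucket
from us_visa.constants import MODEL_BUCKET_NAME, MODEL_FILE_NAME
from us_visa.entity.s3_estimator import USvisaEstimator

estimator = USvisaEstimator(bucket_name=MODEL_BUCKET_NAME, model_path=MODEL_FILE_NAME)
if estimator.is_model_present(model_path=MODEL_FILE_NAME):
    model = estimator.load_model()  # returns the USvisaModel stored in s3
```
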
--------------------------------------------------------------------------------
/us_visa/components/model_pusher.py:
--------------------------------------------------------------------------------
import sys

from us_visa.cloud_storage.aws_storage import SimpleStorageService
from us_visa.exception import USvisaException
from us_visa.logger import logging
from us_visa.entity.artifact_entity import ModelPusherArtifact, ModelEvaluationArtifact
from us_visa.entity.config_entity import ModelPusherConfig
from us_visa.entity.s3_estimator import USvisaEstimator


class ModelPusher:
    def __init__(self, model_evaluation_artifact: ModelEvaluationArtifact,
                 model_pusher_config: ModelPusherConfig):
        """
        :param model_evaluation_artifact: Output reference of the model evaluation artifact stage
        :param model_pusher_config: Configuration for model pusher
        """
        self.s3 = SimpleStorageService()
        self.model_evaluation_artifact = model_evaluation_artifact
        self.model_pusher_config = model_pusher_config
        self.usvisa_estimator = USvisaEstimator(bucket_name=model_pusher_config.bucket_name,
                                                model_path=model_pusher_config.s3_model_key_path)

    def initiate_model_pusher(self) -> ModelPusherArtifact:
        """
        Method Name : initiate_model_pusher
        Description : This function is used to initiate all steps of the model pusher

        Output      : Returns model pusher artifact
        On Failure  : Write an exception log and then raise an exception
        """
        logging.info("Entered initiate_model_pusher method of ModelPusher class")

        try:
            logging.info("Uploading artifacts folder to s3 bucket")

            self.usvisa_estimator.save_model(from_file=self.model_evaluation_artifact.trained_model_path)


            model_pusher_artifact = ModelPusherArtifact(bucket_name=self.model_pusher_config.bucket_name,
                                                        s3_model_path=self.model_pusher_config.s3_model_key_path)

            logging.info("Uploaded artifacts folder to s3 bucket")
            logging.info(f"Model pusher artifact: [{model_pusher_artifact}]")
            logging.info("Exited initiate_model_pusher method of ModelPusher class")

            return model_pusher_artifact
        except Exception as e:
            raise USvisaException(e, sys) from e
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# US-Visa-Approval-Prediction

## Live materials docs

[link](https://docs.google.com/document/d/1UFiHnyKRqgx8Lodsvdzu58LbVjdWHNf-uab2WmhE0A4/edit?usp=sharing)


## Git commands

```bash
git add .

git commit -m "Updated"

git push origin main
```

## How to run?

```bash
conda create -n visa python=3.8 -y
```

```bash
conda activate visa
```

```bash
pip install -r requirements.txt
```

```bash
python app.py
```


## Workflow

1. constant
2. config_entity
3. artifact_entity
4. component
5. pipeline
6. app.py / demo.py


### Export the environment variables
```bash

export MONGODB_URL="mongodb+srv://<username>:<password>@...."

export AWS_ACCESS_KEY_ID=<AWS_ACCESS_KEY_ID>

export AWS_SECRET_ACCESS_KEY=<AWS_SECRET_ACCESS_KEY>
```



# AWS-CICD-Deployment-with-Github-Actions

## 1. Login to AWS console.

## 2. Create IAM user for deployment

#with specific access

1. EC2 access : It is a virtual machine

2. ECR: Elastic Container Registry to save your docker image in aws


#Description: About the deployment

1. Build docker image of the source code

2. Push your docker image to ECR

3. Launch your EC2

4. Pull your image from ECR in EC2

5. Launch your docker image in EC2

#Policy:

1. AmazonEC2ContainerRegistryFullAccess

2. AmazonEC2FullAccess


## 3. Create ECR repo to store/save docker image
- Save the URI: 136566696263.dkr.ecr.us-east-1.amazonaws.com/mlproject


## 4. Create EC2 machine (Ubuntu)

## 5. Open EC2 and install docker in EC2 machine:

```bash
#optional
sudo apt-get update -y

sudo apt-get upgrade

#required
curl -fsSL https://get.docker.com -o get-docker.sh

sudo sh get-docker.sh

sudo usermod -aG docker ubuntu

newgrp docker
```

## 6. Configure EC2 as self-hosted runner:
Settings > Actions > Runners > New self-hosted runner > choose OS > then run the commands one by one


## 7. Setup github secrets:

- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- AWS_DEFAULT_REGION
- ECR_REPO
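
training_pipeline.py itself is not included in this dump; judging from demo.py and the artifact classes above, run_pipeline presumably chains the components in the README's workflow order, roughly like this sketch (not the actual file):

```python
# Hypothetical sketch; the real TrainPipeline.run_pipeline may differ
from us_visa.components.data_ingestion import DataIngestion

ingestion_artifact = DataIngestion().initiate_data_ingestion()
# ...then data validation, transformation, model training, evaluation and
# model pushing follow, each stage consuming the previous stage's artifact.
```
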
--------------------------------------------------------------------------------
/us_visa/entity/config_entity.py:
--------------------------------------------------------------------------------
import os
from us_visa.constants import *
from dataclasses import dataclass
from datetime import datetime

TIMESTAMP: str = datetime.now().strftime("%m_%d_%Y_%H_%M_%S")

@dataclass
class TrainingPipelineConfig:
    pipeline_name: str = PIPELINE_NAME
    artifact_dir: str = os.path.join(ARTIFACT_DIR, TIMESTAMP)
    timestamp: str = TIMESTAMP


training_pipeline_config: TrainingPipelineConfig = TrainingPipelineConfig()

@dataclass
class DataIngestionConfig:
    data_ingestion_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_INGESTION_DIR_NAME)
    feature_store_file_path: str = os.path.join(data_ingestion_dir, DATA_INGESTION_FEATURE_STORE_DIR, FILE_NAME)
    training_file_path: str = os.path.join(data_ingestion_dir, DATA_INGESTION_INGESTED_DIR, TRAIN_FILE_NAME)
    testing_file_path: str = os.path.join(data_ingestion_dir, DATA_INGESTION_INGESTED_DIR, TEST_FILE_NAME)
    train_test_split_ratio: float = DATA_INGESTION_TRAIN_TEST_SPLIT_RATIO
    collection_name: str = DATA_INGESTION_COLLECTION_NAME



@dataclass
class DataValidationConfig:
    data_validation_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_VALIDATION_DIR_NAME)
    drift_report_file_path: str = os.path.join(data_validation_dir, DATA_VALIDATION_DRIFT_REPORT_DIR,
                                               DATA_VALIDATION_DRIFT_REPORT_FILE_NAME)




@dataclass
class DataTransformationConfig:
    data_transformation_dir: str = os.path.join(training_pipeline_config.artifact_dir, DATA_TRANSFORMATION_DIR_NAME)
    transformed_train_file_path: str = os.path.join(data_transformation_dir, DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,
                                                    TRAIN_FILE_NAME.replace("csv", "npy"))
    transformed_test_file_path: str = os.path.join(data_transformation_dir, DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,
                                                   TEST_FILE_NAME.replace("csv", "npy"))
    transformed_object_file_path: str = os.path.join(data_transformation_dir,
                                                     DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR,
                                                     PREPROCSSING_OBJECT_FILE_NAME)




@dataclass
class ModelTrainerConfig:
    model_trainer_dir: str = os.path.join(training_pipeline_config.artifact_dir, MODEL_TRAINER_DIR_NAME)
    trained_model_file_path: str = os.path.join(model_trainer_dir, MODEL_TRAINER_TRAINED_MODEL_DIR, MODEL_FILE_NAME)
    expected_accuracy: float = MODEL_TRAINER_EXPECTED_SCORE
    model_config_file_path: str = MODEL_TRAINER_MODEL_CONFIG_FILE_PATH



@dataclass
class ModelEvaluationConfig:
    changed_threshold_score: float = MODEL_EVALUATION_CHANGED_THRESHOLD_SCORE
    bucket_name: str = MODEL_BUCKET_NAME
    s3_model_key_path: str = MODEL_FILE_NAME



@dataclass
class ModelPusherConfig:
    bucket_name: str = MODEL_BUCKET_NAME
    s3_model_key_path: str = MODEL_FILE_NAME



@dataclass
class USvisaPredictorConfig:
    model_file_path: str = MODEL_FILE_NAME
    model_bucket_name: str = MODEL_BUCKET_NAME
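
Each config dataclass resolves its paths from the constants at import time, sharing one timestamped artifact directory per run; for example:

```python
from us_visa.entity.config_entity import DataIngestionConfig

cfg = DataIngestionConfig()
# e.g. artifact/01_01_2024_00_00_00/data_ingestion/ingested/train.csv (timestamp varies)
print(cfg.training_file_path)
```
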
--------------------------------------------------------------------------------
/us_visa/utils/main_utils.py:
--------------------------------------------------------------------------------
import os
import sys

import numpy as np
import dill
import yaml
from pandas import DataFrame

from us_visa.exception import USvisaException
from us_visa.logger import logging


def read_yaml_file(file_path: str) -> dict:
    try:
        with open(file_path, "rb") as yaml_file:
            return yaml.safe_load(yaml_file)

    except Exception as e:
        raise USvisaException(e, sys) from e


def write_yaml_file(file_path: str, content: object, replace: bool = False) -> None:
    try:
        if replace:
            if os.path.exists(file_path):
                os.remove(file_path)
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, "w") as file:
            yaml.dump(content, file)
    except Exception as e:
        raise USvisaException(e, sys) from e


def load_object(file_path: str) -> object:
    logging.info("Entered the load_object method of utils")

    try:

        with open(file_path, "rb") as file_obj:
            obj = dill.load(file_obj)

        logging.info("Exited the load_object method of utils")

        return obj

    except Exception as e:
        raise USvisaException(e, sys) from e

def save_numpy_array_data(file_path: str, array: np.array):
    """
    Save numpy array data to file
    file_path: str location of file to save
    array: np.array data to save
    """
    try:
        dir_path = os.path.dirname(file_path)
        os.makedirs(dir_path, exist_ok=True)
        with open(file_path, 'wb') as file_obj:
            np.save(file_obj, array)
    except Exception as e:
        raise USvisaException(e, sys) from e


def load_numpy_array_data(file_path: str) -> np.array:
    """
    load numpy array data from file
    file_path: str location of file to load
    return: np.array data loaded
    """
    try:
        with open(file_path, 'rb') as file_obj:
            return np.load(file_obj)
    except Exception as e:
        raise USvisaException(e, sys) from e


def save_object(file_path: str, obj: object) -> None:
    logging.info("Entered the save_object method of utils")

    try:
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, "wb") as file_obj:
            dill.dump(obj, file_obj)

        logging.info("Exited the save_object method of utils")

    except Exception as e:
        raise USvisaException(e, sys) from e


def drop_columns(df: DataFrame, cols: list) -> DataFrame:

    """
    drop the columns from a pandas DataFrame
    df: pandas DataFrame
    cols: list of columns to be dropped
    """
    logging.info("Entered drop_columns method of utils")

    try:
        df = df.drop(columns=cols, axis=1)

        logging.info("Exited the drop_columns method of utils")

        return df
    except Exception as e:
        raise USvisaException(e, sys) from e
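
A short, hypothetical round-trip with these helpers (the pickle path is illustrative):

```python
from us_visa.utils.main_utils import load_object, read_yaml_file, save_object

schema = read_yaml_file("config/schema.yaml")     # parsed into a dict
save_object("artifact/demo/schema.pkl", schema)   # dill-pickles any object
restored = load_object("artifact/demo/schema.pkl")
assert restored == schema
```
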
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
artifact/*
--------------------------------------------------------------------------------
/us_visa/pipline/prediction_pipeline.py:
--------------------------------------------------------------------------------
import os
import sys

import numpy as np
import pandas as pd
from us_visa.entity.config_entity import USvisaPredictorConfig
from us_visa.entity.s3_estimator import USvisaEstimator
from us_visa.exception import USvisaException
from us_visa.logger import logging
from us_visa.utils.main_utils import read_yaml_file
from pandas import DataFrame


class USvisaData:
    def __init__(self,
                 continent,
                 education_of_employee,
                 has_job_experience,
                 requires_job_training,
                 no_of_employees,
                 region_of_employment,
                 prevailing_wage,
                 unit_of_wage,
                 full_time_position,
                 company_age
                 ):
        """
        USvisaData constructor
        Input: all the features the trained model expects for prediction
        """
        try:
            self.continent = continent
            self.education_of_employee = education_of_employee
            self.has_job_experience = has_job_experience
            self.requires_job_training = requires_job_training
            self.no_of_employees = no_of_employees
            self.region_of_employment = region_of_employment
            self.prevailing_wage = prevailing_wage
            self.unit_of_wage = unit_of_wage
            self.full_time_position = full_time_position
            self.company_age = company_age


        except Exception as e:
            raise USvisaException(e, sys) from e

    def get_usvisa_input_data_frame(self) -> DataFrame:
        """
        This function returns a DataFrame from USvisaData class input
        """
        try:

            usvisa_input_dict = self.get_usvisa_data_as_dict()
            return DataFrame(usvisa_input_dict)

        except Exception as e:
            raise USvisaException(e, sys) from e


    def get_usvisa_data_as_dict(self):
        """
        This function returns a dictionary from USvisaData class input
        """
        logging.info("Entered get_usvisa_data_as_dict method of USvisaData class")

        try:
            input_data = {
                "continent": [self.continent],
                "education_of_employee": [self.education_of_employee],
                "has_job_experience": [self.has_job_experience],
                "requires_job_training": [self.requires_job_training],
                "no_of_employees": [self.no_of_employees],
                "region_of_employment": [self.region_of_employment],
                "prevailing_wage": [self.prevailing_wage],
                "unit_of_wage": [self.unit_of_wage],
                "full_time_position": [self.full_time_position],
                "company_age": [self.company_age],
            }

            logging.info("Created usvisa data dict")

            logging.info("Exited get_usvisa_data_as_dict method of USvisaData class")

            return input_data

        except Exception as e:
            raise USvisaException(e, sys) from e

class USvisaClassifier:
    def __init__(self, prediction_pipeline_config: USvisaPredictorConfig = USvisaPredictorConfig()) -> None:
        """
        :param prediction_pipeline_config: Configuration for predicting the value
        """
        try:
            # self.schema_config = read_yaml_file(SCHEMA_FILE_PATH)
            self.prediction_pipeline_config = prediction_pipeline_config
        except Exception as e:
            raise USvisaException(e, sys)

    def predict(self, dataframe) -> str:
        """
        Prediction method of USvisaClassifier
        Returns: Prediction in string format
        """
        try:
            logging.info("Entered predict method of USvisaClassifier class")
            model = USvisaEstimator(
                bucket_name=self.prediction_pipeline_config.model_bucket_name,
                model_path=self.prediction_pipeline_config.model_file_path,
            )
            result = model.predict(dataframe)

            return result

        except Exception as e:
            raise USvisaException(e, sys)
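
A single offline prediction with the classes above might look like this; the feature values are invented, and the call requires the trained model to already be present in s3:

```python
# Hypothetical input; requires AWS credentials and a pushed model
from us_visa.pipline.prediction_pipeline import USvisaClassifier, USvisaData

data = USvisaData(
    continent="Asia", education_of_employee="Master's",
    has_job_experience="Y", requires_job_training="N",
    no_of_employees=500, region_of_employment="West",
    prevailing_wage=85000.0, unit_of_wage="Year",
    full_time_position="Y", company_age=20,
)
result = USvisaClassifier().predict(dataframe=data.get_usvisa_input_data_frame())
```
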
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from starlette.responses import HTMLResponse, RedirectResponse
from uvicorn import run as app_run

from typing import Optional

from us_visa.constants import APP_HOST, APP_PORT
from us_visa.pipline.prediction_pipeline import USvisaData, USvisaClassifier
from us_visa.pipline.training_pipeline import TrainPipeline

app = FastAPI()

app.mount("/static", StaticFiles(directory="static"), name="static")

templates = Jinja2Templates(directory='templates')

origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class DataForm:
    def __init__(self, request: Request):
        self.request: Request = request
        self.continent: Optional[str] = None
        self.education_of_employee: Optional[str] = None
        self.has_job_experience: Optional[str] = None
        self.requires_job_training: Optional[str] = None
        self.no_of_employees: Optional[str] = None
        self.company_age: Optional[str] = None
        self.region_of_employment: Optional[str] = None
        self.prevailing_wage: Optional[str] = None
        self.unit_of_wage: Optional[str] = None
        self.full_time_position: Optional[str] = None


    async def get_usvisa_data(self):
        form = await self.request.form()
        self.continent = form.get("continent")
        self.education_of_employee = form.get("education_of_employee")
        self.has_job_experience = form.get("has_job_experience")
        self.requires_job_training = form.get("requires_job_training")
        self.no_of_employees = form.get("no_of_employees")
        self.company_age = form.get("company_age")
        self.region_of_employment = form.get("region_of_employment")
        self.prevailing_wage = form.get("prevailing_wage")
        self.unit_of_wage = form.get("unit_of_wage")
        self.full_time_position = form.get("full_time_position")

@app.get("/", tags=["authentication"])
async def index(request: Request):

    return templates.TemplateResponse(
            "usvisa.html", {"request": request, "context": "Rendering"})


@app.get("/train")
async def trainRouteClient():
    try:
        train_pipeline = TrainPipeline()

        train_pipeline.run_pipeline()

        return Response("Training successful !!")

    except Exception as e:
        return Response(f"Error Occurred! {e}")


@app.post("/")
async def predictRouteClient(request: Request):
    try:
        form = DataForm(request)
        await form.get_usvisa_data()

        usvisa_data = USvisaData(
                                continent=form.continent,
                                education_of_employee=form.education_of_employee,
                                has_job_experience=form.has_job_experience,
                                requires_job_training=form.requires_job_training,
                                no_of_employees=form.no_of_employees,
                                company_age=form.company_age,
                                region_of_employment=form.region_of_employment,
                                prevailing_wage=form.prevailing_wage,
                                unit_of_wage=form.unit_of_wage,
                                full_time_position=form.full_time_position,
                                )

        usvisa_df = usvisa_data.get_usvisa_input_data_frame()

        model_predictor = USvisaClassifier()

        value = model_predictor.predict(dataframe=usvisa_df)[0]

        # NOTE: TargetValueMapping in estimator.py maps Certified -> 0 and Denied -> 1;
        # verify that the label convention used during data transformation matches this branch.
        status = None
        if value == 1:
            status = "Visa-approved"
        else:
            status = "Visa Not-Approved"

        return templates.TemplateResponse(
            "usvisa.html",
            {"request": request, "context": status},
        )

    except Exception as e:
        return {"status": False, "error": f"{e}"}


if __name__ == "__main__":
    app_run(app, host=APP_HOST, port=APP_PORT)
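
With the server running (python app.py serves on port 8080 per the constants), both routes can be exercised from a shell; the host and feature values below are illustrative:

```bash
# Trigger a full training run
curl http://localhost:8080/train

# Submit the prediction form (field names match DataForm above)
curl -X POST http://localhost:8080/ \
  -F continent=Asia -F "education_of_employee=Master's" \
  -F has_job_experience=Y -F requires_job_training=N \
  -F no_of_employees=500 -F company_age=20 \
  -F region_of_employment=West -F prevailing_wage=85000 \
  -F unit_of_wage=Year -F full_time_position=Y
```
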
--------------------------------------------------------------------------------
/us_visa/components/model_trainer.py:
--------------------------------------------------------------------------------
import sys
from typing import Tuple

import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from neuro_mf import ModelFactory

from us_visa.exception import USvisaException
from us_visa.logger import logging
from us_visa.utils.main_utils import load_numpy_array_data, read_yaml_file, load_object, save_object
from us_visa.entity.config_entity import ModelTrainerConfig
from us_visa.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact, ClassificationMetricArtifact
from us_visa.entity.estimator import USvisaModel

class ModelTrainer:
    def __init__(self, data_transformation_artifact: DataTransformationArtifact,
                 model_trainer_config: ModelTrainerConfig):
        """
        :param data_transformation_artifact: Output reference of the data transformation artifact stage
        :param model_trainer_config: Configuration for model training
        """
        self.data_transformation_artifact = data_transformation_artifact
        self.model_trainer_config = model_trainer_config

    def get_model_object_and_report(self, train: np.array, test: np.array) -> Tuple[object, object]:
        """
        Method Name : get_model_object_and_report
        Description : This function uses neuro_mf to get the best model object and the report of the best model

        Output      : Returns the best model detail and a metric artifact object
        On Failure  : Write an exception log and then raise an exception
        """
        try:
            logging.info("Using neuro_mf to get best model object and report")
            model_factory = ModelFactory(model_config_path=self.model_trainer_config.model_config_file_path)

            x_train, y_train, x_test, y_test = train[:, :-1], train[:, -1], test[:, :-1], test[:, -1]

            best_model_detail = model_factory.get_best_model(
                X=x_train, y=y_train, base_accuracy=self.model_trainer_config.expected_accuracy
            )
            model_obj = best_model_detail.best_model

            y_pred = model_obj.predict(x_test)

            accuracy = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)
            metric_artifact = ClassificationMetricArtifact(f1_score=f1, precision_score=precision, recall_score=recall)

            return best_model_detail, metric_artifact

        except Exception as e:
            raise USvisaException(e, sys) from e


    def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """
        Method Name : initiate_model_trainer
        Description : This function initiates the model trainer steps

        Output      : Returns model trainer artifact
        On Failure  : Write an exception log and then raise an exception
        """
        logging.info("Entered initiate_model_trainer method of ModelTrainer class")
        try:
            train_arr = load_numpy_array_data(file_path=self.data_transformation_artifact.transformed_train_file_path)
            test_arr = load_numpy_array_data(file_path=self.data_transformation_artifact.transformed_test_file_path)

            best_model_detail, metric_artifact = self.get_model_object_and_report(train=train_arr, test=test_arr)

            preprocessing_obj = load_object(file_path=self.data_transformation_artifact.transformed_object_file_path)


            if best_model_detail.best_score < self.model_trainer_config.expected_accuracy:
                logging.info("No best model found with score more than base score")
                raise Exception("No best model found with score more than base score")

            usvisa_model = USvisaModel(preprocessing_object=preprocessing_obj,
                                       trained_model_object=best_model_detail.best_model)
            logging.info("Created usvisa model object with preprocessor and model")
            logging.info("Created best model file path.")
            save_object(self.model_trainer_config.trained_model_file_path, usvisa_model)

            model_trainer_artifact = ModelTrainerArtifact(
                trained_model_file_path=self.model_trainer_config.trained_model_file_path,
                metric_artifact=metric_artifact,
            )
            logging.info(f"Model trainer artifact: {model_trainer_artifact}")
            return model_trainer_artifact
        except Exception as e:
            raise USvisaException(e, sys) from e
/us_visa/components/data_ingestion.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from pandas import DataFrame
5 | from sklearn.model_selection import train_test_split
6 |
7 | from us_visa.entity.config_entity import DataIngestionConfig
8 | from us_visa.entity.artifact_entity import DataIngestionArtifact
9 | from us_visa.exception import USvisaException
10 | from us_visa.logger import logging
11 | from us_visa.data_access.usvisa_data import USvisaData
12 |
13 | class DataIngestion:
14 |     def __init__(self, data_ingestion_config: DataIngestionConfig = DataIngestionConfig()):
15 |         """
16 |         :param data_ingestion_config: configuration for data ingestion
17 |         """
18 |         try:
19 |             self.data_ingestion_config = data_ingestion_config
20 |         except Exception as e:
21 |             raise USvisaException(e, sys)
22 |
23 |     def export_data_into_feature_store(self) -> DataFrame:
24 |         """
25 |         Method Name : export_data_into_feature_store
26 |         Description : This method exports data from mongodb to a csv file
27 |
28 |         Output : data is returned as an artifact of the data ingestion component
29 |         On Failure : Write an exception log and then raise an exception
30 |         """
31 |         try:
32 |             logging.info("Exporting data from mongodb")
33 |             usvisa_data = USvisaData()
34 |             dataframe = usvisa_data.export_collection_as_dataframe(collection_name=
35 |                                                                    self.data_ingestion_config.collection_name)
36 |             logging.info(f"Shape of dataframe: {dataframe.shape}")
37 |             feature_store_file_path = self.data_ingestion_config.feature_store_file_path
38 |             dir_path = os.path.dirname(feature_store_file_path)
39 |             os.makedirs(dir_path, exist_ok=True)
40 |             logging.info(f"Saving exported data into feature store file path: {feature_store_file_path}")
41 |             dataframe.to_csv(feature_store_file_path, index=False, header=True)
42 |             return dataframe
43 |
44 |         except Exception as e:
45 |             raise USvisaException(e, sys)
46 |
47 |     def split_data_as_train_test(self, dataframe: DataFrame) -> None:
48 |         """
49 |         Method Name : split_data_as_train_test
50 |         Description : This method splits the dataframe into train set and test set based on split ratio
51 |
52 |         Output : train set and test set are saved as csv files in the artifact directory
53 |         On Failure : Write an exception log and then raise an exception
54 |         """
55 |         logging.info("Entered split_data_as_train_test method of Data_Ingestion class")
56 |
57 |         try:
58 |             train_set, test_set = train_test_split(dataframe, test_size=self.data_ingestion_config.train_test_split_ratio)
59 |             logging.info("Performed train test split on the dataframe")
60 |             logging.info(
61 |                 "Exited split_data_as_train_test method of Data_Ingestion class"
62 |             )
63 |             dir_path = os.path.dirname(self.data_ingestion_config.training_file_path)
64 |             os.makedirs(dir_path, exist_ok=True)
65 |
66 |             logging.info("Exporting train and test files.")
67 |             train_set.to_csv(self.data_ingestion_config.training_file_path, index=False, header=True)
68 |             test_set.to_csv(self.data_ingestion_config.testing_file_path, index=False, header=True)
69 |
70 |             logging.info("Exported train and test files.")
71 |         except Exception as e:
72 |             raise USvisaException(e, sys) from e
73 |
74 |     def initiate_data_ingestion(self) -> DataIngestionArtifact:
75 |         """
76 |         Method Name : initiate_data_ingestion
77 |         Description : This method initiates the data ingestion component of the training pipeline
78 |
79 |         Output : train set and test set are returned as the artifacts of the data ingestion component
80 |         On Failure : Write an exception log and then raise an exception
81 |         """
82 |         logging.info("Entered initiate_data_ingestion method of Data_Ingestion class")
83 |
84 |         try:
85 |             dataframe = self.export_data_into_feature_store()
86 |
87 |             logging.info("Got the data from mongodb")
88 |
89 |             self.split_data_as_train_test(dataframe)
90 |
91 |             logging.info("Performed train test split on the dataset")
92 |
93 |             logging.info(
94 |                 "Exited initiate_data_ingestion method of Data_Ingestion class"
95 |             )
96 |
97 |             data_ingestion_artifact = DataIngestionArtifact(trained_file_path=self.data_ingestion_config.training_file_path,
98 |                                                             test_file_path=self.data_ingestion_config.testing_file_path)
99 |
100 |             logging.info(f"Data ingestion artifact: {data_ingestion_artifact}")
101 |             return data_ingestion_artifact
102 |         except Exception as e:
103 |             raise USvisaException(e, sys) from e
--------------------------------------------------------------------------------
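One detail worth noting: `train_test_split` above is called without a `random_state`, so repeated ingestion runs produce different splits. If reproducibility matters, pinning the seed is a one-line change (sketch below; the value 42 is an arbitrary illustrative choice):

import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.DataFrame({"feature": range(10), "case_status": [0, 1] * 5})

# Pinning random_state makes the ingestion split reproducible across runs.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
print(len(train_set), len(test_set))  # 8 2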
/us_visa/components/model_evaluation.py:
--------------------------------------------------------------------------------
1 | from us_visa.entity.config_entity import ModelEvaluationConfig
2 | from us_visa.entity.artifact_entity import ModelTrainerArtifact, DataIngestionArtifact, ModelEvaluationArtifact
3 | from sklearn.metrics import f1_score
4 | from us_visa.exception import USvisaException
5 | from us_visa.constants import TARGET_COLUMN, CURRENT_YEAR
6 | from us_visa.logger import logging
7 | import sys
8 | import pandas as pd
9 | from typing import Optional
10 | from us_visa.entity.s3_estimator import USvisaEstimator
11 | from dataclasses import dataclass
12 | from us_visa.entity.estimator import USvisaModel
13 | from us_visa.entity.estimator import TargetValueMapping
14 |
15 | @dataclass
16 | class EvaluateModelResponse:
17 |     trained_model_f1_score: float
18 |     best_model_f1_score: float
19 |     is_model_accepted: bool
20 |     difference: float
21 |
22 |
23 | class ModelEvaluation:
24 |
25 |     def __init__(self, model_eval_config: ModelEvaluationConfig, data_ingestion_artifact: DataIngestionArtifact,
26 |                  model_trainer_artifact: ModelTrainerArtifact):
27 |         try:
28 |             self.model_eval_config = model_eval_config
29 |             self.data_ingestion_artifact = data_ingestion_artifact
30 |             self.model_trainer_artifact = model_trainer_artifact
31 |         except Exception as e:
32 |             raise USvisaException(e, sys) from e
33 |
34 |     def get_best_model(self) -> Optional[USvisaEstimator]:
35 |         """
36 |         Method Name : get_best_model
37 |         Description : This function is used to get the model currently in production
38 |
39 |         Output : Returns model object if available in s3 storage
40 |         On Failure : Write an exception log and then raise an exception
41 |         """
42 |         try:
43 |             bucket_name = self.model_eval_config.bucket_name
44 |             model_path = self.model_eval_config.s3_model_key_path
45 |             usvisa_estimator = USvisaEstimator(bucket_name=bucket_name,
46 |                                                model_path=model_path)
47 |
48 |             if usvisa_estimator.is_model_present(model_path=model_path):
49 |                 return usvisa_estimator
50 |             return None
51 |         except Exception as e:
52 |             raise USvisaException(e, sys)
53 |
54 |     def evaluate_model(self) -> EvaluateModelResponse:
55 |         """
56 |         Method Name : evaluate_model
57 |         Description : This function compares the freshly trained model
58 |                       with the production model and chooses the better one
59 |
60 |         Output : Returns bool value based on validation results
61 |         On Failure : Write an exception log and then raise an exception
62 |         """
63 |         try:
64 |             test_df = pd.read_csv(self.data_ingestion_artifact.test_file_path)
65 |             test_df['company_age'] = CURRENT_YEAR - test_df['yr_of_estab']
66 |
67 |             x, y = test_df.drop(TARGET_COLUMN, axis=1), test_df[TARGET_COLUMN]
68 |             y = y.replace(
69 |                 TargetValueMapping()._asdict()
70 |             )
71 |
72 |             # trained_model = load_object(file_path=self.model_trainer_artifact.trained_model_file_path)
73 |             trained_model_f1_score = self.model_trainer_artifact.metric_artifact.f1_score
74 |
75 |             best_model_f1_score = None
76 |             best_model = self.get_best_model()
77 |             if best_model is not None:
78 |                 y_hat_best_model = best_model.predict(x)
79 |                 best_model_f1_score = f1_score(y, y_hat_best_model)
80 |
81 |             tmp_best_model_score = 0 if best_model_f1_score is None else best_model_f1_score
82 |             result = EvaluateModelResponse(trained_model_f1_score=trained_model_f1_score,
83 |                                            best_model_f1_score=best_model_f1_score,
84 |                                            is_model_accepted=trained_model_f1_score > tmp_best_model_score,
85 |                                            difference=trained_model_f1_score - tmp_best_model_score
86 |                                            )
87 |             logging.info(f"Result: {result}")
88 |             return result
89 |
90 |         except Exception as e:
91 |             raise USvisaException(e, sys)
92 |
93 |     def initiate_model_evaluation(self) -> ModelEvaluationArtifact:
94 |         """
95 |         Method Name : initiate_model_evaluation
96 |         Description : This function is used to initiate all steps of the model evaluation
97 |
98 |         Output : Returns model evaluation artifact
99 |         On Failure : Write an exception log and then raise an exception
100 |         """
101 |         try:
102 |             evaluate_model_response = self.evaluate_model()
103 |             s3_model_path = self.model_eval_config.s3_model_key_path
104 |
105 |             model_evaluation_artifact = ModelEvaluationArtifact(
106 |                 is_model_accepted=evaluate_model_response.is_model_accepted,
107 |                 s3_model_path=s3_model_path,
108 |                 trained_model_path=self.model_trainer_artifact.trained_model_file_path,
109 |                 changed_accuracy=evaluate_model_response.difference)
110 |
111 |             logging.info(f"Model evaluation artifact: {model_evaluation_artifact}")
112 |             return model_evaluation_artifact
113 |         except Exception as e:
114 |             raise USvisaException(e, sys) from e
--------------------------------------------------------------------------------
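The acceptance rule above is simple enough to restate on its own: a candidate is accepted exactly when its F1 score beats the production model's F1, with a missing production model treated as a score of 0, so the first trained model is always accepted. A self-contained restatement of that logic:

from typing import Optional

def is_accepted(trained_f1: float, production_f1: Optional[float]) -> bool:
    # A missing production model counts as 0, so any positive trained F1 passes.
    baseline = production_f1 if production_f1 is not None else 0.0
    return trained_f1 > baseline

assert is_accepted(0.81, None)      # first deployment
assert is_accepted(0.81, 0.78)      # improvement
assert not is_accepted(0.78, 0.81)  # regression is rejected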
/us_visa/components/data_validation.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 |
4 | import pandas as pd
5 | from evidently.model_profile import Profile
6 | from evidently.model_profile.sections import DataDriftProfileSection
7 |
8 | from pandas import DataFrame
9 |
10 | from us_visa.exception import USvisaException
11 | from us_visa.logger import logging
12 | from us_visa.utils.main_utils import read_yaml_file, write_yaml_file
13 | from us_visa.entity.artifact_entity import DataIngestionArtifact, DataValidationArtifact
14 | from us_visa.entity.config_entity import DataValidationConfig
15 | from us_visa.constants import SCHEMA_FILE_PATH
16 |
17 |
18 | class DataValidation:
19 |     def __init__(self, data_ingestion_artifact: DataIngestionArtifact, data_validation_config: DataValidationConfig):
20 |         """
21 |         :param data_ingestion_artifact: Output reference of data ingestion artifact stage
22 |         :param data_validation_config: configuration for data validation
23 |         """
24 |         try:
25 |             self.data_ingestion_artifact = data_ingestion_artifact
26 |             self.data_validation_config = data_validation_config
27 |             self._schema_config = read_yaml_file(file_path=SCHEMA_FILE_PATH)
28 |         except Exception as e:
29 |             raise USvisaException(e, sys)
30 |
31 |     def validate_number_of_columns(self, dataframe: DataFrame) -> bool:
32 |         """
33 |         Method Name : validate_number_of_columns
34 |         Description : This method validates the number of columns
35 |
36 |         Output : Returns bool value based on validation results
37 |         On Failure : Write an exception log and then raise an exception
38 |         """
39 |         try:
40 |             status = len(dataframe.columns) == len(self._schema_config["columns"])
41 |             logging.info(f"Are all required columns present: [{status}]")
42 |             return status
43 |         except Exception as e:
44 |             raise USvisaException(e, sys)
45 |
46 |     def is_column_exist(self, df: DataFrame) -> bool:
47 |         """
48 |         Method Name : is_column_exist
49 |         Description : This method validates the existence of the required numerical and categorical columns
50 |
51 |         Output : Returns bool value based on validation results
52 |         On Failure : Write an exception log and then raise an exception
53 |         """
54 |         try:
55 |             dataframe_columns = df.columns
56 |             missing_numerical_columns = []
57 |             missing_categorical_columns = []
58 |             for column in self._schema_config["numerical_columns"]:
59 |                 if column not in dataframe_columns:
60 |                     missing_numerical_columns.append(column)
61 |
62 |             if len(missing_numerical_columns) > 0:
63 |                 logging.info(f"Missing numerical columns: {missing_numerical_columns}")
64 |
65 |
66 |             for column in self._schema_config["categorical_columns"]:
67 |                 if column not in dataframe_columns:
68 |                     missing_categorical_columns.append(column)
69 |
70 |             if len(missing_categorical_columns) > 0:
71 |                 logging.info(f"Missing categorical columns: {missing_categorical_columns}")
72 |
73 |             return len(missing_numerical_columns) == 0 and len(missing_categorical_columns) == 0
74 |         except Exception as e:
75 |             raise USvisaException(e, sys) from e
76 |
77 |     @staticmethod
78 |     def read_data(file_path) -> DataFrame:
79 |         try:
80 |             return pd.read_csv(file_path)
81 |         except Exception as e:
82 |             raise USvisaException(e, sys)
83 |
84 |     def detect_dataset_drift(self, reference_df: DataFrame, current_df: DataFrame) -> bool:
85 |         """
86 |         Method Name : detect_dataset_drift
87 |         Description : This method checks whether data drift is detected between the two dataframes
88 |
89 |         Output : Returns bool value based on validation results
90 |         On Failure : Write an exception log and then raise an exception
91 |         """
92 |         try:
93 |             data_drift_profile = Profile(sections=[DataDriftProfileSection()])
94 |
95 |             data_drift_profile.calculate(reference_df, current_df)
96 |
97 |             report = data_drift_profile.json()
98 |             json_report = json.loads(report)
99 |
100 |             write_yaml_file(file_path=self.data_validation_config.drift_report_file_path, content=json_report)
101 |
102 |             n_features = json_report["data_drift"]["data"]["metrics"]["n_features"]
103 |             n_drifted_features = json_report["data_drift"]["data"]["metrics"]["n_drifted_features"]
104 |
105 |             logging.info(f"{n_drifted_features}/{n_features} features show drift.")
106 |             drift_status = json_report["data_drift"]["data"]["metrics"]["dataset_drift"]
107 |             return drift_status
108 |         except Exception as e:
109 |             raise USvisaException(e, sys) from e
110 |
111 |     def initiate_data_validation(self) -> DataValidationArtifact:
112 |         """
113 |         Method Name : initiate_data_validation
114 |         Description : This method initiates the data validation component for the pipeline
115 |
116 |         Output : Returns bool value based on validation results
117 |         On Failure : Write an exception log and then raise an exception
118 |         """
119 |
120 |         try:
121 |             validation_error_msg = ""
122 |             logging.info("Starting data validation")
123 |             train_df, test_df = (DataValidation.read_data(file_path=self.data_ingestion_artifact.trained_file_path),
124 |                                  DataValidation.read_data(file_path=self.data_ingestion_artifact.test_file_path))
125 |
126 |             status = self.validate_number_of_columns(dataframe=train_df)
127 |             logging.info(f"All required columns present in training dataframe: {status}")
128 |             if not status:
129 |                 validation_error_msg += "Columns are missing in training dataframe. "
130 |             status = self.validate_number_of_columns(dataframe=test_df)
131 |
132 |             logging.info(f"All required columns present in testing dataframe: {status}")
133 |             if not status:
134 |                 validation_error_msg += "Columns are missing in test dataframe. "
135 |
136 |             status = self.is_column_exist(df=train_df)
137 |
138 |             if not status:
139 |                 validation_error_msg += "Columns are missing in training dataframe. "
140 |             status = self.is_column_exist(df=test_df)
141 |
142 |             if not status:
143 |                 validation_error_msg += "Columns are missing in test dataframe. "
144 |
145 |             validation_status = len(validation_error_msg) == 0
146 |
147 |             if validation_status:
148 |                 drift_status = self.detect_dataset_drift(train_df, test_df)
149 |                 if drift_status:
150 |                     logging.info("Drift detected.")
151 |                     validation_error_msg = "Drift detected"
152 |                 else:
153 |                     validation_error_msg = "Drift not detected"
154 |             else:
155 |                 logging.info(f"Validation error: {validation_error_msg}")
156 |
157 |
158 |             data_validation_artifact = DataValidationArtifact(
159 |                 validation_status=validation_status,
160 |                 message=validation_error_msg,
161 |                 drift_report_file_path=self.data_validation_config.drift_report_file_path
162 |             )
163 |
164 |             logging.info(f"Data validation artifact: {data_validation_artifact}")
165 |             return data_validation_artifact
166 |         except Exception as e:
167 |             raise USvisaException(e, sys) from e
--------------------------------------------------------------------------------
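Note that `Profile` and `DataDriftProfileSection` belong to Evidently's legacy API, which later releases removed in favour of `Report` and metric presets; this component therefore pins an older Evidently version. As a rough sketch only, under the assumption that Evidently >= 0.4 is installed (result-dictionary key paths may differ slightly across releases and should be verified), the equivalent dataset-drift check would look like:

import pandas as pd
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset

def dataset_drift(reference_df: pd.DataFrame, current_df: pd.DataFrame) -> bool:
    report = Report(metrics=[DataDriftPreset()])
    report.run(reference_data=reference_df, current_data=current_df)
    result = report.as_dict()
    # The first preset metric summarises dataset-level drift.
    return bool(result["metrics"][0]["result"]["dataset_drift"])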
method of TrainPipeline class" 52 | ) 53 | return data_ingestion_artifact 54 | except Exception as e: 55 | raise USvisaException(e, sys) from e 56 | 57 | 58 | 59 | def start_data_validation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataValidationArtifact: 60 | """ 61 | This method of TrainPipeline class is responsible for starting data validation component 62 | """ 63 | logging.info("Entered the start_data_validation method of TrainPipeline class") 64 | 65 | try: 66 | data_validation = DataValidation(data_ingestion_artifact=data_ingestion_artifact, 67 | data_validation_config=self.data_validation_config 68 | ) 69 | 70 | data_validation_artifact = data_validation.initiate_data_validation() 71 | 72 | logging.info("Performed the data validation operation") 73 | 74 | logging.info( 75 | "Exited the start_data_validation method of TrainPipeline class" 76 | ) 77 | 78 | return data_validation_artifact 79 | 80 | except Exception as e: 81 | raise USvisaException(e, sys) from e 82 | 83 | 84 | 85 | 86 | 87 | def start_data_transformation(self, data_ingestion_artifact: DataIngestionArtifact, data_validation_artifact: DataValidationArtifact) -> DataTransformationArtifact: 88 | """ 89 | This method of TrainPipeline class is responsible for starting data transformation component 90 | """ 91 | try: 92 | data_transformation = DataTransformation(data_ingestion_artifact=data_ingestion_artifact, 93 | data_transformation_config=self.data_transformation_config, 94 | data_validation_artifact=data_validation_artifact) 95 | data_transformation_artifact = data_transformation.initiate_data_transformation() 96 | return data_transformation_artifact 97 | except Exception as e: 98 | raise USvisaException(e, sys) 99 | 100 | 101 | 102 | def start_model_trainer(self, data_transformation_artifact: DataTransformationArtifact) -> ModelTrainerArtifact: 103 | """ 104 | This method of TrainPipeline class is responsible for starting model training 105 | """ 106 | try: 107 | model_trainer = ModelTrainer(data_transformation_artifact=data_transformation_artifact, 108 | model_trainer_config=self.model_trainer_config 109 | ) 110 | model_trainer_artifact = model_trainer.initiate_model_trainer() 111 | return model_trainer_artifact 112 | 113 | except Exception as e: 114 | raise USvisaException(e, sys) 115 | 116 | 117 | 118 | def start_model_evaluation(self, data_ingestion_artifact: DataIngestionArtifact, 119 | model_trainer_artifact: ModelTrainerArtifact) -> ModelEvaluationArtifact: 120 | """ 121 | This method of TrainPipeline class is responsible for starting modle evaluation 122 | """ 123 | try: 124 | model_evaluation = ModelEvaluation(model_eval_config=self.model_evaluation_config, 125 | data_ingestion_artifact=data_ingestion_artifact, 126 | model_trainer_artifact=model_trainer_artifact) 127 | model_evaluation_artifact = model_evaluation.initiate_model_evaluation() 128 | return model_evaluation_artifact 129 | except Exception as e: 130 | raise USvisaException(e, sys) 131 | 132 | 133 | 134 | 135 | def start_model_pusher(self, model_evaluation_artifact: ModelEvaluationArtifact) -> ModelPusherArtifact: 136 | """ 137 | This method of TrainPipeline class is responsible for starting model pushing 138 | """ 139 | try: 140 | model_pusher = ModelPusher(model_evaluation_artifact=model_evaluation_artifact, 141 | model_pusher_config=self.model_pusher_config 142 | ) 143 | model_pusher_artifact = model_pusher.initiate_model_pusher() 144 | return model_pusher_artifact 145 | except Exception as e: 146 | raise USvisaException(e, sys) 147 
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |     def run_pipeline(self) -> None:
156 |         """
157 |         This method of TrainPipeline class is responsible for running the complete pipeline
158 |         """
159 |         try:
160 |             data_ingestion_artifact = self.start_data_ingestion()
161 |             data_validation_artifact = self.start_data_validation(data_ingestion_artifact=data_ingestion_artifact)
162 |             data_transformation_artifact = self.start_data_transformation(
163 |                 data_ingestion_artifact=data_ingestion_artifact, data_validation_artifact=data_validation_artifact)
164 |             model_trainer_artifact = self.start_model_trainer(data_transformation_artifact=data_transformation_artifact)
165 |             model_evaluation_artifact = self.start_model_evaluation(data_ingestion_artifact=data_ingestion_artifact,
166 |                                                                     model_trainer_artifact=model_trainer_artifact)
167 |
168 |             if not model_evaluation_artifact.is_model_accepted:
169 |                 logging.info("Model not accepted.")
170 |                 return None
171 |             model_pusher_artifact = self.start_model_pusher(model_evaluation_artifact=model_evaluation_artifact)
172 |
173 |
174 |
175 |         except Exception as e:
176 |             raise USvisaException(e, sys)
--------------------------------------------------------------------------------
/templates/usvisa.html:
--------------------------------------------------------------------------------
[The template's HTML markup was stripped during extraction; only its text nodes survive. What remains recoverable: the page title (line 6), the form heading (line 20), a set of form controls matching the DataForm fields read in app.py, and the rendered prediction line (line 118).]
6 | US Visa Prediction
20 | US Visa approval Form
118 | Visa Prediction Status: {{context}}
119 | 120 | 121 |
122 | 123 | 132 | 133 | -------------------------------------------------------------------------------- /us_visa/components/data_transformation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from imblearn.combine import SMOTEENN 6 | from sklearn.pipeline import Pipeline 7 | from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder, PowerTransformer 8 | from sklearn.compose import ColumnTransformer 9 | 10 | from us_visa.constants import TARGET_COLUMN, SCHEMA_FILE_PATH, CURRENT_YEAR 11 | from us_visa.entity.config_entity import DataTransformationConfig 12 | from us_visa.entity.artifact_entity import DataTransformationArtifact, DataIngestionArtifact, DataValidationArtifact 13 | from us_visa.exception import USvisaException 14 | from us_visa.logger import logging 15 | from us_visa.utils.main_utils import save_object, save_numpy_array_data, read_yaml_file, drop_columns 16 | from us_visa.entity.estimator import TargetValueMapping 17 | 18 | class DataTransformation: 19 | def __init__(self, data_ingestion_artifact: DataIngestionArtifact, 20 | data_transformation_config: DataTransformationConfig, 21 | data_validation_artifact: DataValidationArtifact): 22 | """ 23 | :param data_ingestion_artifact: Output reference of data ingestion artifact stage 24 | :param data_transformation_config: configuration for data transformation 25 | """ 26 | try: 27 | self.data_ingestion_artifact = data_ingestion_artifact 28 | self.data_transformation_config = data_transformation_config 29 | self.data_validation_artifact = data_validation_artifact 30 | self._schema_config = read_yaml_file(file_path=SCHEMA_FILE_PATH) 31 | except Exception as e: 32 | raise USvisaException(e, sys) 33 | 34 | @staticmethod 35 | def read_data(file_path) -> pd.DataFrame: 36 | try: 37 | return pd.read_csv(file_path) 38 | except Exception as e: 39 | raise USvisaException(e, sys) 40 | 41 | 42 | def get_data_transformer_object(self) -> Pipeline: 43 | """ 44 | Method Name : get_data_transformer_object 45 | Description : This method creates and returns a data transformer object for the data 46 | 47 | Output : data transformer object is created and returned 48 | On Failure : Write an exception log and then raise an exception 49 | """ 50 | logging.info( 51 | "Entered get_data_transformer_object method of DataTransformation class" 52 | ) 53 | 54 | try: 55 | logging.info("Got numerical cols from schema config") 56 | 57 | numeric_transformer = StandardScaler() 58 | oh_transformer = OneHotEncoder() 59 | ordinal_encoder = OrdinalEncoder() 60 | 61 | logging.info("Initialized StandardScaler, OneHotEncoder, OrdinalEncoder") 62 | 63 | oh_columns = self._schema_config['oh_columns'] 64 | or_columns = self._schema_config['or_columns'] 65 | transform_columns = self._schema_config['transform_columns'] 66 | num_features = self._schema_config['num_features'] 67 | 68 | logging.info("Initialize PowerTransformer") 69 | 70 | transform_pipe = Pipeline(steps=[ 71 | ('transformer', PowerTransformer(method='yeo-johnson')) 72 | ]) 73 | preprocessor = ColumnTransformer( 74 | [ 75 | ("OneHotEncoder", oh_transformer, oh_columns), 76 | ("Ordinal_Encoder", ordinal_encoder, or_columns), 77 | ("Transformer", transform_pipe, transform_columns), 78 | ("StandardScaler", numeric_transformer, num_features) 79 | ] 80 | ) 81 | 82 | logging.info("Created preprocessor object from ColumnTransformer") 83 | 84 | logging.info( 85 | "Exited get_data_transformer_object 
method of DataTransformation class" 86 | ) 87 | return preprocessor 88 | 89 | except Exception as e: 90 | raise USvisaException(e, sys) from e 91 | 92 | def initiate_data_transformation(self, ) -> DataTransformationArtifact: 93 | """ 94 | Method Name : initiate_data_transformation 95 | Description : This method initiates the data transformation component for the pipeline 96 | 97 | Output : data transformer steps are performed and preprocessor object is created 98 | On Failure : Write an exception log and then raise an exception 99 | """ 100 | try: 101 | if self.data_validation_artifact.validation_status: 102 | logging.info("Starting data transformation") 103 | preprocessor = self.get_data_transformer_object() 104 | logging.info("Got the preprocessor object") 105 | 106 | train_df = DataTransformation.read_data(file_path=self.data_ingestion_artifact.trained_file_path) 107 | test_df = DataTransformation.read_data(file_path=self.data_ingestion_artifact.test_file_path) 108 | 109 | input_feature_train_df = train_df.drop(columns=[TARGET_COLUMN], axis=1) 110 | target_feature_train_df = train_df[TARGET_COLUMN] 111 | 112 | logging.info("Got train features and test features of Training dataset") 113 | 114 | input_feature_train_df['company_age'] = CURRENT_YEAR-input_feature_train_df['yr_of_estab'] 115 | 116 | logging.info("Added company_age column to the Training dataset") 117 | 118 | drop_cols = self._schema_config['drop_columns'] 119 | 120 | logging.info("drop the columns in drop_cols of Training dataset") 121 | 122 | input_feature_train_df = drop_columns(df=input_feature_train_df, cols = drop_cols) 123 | 124 | target_feature_train_df = target_feature_train_df.replace( 125 | TargetValueMapping()._asdict() 126 | ) 127 | 128 | 129 | input_feature_test_df = test_df.drop(columns=[TARGET_COLUMN], axis=1) 130 | 131 | target_feature_test_df = test_df[TARGET_COLUMN] 132 | 133 | 134 | input_feature_test_df['company_age'] = CURRENT_YEAR-input_feature_test_df['yr_of_estab'] 135 | 136 | logging.info("Added company_age column to the Test dataset") 137 | 138 | input_feature_test_df = drop_columns(df=input_feature_test_df, cols = drop_cols) 139 | 140 | logging.info("drop the columns in drop_cols of Test dataset") 141 | 142 | target_feature_test_df = target_feature_test_df.replace( 143 | TargetValueMapping()._asdict() 144 | ) 145 | 146 | logging.info("Got train features and test features of Testing dataset") 147 | 148 | logging.info( 149 | "Applying preprocessing object on training dataframe and testing dataframe" 150 | ) 151 | 152 | input_feature_train_arr = preprocessor.fit_transform(input_feature_train_df) 153 | 154 | logging.info( 155 | "Used the preprocessor object to fit transform the train features" 156 | ) 157 | 158 | input_feature_test_arr = preprocessor.transform(input_feature_test_df) 159 | 160 | logging.info("Used the preprocessor object to transform the test features") 161 | 162 | logging.info("Applying SMOTEENN on Training dataset") 163 | 164 | smt = SMOTEENN(sampling_strategy="minority") 165 | 166 | input_feature_train_final, target_feature_train_final = smt.fit_resample( 167 | input_feature_train_arr, target_feature_train_df 168 | ) 169 | 170 | logging.info("Applied SMOTEENN on training dataset") 171 | 172 | logging.info("Applying SMOTEENN on testing dataset") 173 | 174 | input_feature_test_final, target_feature_test_final = smt.fit_resample( 175 | input_feature_test_arr, target_feature_test_df 176 | ) 177 | 178 | logging.info("Applied SMOTEENN on testing dataset") 179 | 180 | logging.info("Created 
train array and test array") 181 | 182 | train_arr = np.c_[ 183 | input_feature_train_final, np.array(target_feature_train_final) 184 | ] 185 | 186 | test_arr = np.c_[ 187 | input_feature_test_final, np.array(target_feature_test_final) 188 | ] 189 | 190 | save_object(self.data_transformation_config.transformed_object_file_path, preprocessor) 191 | save_numpy_array_data(self.data_transformation_config.transformed_train_file_path, array=train_arr) 192 | save_numpy_array_data(self.data_transformation_config.transformed_test_file_path, array=test_arr) 193 | 194 | logging.info("Saved the preprocessor object") 195 | 196 | logging.info( 197 | "Exited initiate_data_transformation method of Data_Transformation class" 198 | ) 199 | 200 | data_transformation_artifact = DataTransformationArtifact( 201 | transformed_object_file_path=self.data_transformation_config.transformed_object_file_path, 202 | transformed_train_file_path=self.data_transformation_config.transformed_train_file_path, 203 | transformed_test_file_path=self.data_transformation_config.transformed_test_file_path 204 | ) 205 | return data_transformation_artifact 206 | else: 207 | raise Exception(self.data_validation_artifact.message) 208 | 209 | except Exception as e: 210 | raise USvisaException(e, sys) from e -------------------------------------------------------------------------------- /us_visa/cloud_storage/aws_storage.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from us_visa.configuration.aws_connection import S3Client 3 | from io import StringIO 4 | from typing import Union,List 5 | import os,sys 6 | from us_visa.logger import logging 7 | from mypy_boto3_s3.service_resource import Bucket 8 | from us_visa.exception import USvisaException 9 | from botocore.exceptions import ClientError 10 | from pandas import DataFrame,read_csv 11 | import pickle 12 | 13 | 14 | class SimpleStorageService: 15 | 16 | def __init__(self): 17 | s3_client = S3Client() 18 | self.s3_resource = s3_client.s3_resource 19 | self.s3_client = s3_client.s3_client 20 | 21 | def s3_key_path_available(self,bucket_name,s3_key)->bool: 22 | try: 23 | bucket = self.get_bucket(bucket_name) 24 | file_objects = [file_object for file_object in bucket.objects.filter(Prefix=s3_key)] 25 | if len(file_objects) > 0: 26 | return True 27 | else: 28 | return False 29 | except Exception as e: 30 | raise USvisaException(e,sys) 31 | 32 | 33 | 34 | @staticmethod 35 | def read_object(object_name: str, decode: bool = True, make_readable: bool = False) -> Union[StringIO, str]: 36 | """ 37 | Method Name : read_object 38 | Description : This method reads the object_name object with kwargs 39 | 40 | Output : The column name is renamed 41 | On Failure : Write an exception log and then raise an exception 42 | 43 | Version : 1.2 44 | Revisions : moved setup to cloud 45 | """ 46 | logging.info("Entered the read_object method of S3Operations class") 47 | 48 | try: 49 | func = ( 50 | lambda: object_name.get()["Body"].read().decode() 51 | if decode is True 52 | else object_name.get()["Body"].read() 53 | ) 54 | conv_func = lambda: StringIO(func()) if make_readable is True else func() 55 | logging.info("Exited the read_object method of S3Operations class") 56 | return conv_func() 57 | 58 | except Exception as e: 59 | raise USvisaException(e, sys) from e 60 | 61 | def get_bucket(self, bucket_name: str) -> Bucket: 62 | """ 63 | Method Name : get_bucket 64 | Description : This method gets the bucket object based on the bucket_name 65 | 66 | 
Output : Bucket object is returned based on the bucket name 67 | On Failure : Write an exception log and then raise an exception 68 | 69 | Version : 1.2 70 | Revisions : moved setup to cloud 71 | """ 72 | logging.info("Entered the get_bucket method of S3Operations class") 73 | 74 | try: 75 | bucket = self.s3_resource.Bucket(bucket_name) 76 | logging.info("Exited the get_bucket method of S3Operations class") 77 | return bucket 78 | except Exception as e: 79 | raise USvisaException(e, sys) from e 80 | 81 | def get_file_object( self, filename: str, bucket_name: str) -> Union[List[object], object]: 82 | """ 83 | Method Name : get_file_object 84 | Description : This method gets the file object from bucket_name bucket based on filename 85 | 86 | Output : list of objects or object is returned based on filename 87 | On Failure : Write an exception log and then raise an exception 88 | 89 | Version : 1.2 90 | Revisions : moved setup to cloud 91 | """ 92 | logging.info("Entered the get_file_object method of S3Operations class") 93 | 94 | try: 95 | bucket = self.get_bucket(bucket_name) 96 | 97 | file_objects = [file_object for file_object in bucket.objects.filter(Prefix=filename)] 98 | 99 | func = lambda x: x[0] if len(x) == 1 else x 100 | 101 | file_objs = func(file_objects) 102 | logging.info("Exited the get_file_object method of S3Operations class") 103 | 104 | return file_objs 105 | 106 | except Exception as e: 107 | raise USvisaException(e, sys) from e 108 | 109 | def load_model(self, model_name: str, bucket_name: str, model_dir: str = None) -> object: 110 | """ 111 | Method Name : load_model 112 | Description : This method loads the model_name model from bucket_name bucket with kwargs 113 | 114 | Output : list of objects or object is returned based on filename 115 | On Failure : Write an exception log and then raise an exception 116 | 117 | Version : 1.2 118 | Revisions : moved setup to cloud 119 | """ 120 | logging.info("Entered the load_model method of S3Operations class") 121 | 122 | try: 123 | func = ( 124 | lambda: model_name 125 | if model_dir is None 126 | else model_dir + "/" + model_name 127 | ) 128 | model_file = func() 129 | file_object = self.get_file_object(model_file, bucket_name) 130 | model_obj = self.read_object(file_object, decode=False) 131 | model = pickle.loads(model_obj) 132 | logging.info("Exited the load_model method of S3Operations class") 133 | return model 134 | 135 | except Exception as e: 136 | raise USvisaException(e, sys) from e 137 | 138 | def create_folder(self, folder_name: str, bucket_name: str) -> None: 139 | """ 140 | Method Name : create_folder 141 | Description : This method creates a folder_name folder in bucket_name bucket 142 | 143 | Output : Folder is created in s3 bucket 144 | On Failure : Write an exception log and then raise an exception 145 | 146 | Version : 1.2 147 | Revisions : moved setup to cloud 148 | """ 149 | logging.info("Entered the create_folder method of S3Operations class") 150 | 151 | try: 152 | self.s3_resource.Object(bucket_name, folder_name).load() 153 | 154 | except ClientError as e: 155 | if e.response["Error"]["Code"] == "404": 156 | folder_obj = folder_name + "/" 157 | self.s3_client.put_object(Bucket=bucket_name, Key=folder_obj) 158 | else: 159 | pass 160 | logging.info("Exited the create_folder method of S3Operations class") 161 | 162 | def upload_file(self, from_filename: str, to_filename: str, bucket_name: str, remove: bool = True): 163 | """ 164 | Method Name : upload_file 165 | Description : This method uploads the 
from_filename file to bucket_name bucket with to_filename as bucket filename
166 |
167 |         Output : The from_filename file is uploaded to the s3 bucket as to_filename
168 |         On Failure : Write an exception log and then raise an exception
169 |
170 |         Version : 1.2
171 |         Revisions : moved setup to cloud
172 |         """
173 |         logging.info("Entered the upload_file method of S3Operations class")
174 |
175 |         try:
176 |             logging.info(
177 |                 f"Uploading {from_filename} file to {to_filename} file in {bucket_name} bucket"
178 |             )
179 |
180 |             self.s3_resource.meta.client.upload_file(
181 |                 from_filename, bucket_name, to_filename
182 |             )
183 |
184 |             logging.info(
185 |                 f"Uploaded {from_filename} file to {to_filename} file in {bucket_name} bucket"
186 |             )
187 |
188 |             if remove is True:
189 |                 os.remove(from_filename)
190 |
191 |                 logging.info(f"Remove is set to {remove}, deleted the local file")
192 |
193 |             else:
194 |                 logging.info(f"Remove is set to {remove}, kept the local file")
195 |
196 |             logging.info("Exited the upload_file method of S3Operations class")
197 |
198 |         except Exception as e:
199 |             raise USvisaException(e, sys) from e
200 |
201 |     def upload_df_as_csv(self, data_frame: DataFrame, local_filename: str, bucket_filename: str, bucket_name: str) -> None:
202 |         """
203 |         Method Name : upload_df_as_csv
204 |         Description : This method uploads the dataframe to bucket_filename csv file in bucket_name bucket
205 |
206 |         Output : The dataframe is uploaded to the s3 bucket as a csv file
207 |         On Failure : Write an exception log and then raise an exception
208 |
209 |         Version : 1.2
210 |         Revisions : moved setup to cloud
211 |         """
212 |         logging.info("Entered the upload_df_as_csv method of S3Operations class")
213 |
214 |         try:
215 |             data_frame.to_csv(local_filename, index=None, header=True)
216 |
217 |             self.upload_file(local_filename, bucket_filename, bucket_name)
218 |
219 |             logging.info("Exited the upload_df_as_csv method of S3Operations class")
220 |
221 |         except Exception as e:
222 |             raise USvisaException(e, sys) from e
223 |
224 |     def get_df_from_object(self, object_: object) -> DataFrame:
225 |         """
226 |         Method Name : get_df_from_object
227 |         Description : This method gets the dataframe from the object_ object
228 |
229 |         Output : A dataframe is returned from the object
230 |         On Failure : Write an exception log and then raise an exception
231 |
232 |         Version : 1.2
233 |         Revisions : moved setup to cloud
234 |         """
235 |         logging.info("Entered the get_df_from_object method of S3Operations class")
236 |
237 |         try:
238 |             content = self.read_object(object_, make_readable=True)
239 |             df = read_csv(content, na_values="na")
240 |             logging.info("Exited the get_df_from_object method of S3Operations class")
241 |             return df
242 |         except Exception as e:
243 |             raise USvisaException(e, sys) from e
244 |
245 |     def read_csv(self, filename: str, bucket_name: str) -> DataFrame:
246 |         """
247 |         Method Name : read_csv
248 |         Description : This method reads the filename csv file from bucket_name bucket into a dataframe
249 |
250 |         Output : A dataframe is returned based on the filename
251 |         On Failure : Write an exception log and then raise an exception
252 |
253 |         Version : 1.2
254 |         Revisions : moved setup to cloud
255 |         """
256 |         logging.info("Entered the read_csv method of S3Operations class")
257 |
258 |         try:
259 |             csv_obj = self.get_file_object(filename, bucket_name)
260 |             df = self.get_df_from_object(csv_obj)
261 |             logging.info("Exited the read_csv method of S3Operations class")
262 |             return df
263 |         except Exception as e:
264 |             raise USvisaException(e, sys) from e
--------------------------------------------------------------------------------
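As a closing illustration, the storage helper above is typically driven as follows. This is a sketch only: the bucket name and key paths are placeholders, and boto3 credentials are assumed to be configured in the environment.

from us_visa.cloud_storage.aws_storage import SimpleStorageService
import pandas as pd

s3 = SimpleStorageService()

# Upload a dataframe as CSV (written locally first, then pushed to S3).
df = pd.DataFrame({"continent": ["Asia"], "prevailing_wage": [90000]})
s3.upload_df_as_csv(df, local_filename="batch.csv",
                    bucket_filename="inputs/batch.csv",
                    bucket_name="my-usvisa-bucket")

# Read it back as a dataframe.
round_trip = s3.read_csv(filename="inputs/batch.csv", bucket_name="my-usvisa-bucket")

# Load the production model object if one has been pushed.
if s3.s3_key_path_available(bucket_name="my-usvisa-bucket", s3_key="model/model.pkl"):
    model = s3.load_model("model.pkl", bucket_name="my-usvisa-bucket", model_dir="model")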