├── packages ├── car_evaluation_model │ ├── tests │ │ └── __init__.py │ ├── car_evaluation_model │ │ ├── VERSION │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── raw │ │ │ │ └── __init__.py │ │ │ └── interim │ │ │ │ └── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ └── core.py │ │ ├── models │ │ │ └── __init__.py │ │ ├── processing │ │ │ ├── __init__.py │ │ │ ├── features.py │ │ │ ├── validation.py │ │ │ └── data_manager.py │ │ ├── __init__.py │ │ ├── train_pipeline.py │ │ ├── predict.py │ │ ├── config.yml │ │ └── pipeline.py │ ├── pyproject.toml │ ├── requirements │ │ └── requirements.txt │ ├── mypy.ini │ ├── MANIFEST.in │ ├── tox.ini │ ├── LICENSE │ └── setup.py ├── car_evaluation_api │ ├── app │ │ ├── __init__.py │ │ ├── schemas │ │ │ ├── __init__.py │ │ │ ├── health.py │ │ │ └── predict.py │ │ ├── main.py │ │ ├── api.py │ │ └── config.py │ ├── run.sh │ ├── Procfile │ ├── mypy.ini │ ├── .dockerignore │ ├── requirements.txt │ ├── tox.ini │ └── Dockerfile ├── car_evaluation_streamlit │ ├── requirements.txt │ ├── .dockerignore │ ├── Dockerfile │ └── app.py └── docker-compose.yml ├── README.md └── .gitignore /packages/car_evaluation_model/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/VERSION: -------------------------------------------------------------------------------- 1 | 1.0.0 -------------------------------------------------------------------------------- /packages/car_evaluation_api/app/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.0" -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/data/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/data/raw/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/processing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/data/interim/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/car_evaluation_streamlit/requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit>=1.12.0, <1.13.0 -------------------------------------------------------------------------------- /packages/car_evaluation_api/run.sh: -------------------------------------------------------------------------------- 1 | uvicorn app.main:app --host 0.0.0.0 --port 8001 2 | -------------------------------------------------------------------------------- /packages/car_evaluation_api/Procfile: -------------------------------------------------------------------------------- 1 | web: uvicorn app.main:app --host 0.0.0.0 --port $PORT 2 | 
-------------------------------------------------------------------------------- /packages/car_evaluation_api/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | plugins = pydantic.mypy 3 | ignore_missing_imports = True 4 | disallow_untyped_defs = True -------------------------------------------------------------------------------- /packages/car_evaluation_api/app/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | from .health import Health 2 | from .predict import MultipleCarTransactionInputData, PredictionResults -------------------------------------------------------------------------------- /packages/car_evaluation_model/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=63.0", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /packages/car_evaluation_api/.dockerignore: -------------------------------------------------------------------------------- 1 | jupyter_notebooks* 2 | */env* 3 | */venv* 4 | venv 5 | env 6 | .circleci* 7 | *.env 8 | *.log 9 | .git 10 | .gitignore 11 | .tox -------------------------------------------------------------------------------- /packages/car_evaluation_api/app/schemas/health.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | class Health(BaseModel): 4 | name: str 5 | api_version: str 6 | model_version: str -------------------------------------------------------------------------------- /packages/car_evaluation_streamlit/.dockerignore: -------------------------------------------------------------------------------- 1 | jupyter_notebooks* 2 | */env* 3 | */venv* 4 | venv 5 | env 6 | .circleci* 7 | *.env 8 | *.log 9 | .git 10 | .gitignore 11 | .tox 
-------------------------------------------------------------------------------- /packages/car_evaluation_api/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url="https://repo.fury.io/kurtispykes/" 2 | car-evaluation-model==1.0.0 3 | 4 | uvicorn>=0.18.2, <0.19.0 5 | fastapi>=0.79.0, <1.0.0 6 | python-multipart>=0.0.5, <0.1.0 7 | pydantic>=1.9.1, <1.10.0 8 | typing_extensions>=3.10.0, <3.11.0 9 | loguru>=0.6.0, <0.7.0 -------------------------------------------------------------------------------- /packages/car_evaluation_streamlit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.4 2 | 3 | WORKDIR /opt/car_evaluation_streamlit 4 | 5 | ADD ./car_evaluation_streamlit /opt/car_evaluation_streamlit 6 | RUN pip install --upgrade pip 7 | RUN pip install -r /opt/car_evaluation_streamlit/requirements.txt 8 | 9 | EXPOSE 8501 10 | 11 | CMD ["streamlit", "run", "app.py"] -------------------------------------------------------------------------------- /packages/car_evaluation_model/requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | # ML requirements 2 | pandas>=1.4.3, <1.5.0 3 | numpy>=1.23.1, <1.24.0 4 | scikit-learn>=1.1.1, <1.2.0 5 | joblib>=1.1.0, <1.2.0 6 | 7 | # Config & parsing 8 | pydantic>=1.9.1, <1.10.1 9 | strictyaml>=1.5.0, <1.6.0 10 | 11 | # packaging 12 | setuptools>=63.2.0,<63.3.0 13 | wheel>=0.37.1, <0.38.0 -------------------------------------------------------------------------------- /packages/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | car_evaluation_streamlit: 5 | build: 6 | dockerfile: car_evaluation_streamlit\Dockerfile 7 | ports: 8 | - 8501:8501 9 | depends_on: 10 | - car_evaluation_api 11 | 12 | car_evaluation_api: 13 | build: 14 | dockerfile: 
car_evaluation_api\Dockerfile 15 | ports: 16 | - 8001:8001 17 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | # warn_unreachable = True 3 | warn_unused_ignores = True 4 | follow_imports = skip 5 | show_error_context = True 6 | warn_incomplete_stub = True 7 | ignore_missing_imports = True 8 | check_untyped_defs = True 9 | cache_dir = /dev/null 10 | # Cannot enable this one as we still allow defining functions without any types. 11 | # disallow_untyped_defs = True 12 | warn_redundant_casts = True 13 | warn_unused_configs = True 14 | strict_optional = True -------------------------------------------------------------------------------- /packages/car_evaluation_api/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = test_app 3 | skipsdist = True 4 | 5 | [testenv] 6 | install_command = pip install {opts} {packages} 7 | 8 | passenv = 9 | PIP_EXTRA_INDEX_URL 10 | 11 | [testenv:test_app] 12 | deps = 13 | -rrequirements.txt 14 | 15 | setenv = 16 | PYTHONPATH=. 
17 | PYTHONHASHSEED=0 18 | 19 | [testenv:run] 20 | envdir = {toxworkdir}/test_app 21 | deps = 22 | {[testenv:test_app]deps} 23 | 24 | setenv = 25 | {[testenv:test_app]setenv} 26 | 27 | commands= 28 | python app/main.py -------------------------------------------------------------------------------- /packages/car_evaluation_model/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.md 3 | include *.pkl 4 | recursive-include ./fraud_detection_model/* 5 | 6 | include car_evaluation_model/data/interim/*.csv 7 | include car_evaluation_model/models/*.pkl 8 | include car_evaluation_model/VERSION 9 | include car_evaluation_model/config.yml 10 | 11 | include ./requirements/requirements.txt 12 | include ./requirements/test_requirements.txt 13 | exclude *.log 14 | exclude *.cfg 15 | 16 | recursive-exclude * __pycache__ 17 | recursive-exclude * *.py[co] -------------------------------------------------------------------------------- /packages/car_evaluation_model/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = test_package 3 | skipsdist = True 4 | 5 | [testenv] 6 | install_command = pip install {opts} {packages} 7 | 8 | [testenv:test_package] 9 | deps = 10 | -rrequirements/test_requirements.txt 11 | 12 | setenv = 13 | PYTHONPATH=. 
14 | PYTHONHASHSEED=0 15 | 16 | commands= 17 | python car_evaluation_model/train_pipeline.py 18 | 19 | [testenv:train] 20 | envdir = {toxworkdir}/test_package 21 | deps = 22 | {[testenv:test_package]deps} 23 | 24 | setenv = 25 | {[testenv:test_package]setenv} 26 | 27 | commands= 28 | python car_evaluation_model/train_pipeline.py 29 | -------------------------------------------------------------------------------- /packages/car_evaluation_api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.4 2 | 3 | # Create the user that will run the app 4 | RUN adduser --disabled-password --gecos '' ml-api-user 5 | 6 | WORKDIR /opt/car_evaluation_api 7 | 8 | ARG PIP_EXTRA_INDEX_URL 9 | 10 | # Install requirements, including from Gemfury 11 | ADD ./car_evaluation_api /opt/car_evaluation_api 12 | RUN pip install --upgrade pip 13 | RUN pip install -r /opt/car_evaluation_api/requirements.txt 14 | 15 | RUN chmod +x /opt/car_evaluation_api/run.sh 16 | RUN chown -R ml-api-user:ml-api-user ./ 17 | 18 | USER ml-api-user 19 | 20 | EXPOSE 8001 21 | 22 | CMD ["bash", "./run.sh"] 23 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/processing/features.py: -------------------------------------------------------------------------------- 1 | 2 | def convert_mapping(X, columns, values_to_replace): 3 | """ 4 | Convert the mapping keys to values. 
5 | """ 6 | if isinstance(columns, list): 7 | for col in columns: 8 | X[col] = X[col].map(values_to_replace) 9 | else: 10 | X[columns] = X[columns].map(values_to_replace) 11 | 12 | return X 13 | 14 | def revert_mapping(X, columns, values_to_replace): 15 | """ 16 | Convert the mapping values to keys 17 | """ 18 | reverse_dict = {v:k for k, v in values_to_replace.items()} 19 | 20 | if isinstance(columns, list): 21 | for col in columns: 22 | X[col] = X[col].map(reverse_dict) 23 | else: 24 | X[columns] = X[columns].map(reverse_dict) 25 | 26 | return X -------------------------------------------------------------------------------- /packages/car_evaluation_api/app/schemas/predict.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional 2 | 3 | from car_evaluation_model.processing import validation as v 4 | from pydantic import BaseModel 5 | 6 | 7 | class PredictionResults(BaseModel): 8 | errors: Optional[Any] 9 | version: str 10 | predictions: Optional[List[int]] 11 | 12 | 13 | class MultipleCarTransactionInputData(BaseModel): 14 | inputs: List[v.CarTransactionInputData] 15 | 16 | class Config: 17 | schema_extra = { 18 | "example": { 19 | "inputs": [ 20 | { 21 | "buying": "vhigh", 22 | "maint": "med", 23 | "doors": 4, 24 | "persons": "more", 25 | "lug_boot": "med", 26 | "safety": "high" 27 | } 28 | ] 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from car_evaluation_model.config.core import PACKAGE_ROOT, config 4 | 5 | # It is strongly advised that you do not add any handlers other than 6 | # NullHandler to your library’s loggers. This is because the configuration 7 | # of handlers is the prerogative of the application developer who uses your 8 | # library. 
The application developer knows their target audience and what 9 | # handlers are most appropriate for their application: if you add handlers 10 | # ‘under the hood’, you might well interfere with their ability to carry out 11 | # unit tests and deliver logs which suit their requirements. 12 | # https://docs.python.org/3/howto/logging.html#configuring-logging-for-a-library 13 | logging.getLogger(config.app_config.package_name).addHandler(logging.NullHandler()) 14 | 15 | 16 | with open(PACKAGE_ROOT / "VERSION") as version_file: 17 | __version__ = version_file.read().strip() -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/train_pipeline.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | from config.core import RAW_DATA_DIR, config 5 | from pipeline import car_evaluation_pipe 6 | from processing import data_manager as dm 7 | 8 | from car_evaluation_model import __version__ as _version 9 | 10 | _logger = logging.getLogger(__name__) 11 | 12 | def run_training(): 13 | """Train the model""" 14 | 15 | # Read the training data 16 | dataset = dm.load_dataset( 17 | path_to_data= Path(f"{RAW_DATA_DIR}/{config.app_config.data}"), 18 | names=True 19 | ) 20 | 21 | # Create train and test sets 22 | X_train, _, y_train, _ = dm.create_train_and_test(data=dataset) 23 | 24 | # Train the pipeline 25 | car_evaluation_pipe.fit(X_train, y_train) 26 | 27 | # Persist the trained model 28 | _logger.warning(f"saving model version: {_version}") 29 | dm.save_pipeline(pipeline_to_persist=car_evaluation_pipe) 30 | 31 | if __name__ == "__main__": 32 | run_training() 33 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/processing/validation.py: -------------------------------------------------------------------------------- 1 | import typing 
as t 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pydantic import BaseModel, ValidationError 6 | 7 | from car_evaluation_model.config.core import config 8 | 9 | 10 | def validate_inputs(*, inputs: pd.DataFrame): 11 | """Check model inputs for unprocessable values.""" 12 | # replace numpy nans with None so that pydantic can validate 13 | data_ = inputs.replace({np.nan: None}).to_dict(orient="records") 14 | errors = None 15 | 16 | try: 17 | MultipleCarTransactionInputData(inputs=data_) 18 | except ValidationError as exc: 19 | errors = exc.json() 20 | 21 | return data_, errors 22 | 23 | 24 | class CarTransactionInputData(BaseModel): 25 | buying: t.Optional[str] 26 | maint: t.Optional[str] 27 | doors: t.Optional[str] 28 | persons: t.Optional[str] 29 | lug_boot: t.Optional[str] 30 | safety: t.Optional[str] 31 | 32 | class MultipleCarTransactionInputData(BaseModel): 33 | inputs: t.List[CarTransactionInputData] -------------------------------------------------------------------------------- /packages/car_evaluation_model/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Kurtis Pykes 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/predict.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | 6 | from car_evaluation_model import __version__ as _version 7 | from car_evaluation_model.config.core import config 8 | from car_evaluation_model.processing import data_manager as dm 9 | from car_evaluation_model.processing.validation import validate_inputs 10 | 11 | _logger = logging.getLogger(__name__) 12 | 13 | pipeline_file_name = f"{config.app_config.pipeline_save_file}{_version}.pkl" 14 | _car_evaluation_pipe = dm.load_pipeline(file_name=pipeline_file_name) 15 | 16 | 17 | def make_prediction(*, inputs): 18 | """Make a prediction using a saved model pipeline.""" 19 | 20 | input_df = pd.DataFrame(inputs) 21 | 22 | validated_data, errors = validate_inputs(inputs=input_df) 23 | results = {"predictions": None, "version": _version, "errors": errors} 24 | 25 | if not errors: 26 | predictions = _car_evaluation_pipe.predict( 27 | X=pd.DataFrame(validated_data) 28 | ) 29 | _logger.info( 30 | f"Making predictions with model version: {_version} " 31 | f"Predictions: {predictions}" 32 | ) 33 | results = { 34 | "predictions": predictions.tolist(), 35 | "version": _version, 36 | "errors": errors, 37 | } 38 | 39 | return results 
-------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/config.yml: -------------------------------------------------------------------------------- 1 | # Package name 2 | package_name: car_evaluation_model 3 | 4 | # Pipeline name 5 | pipeline_name: car_evaluation_model 6 | pipeline_save_file: car_evaluation_model_output_v 7 | 8 | # Data files 9 | data: car.data 10 | train_data: train_data.csv 11 | test_data: test_data.csv 12 | 13 | # Target label 14 | target: class 15 | 16 | column_names: 17 | - buying 18 | - maint 19 | - doors 20 | - persons 21 | - lug_boot 22 | - safety 23 | - class 24 | 25 | # Model features 26 | features: 27 | - buying 28 | - maint 29 | - doors 30 | - persons 31 | - lug_boot 32 | - safety 33 | 34 | buying: buying 35 | maint: maint 36 | doors: doors 37 | persons: persons 38 | lug_boot: lug_boot 39 | safety: safety 40 | 41 | buying_and_maint: 42 | - buying 43 | - maint 44 | 45 | buying_and_maint_mappings: 46 | vhigh: 3 47 | high: 2 48 | med: 1 49 | low: 0 50 | 51 | doors_mappings: 52 | 2: 0 53 | 3: 1 54 | 4: 2 55 | 5more: 3 56 | 57 | persons_mappings: 58 | 2: 0 59 | 4: 1 60 | more: 2 61 | 62 | lug_boot_mappings: 63 | small: 0 64 | med: 1 65 | big: 2 66 | 67 | safety_mappings: 68 | low: 0 69 | med: 1 70 | high: 2 71 | 72 | class_mappings: 73 | unacc: 0 74 | acc: 1 75 | good: 2 76 | vgood: 3 77 | 78 | # Model configurations 79 | random_state: 26 80 | 81 | # Data splits 82 | test_size: 0.25 -------------------------------------------------------------------------------- /packages/car_evaluation_api/app/main.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from fastapi import APIRouter, FastAPI, Request 4 | from fastapi.middleware.cors import CORSMiddleware 5 | from fastapi.responses import HTMLResponse 6 | from loguru import logger 7 | 8 | from app.api import api_router 9 | from app.config import 
settings, setup_app_logging 10 | 11 | # setup logging as early as possible 12 | setup_app_logging(config=settings) 13 | 14 | 15 | app = FastAPI( 16 | title=settings.PROJECT_NAME, openapi_url=f"{settings.API_V1_STR}/openapi.json" 17 | ) 18 | 19 | root_router = APIRouter() 20 | 21 | 22 | @root_router.get("/") 23 | def index(request: Request) -> Any: 24 | """Basic HTML response.""" 25 | body = ( 26 | "" 27 | "" 28 | "

Welcome to the API

" 29 | "
" 30 | "Check the docs: here" 31 | "
" 32 | "" 33 | "" 34 | ) 35 | 36 | return HTMLResponse(content=body) 37 | 38 | 39 | app.include_router(api_router, prefix=settings.API_V1_STR) 40 | app.include_router(root_router) 41 | 42 | # Set all CORS enabled origins 43 | if settings.BACKEND_CORS_ORIGINS: 44 | app.add_middleware( 45 | CORSMiddleware, 46 | allow_origins=[str(origin) for origin in settings.BACKEND_CORS_ORIGINS], 47 | allow_credentials=True, 48 | allow_methods=["*"], 49 | allow_headers=["*"], 50 | ) 51 | 52 | 53 | if __name__ == "__main__": 54 | # Use this for debugging purposes only 55 | logger.warning("Running in development mode. Do not run like this in production.") 56 | import uvicorn 57 | 58 | uvicorn.run(app, host="localhost", port=8001, log_level="debug") -------------------------------------------------------------------------------- /packages/car_evaluation_api/app/api.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from fastapi import APIRouter, HTTPException 7 | from fastapi.encoders import jsonable_encoder 8 | from car_evaluation_model import __version__ as model_version 9 | from car_evaluation_model.predict import make_prediction 10 | from loguru import logger 11 | 12 | from app import __version__, schemas 13 | from app.config import settings 14 | 15 | api_router = APIRouter() 16 | 17 | 18 | @api_router.get("/health", response_model=schemas.Health, status_code=200) 19 | def health() -> dict: 20 | """ 21 | Root Get 22 | """ 23 | health = schemas.Health( 24 | name=settings.PROJECT_NAME, api_version=__version__, model_version=model_version 25 | ) 26 | 27 | return health.dict() 28 | 29 | 30 | @api_router.post("/predict", response_model=schemas.PredictionResults, status_code=200) 31 | async def predict(input_data: schemas.MultipleCarTransactionInputData) -> Any: 32 | """ 33 | Make predictions with the Fraud detection model 34 | """ 35 | 36 | input_df = 
pd.DataFrame(jsonable_encoder(input_data.inputs)) 37 | 38 | # Advanced: You can improve performance of your API by rewriting the 39 | # `make prediction` function to be async and using await here. 40 | logger.info(f"Making prediction on inputs: {input_data.inputs}") 41 | results = make_prediction(inputs=input_df.replace({np.nan: None})) 42 | 43 | if results["errors"] is not None: 44 | logger.warning(f"Prediction validation error: {results.get('errors')}") 45 | raise HTTPException(status_code=400, detail=json.loads(results["errors"])) 46 | 47 | logger.info(f"Prediction results: {results.get('predictions')}") 48 | 49 | return results -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Car Evaluation 2 | The purpose behind this project was to demonstrate how to build an instant machine learning application with Streamlit - this is great for rapid prototyping. 3 | To achieve this I created a simple classification model on the [Car Evaluation Dataset](https://archive.ics.uci.edu/ml/datasets/Car+Evaluation) from the 4 | [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/index.php). By following along with the articles below, you will learn how to: create a machine 5 | learning microservice, create a front end for your machine learning model, and how to wire the two applications together using [Docker](https://www.docker.com/) 6 | and [Docker-compose](https://docs.docker.com/compose/). The GIF below is a demonstration of how the application works. 
7 | 8 | ![Example of Machine Learning Web application(2)](https://user-images.githubusercontent.com/43003716/190868371-fb1f5d3f-f74b-4506-9409-0c2fbb1b505e.gif) 9 | 10 | ## Installation & Usage 11 | These instructions assume that you already have Docker and Docker-compose installed on your machine - if not, please follow the instructions 12 | [here](https://docs.docker.com/compose/install/). 13 | - Clone this repository to your computer 14 | - Navigate to the `packages` directory of the project, where `docker-compose.yml` lives: `cd car-evaluation-project/packages` 15 | - Build the docker images and start the containers using `docker-compose up -d --build` 16 | - This may take a minute 17 | - Open your browser and navigate to http://localhost:8501 to use the application. 18 | 19 | ## Extending this project 20 | - Conduct analysis of the data to build a better classification model 21 | - Set up monitoring for the machine learning model 22 | - Deploy on the cloud 23 | 24 | ## Articles About this Project 25 | - [How to Build a Machine Learning Microservice with FastAPI](https://developer.nvidia.com/blog/building-a-machine-learning-microservice-with-fastapi/) 26 | - [How to Build an Instant Machine Learning Web Application with Streamlit and FastAPI](https://developer.nvidia.com/blog/how-to-build-an-instant-machine-learning-web-application-with-streamlit-and-fastapi/) 27 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/car_evaluation_model/pipeline.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from multiprocessing.connection import Pipe 3 | 4 | from sklearn.pipeline import Pipeline 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.preprocessing import FunctionTransformer 7 | 8 | from car_evaluation_model.config.core import config 9 | from car_evaluation_model.processing import features as f 10 | 11 | _logger = logging.getLogger(__name__) 12 | 13 | # Define pipeline 14 | car_evaluation_pipe = Pipeline( 15 
| [ 16 | ( 17 | "get_buying_and_maint_mappings", 18 | FunctionTransformer(func=f.convert_mapping, 19 | kw_args = {"columns": config.model_config.buying_and_maint, 20 | "values_to_replace": config.model_config.buying_and_maint_mappings} 21 | ) 22 | ), 23 | ( 24 | "get_doors_mappings", 25 | FunctionTransformer(func=f.convert_mapping, 26 | kw_args = {"columns": config.model_config.doors, 27 | "values_to_replace": config.model_config.doors_mappings} 28 | ) 29 | ), 30 | ( 31 | "get_persons_mappings", 32 | FunctionTransformer(func=f.convert_mapping, 33 | kw_args = {"columns": config.model_config.persons, 34 | "values_to_replace": config.model_config.persons_mappings} 35 | ) 36 | ), 37 | ( 38 | "get_lug_boot_mappings", 39 | FunctionTransformer(func=f.convert_mapping, 40 | kw_args = {"columns": config.model_config.lug_boot, 41 | "values_to_replace": config.model_config.lug_boot_mappings} 42 | ) 43 | ), 44 | ( 45 | "get_safety_mappings", 46 | FunctionTransformer(func=f.convert_mapping, 47 | kw_args = {"columns": config.model_config.safety, 48 | "values_to_replace": config.model_config.safety_mappings} 49 | ) 50 | ), 51 | ( 52 | "logistic_regression", 53 | LogisticRegression(random_state=config.model_config.random_state) 54 | ) 55 | ] 56 | ) -------------------------------------------------------------------------------- /packages/car_evaluation_streamlit/app.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | import streamlit as st 4 | 5 | # Define the title 6 | st.title("Car evaluation web application") 7 | st.write( 8 | "The model evaluates a cars acceptability based on the inputs below.\ 9 | Pass the appropiate details about your car using the questions below to discover if your car is acceptable." 
10 | ) 11 | 12 | # Input 1 13 | buying = st.radio( 14 | "What are your thought's on the cars buying price?", 15 | ("vhigh", "high", "med", "low") 16 | ) 17 | 18 | # Input 2 19 | maint = st.radio( 20 | "What are your thoughts on the price of maintanence for the car?", 21 | ("vhigh", "high", "med", "low") 22 | ) 23 | 24 | # Input 3 25 | doors = st.select_slider( 26 | "How many doors does the car have?", 27 | options=["2", "3", "4", "5more"] 28 | ) 29 | 30 | # Input 4 31 | persons = st.select_slider( 32 | "How many passengers can the car carry?", 33 | options=["2", "4", "more"] 34 | ) 35 | 36 | # Input 5 37 | lug_boot = st.select_slider( 38 | "What is the size of the luggage boot?", 39 | options=["small", "med", "big"] 40 | ) 41 | 42 | # Input 6 43 | safety = st.select_slider( 44 | "What estimated level of safety does the car provide?", 45 | options=["low", "med", "high"] 46 | ) 47 | 48 | # Class values to be returned by the model 49 | class_values = { 50 | 0: "unacceptable", 51 | 1: "acceptable", 52 | 2: "good", 53 | 3: "very good" 54 | } 55 | 56 | # When 'Submit' is selected 57 | if st.button("Submit"): 58 | 59 | # Inputs to ML model 60 | inputs = { 61 | "inputs": [ 62 | { 63 | "buying": buying, 64 | "maint": maint, 65 | "doors": doors, 66 | "persons": persons, 67 | "lug_boot": lug_boot, 68 | "safety": safety 69 | } 70 | ] 71 | } 72 | 73 | # Posting inputs to ML API 74 | response = requests.post(f"http://host.docker.internal:8001/api/v1/predict/", json=inputs, verify=False) 75 | json_response = response.json() 76 | 77 | prediction = class_values[json_response.get("predictions")[0]] 78 | 79 | st.subheader(f"This car is **{prediction}!**") 80 | 81 | 82 | -------------------------------------------------------------------------------- /packages/car_evaluation_model/setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from setuptools import find_packages, setup 4 | 5 | # Package meta-data. 
6 | NAME = 'car_evaluation_model' 7 | DESCRIPTION = "Classification model to predict quality of a car." 8 | URL = "https://github.com/kurtispykes/car-evaluation-project/tree/Main/packages/car_evaluation_model" 9 | EMAIL = "kurtispykes@gmail.com" 10 | AUTHOR = "Kurtis Pykes" 11 | REQUIRES_PYTHON = ">=3.8.0" 12 | 13 | long_description = DESCRIPTION 14 | 15 | # Load the package's VERSION file as a dictionary. 16 | about = {} 17 | ROOT_DIR = Path(__file__).resolve().parent 18 | REQUIREMENTS_DIR = ROOT_DIR / 'requirements' 19 | PACKAGE_DIR = ROOT_DIR / 'car_evaluation_model' 20 | with open(PACKAGE_DIR / "VERSION") as f: 21 | _version = f.read().strip() 22 | about["__version__"] = _version 23 | 24 | 25 | # What packages are required for this module to be executed? 26 | def list_reqs(fname="requirements.txt"): 27 | with open(REQUIREMENTS_DIR / fname) as fd: 28 | return fd.read().splitlines() 29 | 30 | # Where the magic happens: 31 | setup( 32 | name=NAME, 33 | version=about["__version__"], 34 | description=DESCRIPTION, 35 | long_description=long_description, 36 | long_description_content_type="text/markdown", 37 | author=AUTHOR, 38 | author_email=EMAIL, 39 | python_requires=REQUIRES_PYTHON, 40 | url=URL, 41 | packages=find_packages(exclude=("tests",)), 42 | package_data={"car_evaluation_model": ["VERSION"]}, 43 | install_requires=list_reqs(), 44 | extras_require={}, 45 | include_package_data=True, 46 | license="BSD-3", 47 | classifiers=[ 48 | # Trove classifiers 49 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 50 | "License :: OSI Approved :: MIT License", 51 | "Programming Language :: Python", 52 | "Programming Language :: Python :: 3", 53 | "Programming Language :: Python :: 3.6", 54 | "Programming Language :: Python :: 3.7", 55 | "Programming Language :: Python :: 3.8", 56 | "Programming Language :: Python :: 3.9", 57 | "Programming Language :: Python :: Implementation :: CPython", 58 | "Programming Language :: Python :: Implementation :: 
PyPy", 59 | ], 60 | ) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | packages/car_evaluation_streamlit/venv 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | 107 | # pycharm 108 | .idea/ 109 | 110 | # trained 
# See: https://loguru.readthedocs.io/en/stable/overview.html#entirely-compatible-with-standard-logging  # noqa
class InterceptHandler(logging.Handler):
    """Logging handler that forwards standard ``logging`` records to Loguru."""

    def emit(self, record: logging.LogRecord) -> None:  # pragma: no cover
        """Re-emit ``record`` through the Loguru ``logger``.

        :param record: the standard-library log record to forward.
        """
        # Get corresponding Loguru level if it exists
        try:
            level = logger.level(record.levelname).name
        except ValueError:
            # Unknown level name: fall back to the numeric severity.
            # It must stay an int, because Loguru treats a string as a
            # level *name* and would raise ValueError for e.g. "15".
            level = record.levelno

        # Find caller from where originated the logged message
        frame, depth = logging.currentframe(), 2
        # Stop at the top of the stack instead of dereferencing None.
        while (
            frame is not None
            and frame.f_code.co_filename == logging.__file__  # noqa: WPS609
        ):
            frame = frame.f_back
            depth += 1

        logger.opt(depth=depth, exception=record.exc_info).log(
            level,
            record.getMessage(),
        )
def fetch_config_from_yaml(cfg_path=None):
    """Parse the YAML file containing the package configuration.

    :param cfg_path: path of the YAML file to read. When it is falsy the
        location returned by ``find_config_file()`` is used instead.
    :return: the document object produced by ``strictyaml.load``.
    :raises OSError: if no file exists at ``cfg_path``.
    """
    if not cfg_path:
        cfg_path = find_config_file()

    # Only the read is guarded, so that YAML parsing errors surface unchanged.
    try:
        with open(cfg_path, "r", encoding="utf-8") as conf_file:
            raw_config = conf_file.read()
    except FileNotFoundError as err:
        raise OSError(f"Did not find config file at path: {cfg_path}") from err

    return strictyaml.load(raw_config)
def load_dataset(path_to_data, names=None):
    """
    Load the data into memory

    Parameters
    ----------
    :param path_to_data: data file location (anything ``pandas.read_csv``
        accepts).
    :param names: column names to assign when the file has no header row.
        Pass an iterable of names to use exactly those names. Passing
        ``True`` keeps the previous flag-style behaviour and uses
        ``config.model_config.column_names``. ``None``, ``False`` or an
        empty iterable means the first row of the file is the header.
    :return the dataset that has been loaded into memory.
    """
    if names is True:
        # Legacy usage: ``names`` was only a flag selecting the configured names.
        column_names = list(config.model_config.column_names)
    elif names is None or names is False:
        column_names = []
    else:
        # Honour the names the caller actually supplied.
        column_names = list(names)

    if column_names:
        return pd.read_csv(path_to_data, names=column_names)
    return pd.read_csv(path_to_data)
34 | 35 | :param data: the data to split into train and test sets 36 | """ 37 | data = data.copy() 38 | 39 | # Convert labels to numeric 40 | data.loc[:, config.model_config.target] = data.loc[:, config.model_config.target].map( 41 | config.model_config.class_mappings 42 | ) 43 | 44 | # Split data into features and labels 45 | X = data[config.model_config.features] 46 | y = data[config.model_config.target] 47 | 48 | # Create train and test splits 49 | X_train, X_test, y_train, y_test = train_test_split( 50 | X, 51 | y, 52 | test_size = config.model_config.test_size, 53 | random_state = config.model_config.random_state 54 | ) 55 | 56 | # Join the labels to the features to create one dataframe 57 | train_data = pd.concat([X_train, y_train], axis=1) 58 | test_data = pd.concat([X_test, y_test], axis=1) 59 | 60 | # Save data files. 61 | train_data.to_csv( 62 | Path(f"{INTERIM_DATA_DIR}/{config.app_config.train_data}"), 63 | index=False 64 | ) 65 | test_data.to_csv( 66 | Path(f"{INTERIM_DATA_DIR}/{config.app_config.test_data}"), 67 | index=False) 68 | 69 | return X_train, X_test, y_train, y_test 70 | 71 | 72 | def save_pipeline(*, pipeline_to_persist): 73 | """ 74 | Persist the pipeline. Saves the versioned model, and overwrites 75 | any previous saved models. This ensures that when the package is 76 | published, there is only one trained model that can be 77 | called, and we know exactly how it was built. 
def remove_old_pipelines(*, files_to_keep) -> None:
    """
    Remove old model pipelines.
    This is to ensure there is a simple one-to-one
    mapping between the package version and the model
    version to be imported and used by other applications.

    :param files_to_keep: iterable of file names inside
        ``TRAINED_MODEL_DIR`` that must not be deleted. ``__init__.py``
        is always kept.
    """
    # A set accepts any iterable (list, tuple, ...), not only lists.
    do_not_delete = {*files_to_keep, "__init__.py"}
    for model_file in TRAINED_MODEL_DIR.iterdir():
        # Skip directories such as __pycache__: unlink() on them would raise.
        if model_file.is_file() and model_file.name not in do_not_delete:
            model_file.unlink()