├── packages
├── car_evaluation_model
│ ├── tests
│ │ └── __init__.py
│ ├── car_evaluation_model
│ │ ├── VERSION
│ │ ├── data
│ │ │ ├── __init__.py
│ │ │ ├── raw
│ │ │ │ └── __init__.py
│ │ │ └── interim
│ │ │ │ └── __init__.py
│ │ ├── config
│ │ │ ├── __init__.py
│ │ │ └── core.py
│ │ ├── models
│ │ │ └── __init__.py
│ │ ├── processing
│ │ │ ├── __init__.py
│ │ │ ├── features.py
│ │ │ ├── validation.py
│ │ │ └── data_manager.py
│ │ ├── __init__.py
│ │ ├── train_pipeline.py
│ │ ├── predict.py
│ │ ├── config.yml
│ │ └── pipeline.py
│ ├── pyproject.toml
│ ├── requirements
│ │ └── requirements.txt
│ ├── mypy.ini
│ ├── MANIFEST.in
│ ├── tox.ini
│ ├── LICENSE
│ └── setup.py
├── car_evaluation_api
│ ├── app
│ │ ├── __init__.py
│ │ ├── schemas
│ │ │ ├── __init__.py
│ │ │ ├── health.py
│ │ │ └── predict.py
│ │ ├── main.py
│ │ ├── api.py
│ │ └── config.py
│ ├── run.sh
│ ├── Procfile
│ ├── mypy.ini
│ ├── .dockerignore
│ ├── requirements.txt
│ ├── tox.ini
│ └── Dockerfile
├── car_evaluation_streamlit
│ ├── requirements.txt
│ ├── .dockerignore
│ ├── Dockerfile
│ └── app.py
└── docker-compose.yml
├── README.md
└── .gitignore
/packages/car_evaluation_model/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/VERSION:
--------------------------------------------------------------------------------
1 | 1.0.0
--------------------------------------------------------------------------------
/packages/car_evaluation_api/app/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "1.0.0"
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/config/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/data/raw/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/processing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/data/interim/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/packages/car_evaluation_streamlit/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit>=1.12.0, <1.13.0
--------------------------------------------------------------------------------
/packages/car_evaluation_api/run.sh:
--------------------------------------------------------------------------------
1 | uvicorn app.main:app --host 0.0.0.0 --port 8001
2 |
--------------------------------------------------------------------------------
/packages/car_evaluation_api/Procfile:
--------------------------------------------------------------------------------
1 | web: uvicorn app.main:app --host 0.0.0.0 --port $PORT
2 |
--------------------------------------------------------------------------------
/packages/car_evaluation_api/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | plugins = pydantic.mypy
3 | ignore_missing_imports = True
4 | disallow_untyped_defs = True
--------------------------------------------------------------------------------
/packages/car_evaluation_api/app/schemas/__init__.py:
--------------------------------------------------------------------------------
1 | from .health import Health
2 | from .predict import MultipleCarTransactionInputData, PredictionResults
--------------------------------------------------------------------------------
/packages/car_evaluation_model/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools>=63.0",
4 | "wheel"
5 | ]
6 | build-backend = "setuptools.build_meta"
--------------------------------------------------------------------------------
/packages/car_evaluation_api/.dockerignore:
--------------------------------------------------------------------------------
1 | jupyter_notebooks*
2 | */env*
3 | */venv*
4 | venv
5 | env
6 | .circleci*
7 | *.env
8 | *.log
9 | .git
10 | .gitignore
11 | .tox
--------------------------------------------------------------------------------
/packages/car_evaluation_api/app/schemas/health.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 |
# Response schema for the API health-check endpoint.
class Health(BaseModel):
    name: str  # project name reported by the service
    api_version: str  # version string of the API application
    model_version: str  # version string of the installed model package
--------------------------------------------------------------------------------
/packages/car_evaluation_streamlit/.dockerignore:
--------------------------------------------------------------------------------
1 | jupyter_notebooks*
2 | */env*
3 | */venv*
4 | venv
5 | env
6 | .circleci*
7 | *.env
8 | *.log
9 | .git
10 | .gitignore
11 | .tox
--------------------------------------------------------------------------------
/packages/car_evaluation_api/requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url="https://repo.fury.io/kurtispykes/"
2 | car-evaluation-model==1.0.0
3 |
4 | uvicorn>=0.18.2, <0.19.0
5 | fastapi>=0.79.0, <1.0.0
6 | python-multipart>=0.0.5, <0.1.0
7 | pydantic>=1.9.1, <1.10.0
8 | typing_extensions>=3.10.0, <3.11.0
9 | loguru>=0.6.0, <0.7.0
--------------------------------------------------------------------------------
/packages/car_evaluation_streamlit/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9.4
2 |
3 | WORKDIR /opt/car_evaluation_streamlit
4 |
5 | ADD ./car_evaluation_streamlit /opt/car_evaluation_streamlit
6 | RUN pip install --upgrade pip
7 | RUN pip install -r /opt/car_evaluation_streamlit/requirements.txt
8 |
9 | EXPOSE 8501
10 |
11 | CMD ["streamlit", "run", "app.py"]
--------------------------------------------------------------------------------
/packages/car_evaluation_model/requirements/requirements.txt:
--------------------------------------------------------------------------------
1 | # ML requirements
2 | pandas>=1.4.3, <1.5.0
3 | numpy>=1.23.1, <1.24.0
4 | scikit-learn>=1.1.1, <1.2.0
5 | joblib>=1.1.0, <1.2.0
6 |
7 | # Config & parsing
8 | pydantic>=1.9.1, <1.10.1
9 | strictyaml>=1.5.0, <1.6.0
10 |
11 | # packaging
12 | setuptools>=63.2.0,<63.3.0
13 | wheel>=0.37.1, <0.38.0
--------------------------------------------------------------------------------
/packages/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 |
3 | services:
4 | car_evaluation_streamlit:
5 | build:
6 | dockerfile: car_evaluation_streamlit\Dockerfile
7 | ports:
8 | - 8501:8501
9 | depends_on:
10 | - car_evaluation_api
11 |
12 | car_evaluation_api:
13 | build:
14 | dockerfile: car_evaluation_api\Dockerfile
15 | ports:
16 | - 8001:8001
17 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | # warn_unreachable = True
3 | warn_unused_ignores = True
4 | follow_imports = skip
5 | show_error_context = True
6 | warn_incomplete_stub = True
7 | ignore_missing_imports = True
8 | check_untyped_defs = True
9 | cache_dir = /dev/null
10 | # Cannot enable this one as we still allow defining functions without any types.
11 | # disallow_untyped_defs = True
12 | warn_redundant_casts = True
13 | warn_unused_configs = True
14 | strict_optional = True
--------------------------------------------------------------------------------
/packages/car_evaluation_api/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = test_app
3 | skipsdist = True
4 |
5 | [testenv]
6 | install_command = pip install {opts} {packages}
7 |
8 | passenv =
9 | PIP_EXTRA_INDEX_URL
10 |
11 | [testenv:test_app]
12 | deps =
13 | -rrequirements.txt
14 |
15 | setenv =
16 | PYTHONPATH=.
17 | PYTHONHASHSEED=0
18 |
19 | [testenv:run]
20 | envdir = {toxworkdir}/test_app
21 | deps =
22 | {[testenv:test_app]deps}
23 |
24 | setenv =
25 | {[testenv:test_app]setenv}
26 |
27 | commands=
28 | python app/main.py
--------------------------------------------------------------------------------
/packages/car_evaluation_model/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.txt
2 | include *.md
3 | include *.pkl
4 | recursive-include ./fraud_detection_model/*
5 |
6 | include car_evaluation_model/data/interim/*.csv
7 | include car_evaluation_model/models/*.pkl
8 | include car_evaluation_model/VERSION
9 | include car_evaluation_model/config.yml
10 |
11 | include ./requirements/requirements.txt
12 | include ./requirements/test_requirements.txt
13 | exclude *.log
14 | exclude *.cfg
15 |
16 | recursive-exclude * __pycache__
17 | recursive-exclude * *.py[co]
--------------------------------------------------------------------------------
/packages/car_evaluation_model/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = test_package
3 | skipsdist = True
4 |
5 | [testenv]
6 | install_command = pip install {opts} {packages}
7 |
8 | [testenv:test_package]
9 | deps =
10 | -rrequirements/test_requirements.txt
11 |
12 | setenv =
13 | PYTHONPATH=.
14 | PYTHONHASHSEED=0
15 |
16 | commands=
17 | python car_evaluation_model/train_pipeline.py
18 |
19 | [testenv:train]
20 | envdir = {toxworkdir}/test_package
21 | deps =
22 | {[testenv:test_package]deps}
23 |
24 | setenv =
25 | {[testenv:test_package]setenv}
26 |
27 | commands=
28 | python car_evaluation_model/train_pipeline.py
29 |
--------------------------------------------------------------------------------
/packages/car_evaluation_api/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9.4
2 |
3 | # Create the user that will run the app
4 | RUN adduser --disabled-password --gecos '' ml-api-user
5 |
6 | WORKDIR /opt/car_evaluation_api
7 |
8 | ARG PIP_EXTRA_INDEX_URL
9 |
10 | # Install requirements, including from Gemfury
11 | ADD ./car_evaluation_api /opt/car_evaluation_api
12 | RUN pip install --upgrade pip
13 | RUN pip install -r /opt/car_evaluation_api/requirements.txt
14 |
15 | RUN chmod +x /opt/car_evaluation_api/run.sh
16 | RUN chown -R ml-api-user:ml-api-user ./
17 |
18 | USER ml-api-user
19 |
20 | EXPOSE 8001
21 |
22 | CMD ["bash", "./run.sh"]
23 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/processing/features.py:
--------------------------------------------------------------------------------
1 |
def convert_mapping(X, columns, values_to_replace):
    """
    Convert the mapping keys to values.

    Args:
        X: DataFrame holding the columns to convert.
        columns: a single column label, or a list of column labels.
        values_to_replace: dict mapping current cell values to new values.

    Returns:
        A new DataFrame with the selected columns mapped. The frame passed
        in is left untouched. Cell values that are not keys of
        ``values_to_replace`` become NaN (behaviour of ``Series.map``).
    """
    # Work on a copy so the caller's data is not modified as a side effect
    # of running the transformation.
    X = X.copy()

    # Treat a single label like a one-element list so both cases share a path.
    target_columns = columns if isinstance(columns, list) else [columns]
    for col in target_columns:
        X[col] = X[col].map(values_to_replace)

    return X
13 |
def revert_mapping(X, columns, values_to_replace):
    """
    Convert the mapping values to keys

    Args:
        X: DataFrame holding the columns to convert back.
        columns: a single column label, or a list of column labels.
        values_to_replace: the forward mapping (key -> value); it is inverted
            here, so its values must be hashable. If several keys share a
            value, the last one wins in the inverted mapping.

    Returns:
        A new DataFrame with the selected columns mapped back to the original
        keys. The frame passed in is left untouched.
    """
    reverse_dict = {v: k for k, v in values_to_replace.items()}

    # Work on a copy so the caller's data is not modified as a side effect.
    X = X.copy()

    target_columns = columns if isinstance(columns, list) else [columns]
    for col in target_columns:
        X[col] = X[col].map(reverse_dict)

    return X
--------------------------------------------------------------------------------
/packages/car_evaluation_api/app/schemas/predict.py:
--------------------------------------------------------------------------------
1 | from typing import Any, List, Optional
2 |
3 | from car_evaluation_model.processing import validation as v
4 | from pydantic import BaseModel
5 |
6 |
# Response schema of the prediction endpoint.
class PredictionResults(BaseModel):
    errors: Optional[Any]  # validation errors, if any were reported
    version: str  # model version that produced the result
    predictions: Optional[List[int]]  # predicted class codes, when available
11 |
12 |
# Request schema of the prediction endpoint: a batch of car records.
class MultipleCarTransactionInputData(BaseModel):
    inputs: List[v.CarTransactionInputData]

    class Config:
        # Example payload shown in the generated OpenAPI docs. Every value is
        # a string, matching the string-typed fields of
        # CarTransactionInputData.
        schema_extra = {
            "example": {
                "inputs": [
                    {
                        "buying": "vhigh",
                        "maint": "med",
                        "doors": "4",
                        "persons": "more",
                        "lug_boot": "med",
                        "safety": "high",
                    }
                ]
            }
        }
31 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from car_evaluation_model.config.core import PACKAGE_ROOT, config
4 |
5 | # It is strongly advised that you do not add any handlers other than
6 | # NullHandler to your library’s loggers. This is because the configuration
7 | # of handlers is the prerogative of the application developer who uses your
8 | # library. The application developer knows their target audience and what
9 | # handlers are most appropriate for their application: if you add handlers
10 | # ‘under the hood’, you might well interfere with their ability to carry out
11 | # unit tests and deliver logs which suit their requirements.
12 | # https://docs.python.org/3/howto/logging.html#configuring-logging-for-a-library
13 | logging.getLogger(config.app_config.package_name).addHandler(logging.NullHandler())
14 |
15 |
16 | with open(PACKAGE_ROOT / "VERSION") as version_file:
17 | __version__ = version_file.read().strip()
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/train_pipeline.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 |
4 | from config.core import RAW_DATA_DIR, config
5 | from pipeline import car_evaluation_pipe
6 | from processing import data_manager as dm
7 |
8 | from car_evaluation_model import __version__ as _version
9 |
10 | _logger = logging.getLogger(__name__)
11 |
def run_training():
    """Fit the car evaluation pipeline on the raw dataset and persist it."""

    # Locate and load the raw data file named in the app config
    raw_data_path = Path(f"{RAW_DATA_DIR}/{config.app_config.data}")
    raw_data = dm.load_dataset(path_to_data=raw_data_path, names=True)

    # Split the data; only the training portion is used here
    features_train, _, targets_train, _ = dm.create_train_and_test(data=raw_data)

    # Fit every step of the pipeline on the training split
    car_evaluation_pipe.fit(features_train, targets_train)

    # Record which version is being written, then save the fitted pipeline
    _logger.warning(f"saving model version: {_version}")
    dm.save_pipeline(pipeline_to_persist=car_evaluation_pipe)
30 |
31 | if __name__ == "__main__":
32 | run_training()
33 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/processing/validation.py:
--------------------------------------------------------------------------------
1 | import typing as t
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from pydantic import BaseModel, ValidationError
6 |
7 | from car_evaluation_model.config.core import config
8 |
9 |
def validate_inputs(*, inputs: pd.DataFrame):
    """Validate model inputs against the pydantic input schema.

    Returns a ``(records, errors)`` pair: ``records`` is the input converted
    to a list of row dicts, and ``errors`` is the JSON string of the
    validation error, or ``None`` when validation succeeded.
    """
    # Swap numpy NaNs for None so missing values reach the schema as None
    records = inputs.replace({np.nan: None}).to_dict(orient="records")

    try:
        MultipleCarTransactionInputData(inputs=records)
    except ValidationError as validation_failure:
        return records, validation_failure.json()

    return records, None
22 |
23 |
# Schema for a single car record. The field names match the `features`
# list in config.yml; every field is declared as an optional string.
class CarTransactionInputData(BaseModel):
    buying: t.Optional[str]
    maint: t.Optional[str]
    doors: t.Optional[str]
    persons: t.Optional[str]
    lug_boot: t.Optional[str]
    safety: t.Optional[str]
31 |
# Schema for a batch of car records; validate_inputs instantiates it to
# check all rows at once.
class MultipleCarTransactionInputData(BaseModel):
    inputs: t.List[CarTransactionInputData]
--------------------------------------------------------------------------------
/packages/car_evaluation_model/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Kurtis Pykes
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/predict.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 |
4 | import pandas as pd
5 |
6 | from car_evaluation_model import __version__ as _version
7 | from car_evaluation_model.config.core import config
8 | from car_evaluation_model.processing import data_manager as dm
9 | from car_evaluation_model.processing.validation import validate_inputs
10 |
11 | _logger = logging.getLogger(__name__)
12 |
13 | pipeline_file_name = f"{config.app_config.pipeline_save_file}{_version}.pkl"
14 | _car_evaluation_pipe = dm.load_pipeline(file_name=pipeline_file_name)
15 |
16 |
def make_prediction(*, inputs):
    """Validate ``inputs`` and predict with the persisted pipeline.

    Returns a dict with the keys ``predictions`` (list of predictions, or
    ``None`` when validation failed), ``version`` (the package version) and
    ``errors`` (validation errors, or ``None``).
    """

    frame = pd.DataFrame(inputs)
    validated_data, errors = validate_inputs(inputs=frame)

    # Validation failed: report the errors and skip the model entirely
    if errors:
        return {"predictions": None, "version": _version, "errors": errors}

    predictions = _car_evaluation_pipe.predict(X=pd.DataFrame(validated_data))
    _logger.info(
        f"Making predictions with model version: {_version} "
        f"Predictions: {predictions}"
    )

    return {
        "predictions": predictions.tolist(),
        "version": _version,
        "errors": errors,
    }
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/config.yml:
--------------------------------------------------------------------------------
1 | # Package name
2 | package_name: car_evaluation_model
3 |
4 | # Pipeline name
5 | pipeline_name: car_evaluation_model
6 | pipeline_save_file: car_evaluation_model_output_v
7 |
8 | # Data files
9 | data: car.data
10 | train_data: train_data.csv
11 | test_data: test_data.csv
12 |
13 | # Target label
14 | target: class
15 |
16 | column_names:
17 | - buying
18 | - maint
19 | - doors
20 | - persons
21 | - lug_boot
22 | - safety
23 | - class
24 |
25 | # Model features
26 | features:
27 | - buying
28 | - maint
29 | - doors
30 | - persons
31 | - lug_boot
32 | - safety
33 |
34 | buying: buying
35 | maint: maint
36 | doors: doors
37 | persons: persons
38 | lug_boot: lug_boot
39 | safety: safety
40 |
41 | buying_and_maint:
42 | - buying
43 | - maint
44 |
45 | buying_and_maint_mappings:
46 | vhigh: 3
47 | high: 2
48 | med: 1
49 | low: 0
50 |
51 | doors_mappings:
52 | 2: 0
53 | 3: 1
54 | 4: 2
55 | 5more: 3
56 |
57 | persons_mappings:
58 | 2: 0
59 | 4: 1
60 | more: 2
61 |
62 | lug_boot_mappings:
63 | small: 0
64 | med: 1
65 | big: 2
66 |
67 | safety_mappings:
68 | low: 0
69 | med: 1
70 | high: 2
71 |
72 | class_mappings:
73 | unacc: 0
74 | acc: 1
75 | good: 2
76 | vgood: 3
77 |
78 | # Model configurations
79 | random_state: 26
80 |
81 | # Data splits
82 | test_size: 0.25
--------------------------------------------------------------------------------
/packages/car_evaluation_api/app/main.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from fastapi import APIRouter, FastAPI, Request
4 | from fastapi.middleware.cors import CORSMiddleware
5 | from fastapi.responses import HTMLResponse
6 | from loguru import logger
7 |
8 | from app.api import api_router
9 | from app.config import settings, setup_app_logging
10 |
11 | # setup logging as early as possible
12 | setup_app_logging(config=settings)
13 |
14 |
15 | app = FastAPI(
16 | title=settings.PROJECT_NAME, openapi_url=f"{settings.API_V1_STR}/openapi.json"
17 | )
18 |
19 | root_router = APIRouter()
20 |
21 |
@root_router.get("/")
def index(request: Request) -> Any:
    """Basic HTML response."""
    # Minimal landing page: a heading plus a link to the interactive API
    # docs, which FastAPI serves at /docs by default.
    body = (
        "<html>"
        "<body style='padding: 10px;'>"
        "<h1>Welcome to the API</h1>"
        "<div>"
        "Check the docs: <a href='/docs'>here</a>"
        "</div>"
        "</body>"
        "</html>"
    )

    return HTMLResponse(content=body)
37 |
38 |
39 | app.include_router(api_router, prefix=settings.API_V1_STR)
40 | app.include_router(root_router)
41 |
42 | # Set all CORS enabled origins
43 | if settings.BACKEND_CORS_ORIGINS:
44 | app.add_middleware(
45 | CORSMiddleware,
46 | allow_origins=[str(origin) for origin in settings.BACKEND_CORS_ORIGINS],
47 | allow_credentials=True,
48 | allow_methods=["*"],
49 | allow_headers=["*"],
50 | )
51 |
52 |
53 | if __name__ == "__main__":
54 | # Use this for debugging purposes only
55 | logger.warning("Running in development mode. Do not run like this in production.")
56 | import uvicorn
57 |
58 | uvicorn.run(app, host="localhost", port=8001, log_level="debug")
--------------------------------------------------------------------------------
/packages/car_evaluation_api/app/api.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Any
3 |
4 | import numpy as np
5 | import pandas as pd
6 | from fastapi import APIRouter, HTTPException
7 | from fastapi.encoders import jsonable_encoder
8 | from car_evaluation_model import __version__ as model_version
9 | from car_evaluation_model.predict import make_prediction
10 | from loguru import logger
11 |
12 | from app import __version__, schemas
13 | from app.config import settings
14 |
15 | api_router = APIRouter()
16 |
17 |
@api_router.get("/health", response_model=schemas.Health, status_code=200)
def health() -> dict:
    """
    Report the project name, the API version and the model version.
    """
    # Build the payload through the schema, then hand it back as a plain dict
    return schemas.Health(
        name=settings.PROJECT_NAME,
        api_version=__version__,
        model_version=model_version,
    ).dict()
28 |
29 |
@api_router.post("/predict", response_model=schemas.PredictionResults, status_code=200)
async def predict(input_data: schemas.MultipleCarTransactionInputData) -> Any:
    """
    Make predictions with the car evaluation model

    Responds with the prediction results on success and raises an HTTP 400
    carrying the validation errors when the model package rejects the inputs.
    """

    # Turn the validated request records into a DataFrame for the model
    input_df = pd.DataFrame(jsonable_encoder(input_data.inputs))

    # Advanced: You can improve performance of your API by rewriting the
    # `make prediction` function to be async and using await here.
    logger.info(f"Making prediction on inputs: {input_data.inputs}")
    # Missing values are passed to the model package as None rather than NaN
    results = make_prediction(inputs=input_df.replace({np.nan: None}))

    if results["errors"] is not None:
        logger.warning(f"Prediction validation error: {results.get('errors')}")
        # The errors arrive as a JSON string; decode them for the response body
        raise HTTPException(status_code=400, detail=json.loads(results["errors"]))

    logger.info(f"Prediction results: {results.get('predictions')}")

    return results
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Car Evaluation
2 | The purpose behind this project was to demonstrate how to build an instant machine learning application with Streamlit - this is great for rapid prototyping.
3 | To achieve this I created a simple classification model on the [Car Evaluation Dataset](https://archive.ics.uci.edu/ml/datasets/Car+Evaluation) from the
4 | [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/index.php). By following along with the articles below, you will learn how to: create a machine
5 | learning microservice, create a front end for your machine learning model, and wire the two applications together using [Docker](https://www.docker.com/)
6 | and [Docker-compose](https://docs.docker.com/compose/). The GIF below is a demonstration of how the application works.
7 |
8 | 
9 |
10 | ## Installation & Usage
11 | These instructions assume that you already have Docker and Docker-compose installed on your machine - if not, please follow the instructions
12 | [here](https://docs.docker.com/compose/install/).
13 | - Clone this repository to your computer
14 | - Navigate to the directory that contains `docker-compose.yml`: `cd car-evaluation-project/packages`
15 | - Build the docker images using `docker-compose up -d --build`
16 | - This may take a minute
17 | - Open your browser and navigate to http://localhost:8501 to use the application.
18 |
19 | ## Extending this project
20 | - Conduct analysis of the data to build a better classification model
21 | - Set up monitoring for the machine learning model
22 | - Deploy on the cloud
23 |
24 | ## Articles About this Project
25 | - [How to Build a Machine Learning Microservice with FastAPI](https://developer.nvidia.com/blog/building-a-machine-learning-microservice-with-fastapi/)
26 | - [How to Build an Instant Machine Learning Web Application with Streamlit and FastAPI](https://developer.nvidia.com/blog/how-to-build-an-instant-machine-learning-web-application-with-streamlit-and-fastapi/)
27 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/pipeline.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from multiprocessing.connection import Pipe
3 |
4 | from sklearn.pipeline import Pipeline
5 | from sklearn.linear_model import LogisticRegression
6 | from sklearn.preprocessing import FunctionTransformer
7 |
8 | from car_evaluation_model.config.core import config
9 | from car_evaluation_model.processing import features as f
10 |
11 | _logger = logging.getLogger(__name__)
12 |
def _mapping_step(step_name, columns, mapping):
    """Build a named pipeline step that applies ``f.convert_mapping``."""
    transformer = FunctionTransformer(
        func=f.convert_mapping,
        kw_args={"columns": columns, "values_to_replace": mapping},
    )
    return (step_name, transformer)


_model_config = config.model_config

# Pipeline: one mapping step per feature group, then the classifier
car_evaluation_pipe = Pipeline(
    [
        _mapping_step(
            "get_buying_and_maint_mappings",
            _model_config.buying_and_maint,
            _model_config.buying_and_maint_mappings,
        ),
        _mapping_step(
            "get_doors_mappings",
            _model_config.doors,
            _model_config.doors_mappings,
        ),
        _mapping_step(
            "get_persons_mappings",
            _model_config.persons,
            _model_config.persons_mappings,
        ),
        _mapping_step(
            "get_lug_boot_mappings",
            _model_config.lug_boot,
            _model_config.lug_boot_mappings,
        ),
        _mapping_step(
            "get_safety_mappings",
            _model_config.safety,
            _model_config.safety_mappings,
        ),
        (
            "logistic_regression",
            LogisticRegression(random_state=_model_config.random_state),
        ),
    ]
)
--------------------------------------------------------------------------------
/packages/car_evaluation_streamlit/app.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 | import streamlit as st
4 |
# Define the title
st.title("Car evaluation web application")
# Two adjacent literals with an explicit space keep the sentences separated
st.write(
    "The model evaluates a cars acceptability based on the inputs below. "
    "Pass the appropiate details about your car using the questions below to discover if your car is acceptable."
)

# Input 1
buying = st.radio(
    "What are your thought's on the cars buying price?",
    ("vhigh", "high", "med", "low")
)

# Input 2
maint = st.radio(
    "What are your thoughts on the price of maintanence for the car?",
    ("vhigh", "high", "med", "low")
)

# Input 3
doors = st.select_slider(
    "How many doors does the car have?",
    options=["2", "3", "4", "5more"]
)

# Input 4
persons = st.select_slider(
    "How many passengers can the car carry?",
    options=["2", "4", "more"]
)

# Input 5
lug_boot = st.select_slider(
    "What is the size of the luggage boot?",
    options=["small", "med", "big"]
)

# Input 6
safety = st.select_slider(
    "What estimated level of safety does the car provide?",
    options=["low", "med", "high"]
)

# Class values to be returned by the model
class_values = {
    0: "unacceptable",
    1: "acceptable",
    2: "good",
    3: "very good"
}

# When 'Submit' is selected
if st.button("Submit"):

    # Inputs to ML model
    inputs = {
        "inputs": [
            {
                "buying": buying,
                "maint": maint,
                "doors": doors,
                "persons": persons,
                "lug_boot": lug_boot,
                "safety": safety
            }
        ]
    }

    # Posting inputs to ML API. A timeout keeps the page from hanging when
    # the API is unreachable, and HTTP or decoding failures are shown to the
    # user instead of crashing the script.
    try:
        response = requests.post(
            "http://host.docker.internal:8001/api/v1/predict/",
            json=inputs,
            timeout=10,
        )
        response.raise_for_status()
        predictions = response.json().get("predictions")
    except (requests.RequestException, ValueError) as exc:
        st.error(f"Could not get a prediction from the API: {exc}")
    else:
        if predictions:
            prediction = class_values[predictions[0]]
            st.subheader(f"This car is **{prediction}!**")
        else:
            # The API answered but did not include any prediction
            st.error("The API did not return a prediction.")
80 |
81 |
82 |
--------------------------------------------------------------------------------
/packages/car_evaluation_model/setup.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from setuptools import find_packages, setup
4 |
# Package meta-data.
NAME = "car_evaluation_model"
DESCRIPTION = "Classification model to predict quality of a car."
URL = "https://github.com/kurtispykes/car-evaluation-project/tree/Main/packages/car_evaluation_model"
EMAIL = "kurtispykes@gmail.com"
AUTHOR = "Kurtis Pykes"
REQUIRES_PYTHON = ">=3.8.0"

# The short description doubles as the long description.
long_description = DESCRIPTION

# Directory layout, resolved relative to this setup script.
ROOT_DIR = Path(__file__).resolve().parent
REQUIREMENTS_DIR = ROOT_DIR.joinpath("requirements")
PACKAGE_DIR = ROOT_DIR.joinpath("car_evaluation_model")

# Read the package's VERSION file and expose it under "__version__".
about = {}
with open(PACKAGE_DIR.joinpath("VERSION")) as version_file:
    _version = version_file.read().strip()
about["__version__"] = _version
23 |
24 |
25 | # What packages are required for this module to be executed?
def list_reqs(fname="requirements.txt"):
    """Return the requirement specifiers listed in a requirements file.

    :param fname: name of the file inside ``REQUIREMENTS_DIR``.
    :return: list of requirement strings, one per non-empty line.

    Blank lines and ``#`` comment lines are skipped so that they are not
    handed to ``install_requires`` as if they were package names.
    """
    with open(REQUIREMENTS_DIR / fname) as fd:
        stripped = (line.strip() for line in fd)
        return [line for line in stripped if line and not line.startswith("#")]
29 |
# Where the magic happens:
setup(
    name=NAME,
    version=about["__version__"],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type="text/markdown",
    author=AUTHOR,
    author_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    packages=find_packages(exclude=("tests",)),
    # Ship the VERSION file inside the installed package
    package_data={"car_evaluation_model": ["VERSION"]},
    install_requires=list_reqs(),
    extras_require={},
    include_package_data=True,
    license="BSD-3",
    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        # NOTE(review): ``license`` above says BSD-3 but this classifier says
        # MIT -- confirm against the LICENSE file and align the two.
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        # Only versions allowed by REQUIRES_PYTHON (>=3.8.0) are advertised
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: Implementation :: PyPy",
    ],
)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | packages/car_evaluation_streamlit/venv
93 |
94 | # Spyder project settings
95 | .spyderproject
96 | .spyproject
97 |
98 | # Rope project settings
99 | .ropeproject
100 |
101 | # mkdocs documentation
102 | /site
103 |
104 | # mypy
105 | .mypy_cache/
106 |
107 | # pycharm
108 | .idea/
109 |
110 | # trained models
111 | *.pkl
112 | packages/car_evaluation_model/car_evaluation_model/models/*.h5
113 | packages/car_evaluation_model/car_evaluation_model/models/*.pkl
114 |
115 | # datafiles
116 | *.csv
117 | data/*.csv
118 | packages/car_evaluation_model/car_evaluation_model/data/interim/*.csv
119 | packages/car_evaluation_model/car_evaluation_model/data/raw/*.csv
120 | packages/car_evaluation_model/car_evaluation_model/data/raw/*.data
--------------------------------------------------------------------------------
/packages/car_evaluation_api/app/config.py:
--------------------------------------------------------------------------------
import logging
import sys
from types import FrameType
from typing import List, Union, cast

from loguru import logger
from pydantic import AnyHttpUrl, BaseSettings
8 |
9 |
class LoggingSettings(BaseSettings):
    """Settings that control how verbosely the application logs."""

    # Standard-library logging levels are plain integers
    LOGGING_LEVEL: int = logging.INFO
12 |
13 |
class Settings(BaseSettings):
    """Top-level settings for the API application."""

    # Prefix under which the version-1 routes are mounted
    API_V1_STR: str = "/api/v1"

    # Meta
    logging: LoggingSettings = LoggingSettings()

    # Origins allowed by CORS, declared as a list of URLs,
    # e.g. http://localhost, http://localhost:4200, http://localhost:3000
    BACKEND_CORS_ORIGINS: List[AnyHttpUrl] = [
        "http://localhost:3000",
        "http://localhost:8000",
        "https://localhost:3000",
        "https://localhost:8000",
    ]

    PROJECT_NAME: str = "Car Evaluation API"

    class Config:
        # Setting names must match exactly, including case
        case_sensitive = True
33 |
34 |
# See: https://loguru.readthedocs.io/en/stable/overview.html#entirely-compatible-with-standard-logging  # noqa
class InterceptHandler(logging.Handler):
    """Logging handler that forwards standard-library records to Loguru."""

    def emit(self, record: logging.LogRecord) -> None:  # pragma: no cover
        """Re-emit ``record`` through the Loguru ``logger``.

        :param record: the standard-library log record to forward.
        """
        # Get corresponding Loguru level if it exists. For a level name Loguru
        # does not know, fall back to the numeric level: Loguru accepts an
        # integer severity, whereas a string is looked up as a level *name*
        # and an unknown one raises ValueError.
        level: Union[str, int]
        try:
            level = logger.level(record.levelname).name
        except ValueError:
            level = record.levelno

        # Find caller from where originated the logged message by skipping
        # the frames that belong to the logging module itself.
        frame, depth = logging.currentframe(), 2
        while frame and frame.f_code.co_filename == logging.__file__:  # noqa: WPS609
            # f_back is None at the top of the stack; the loop guard stops there
            frame = cast(FrameType, frame.f_back)
            depth += 1

        logger.opt(depth=depth, exception=record.exc_info).log(
            level,
            record.getMessage(),
        )
54 |
55 |
def setup_app_logging(config: Settings) -> None:
    """Route standard-library and uvicorn logging through Loguru.

    :param config: application settings; ``config.logging.LOGGING_LEVEL``
        is applied to the uvicorn handlers and to the Loguru stderr sink.
    """
    log_level = config.logging.LOGGING_LEVEL

    # The root logger gets an intercepting handler without an explicit level
    logging.getLogger().handlers = [InterceptHandler()]

    # uvicorn's own loggers are intercepted at the configured level
    for uvicorn_logger_name in ("uvicorn.asgi", "uvicorn.access"):
        logging.getLogger(uvicorn_logger_name).handlers = [
            InterceptHandler(level=log_level)
        ]

    stderr_sink = {"sink": sys.stderr, "level": log_level}
    logger.configure(handlers=[stderr_sink])


settings = Settings()
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/config/core.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import strictyaml
4 | from pydantic import BaseModel
5 |
6 | import car_evaluation_model
7 |
# Project directories, all derived from the installed package location
PACKAGE_ROOT = Path(car_evaluation_model.__file__).resolve().parent
ROOT = PACKAGE_ROOT.parent
CONFIG_FILE_PATH = PACKAGE_ROOT.joinpath("config.yml")
DATASET_DIR = PACKAGE_ROOT.joinpath("data")
RAW_DATA_DIR = DATASET_DIR.joinpath("raw")
INTERIM_DATA_DIR = DATASET_DIR.joinpath("interim")
TRAINED_MODEL_DIR = PACKAGE_ROOT.joinpath("models")
16 |
17 |
class AppConfig(BaseModel):
    """Application-level configuration: package, pipeline and data file names."""

    # Package and pipeline identification
    package_name: str
    pipeline_name: str
    pipeline_save_file: str  # prefix of the persisted pipeline's file name

    # Data file names
    data: str
    train_data: str
    test_data: str
28 |
class ModelConfig(BaseModel):
    """Configuration for model training and feature engineering."""

    # Dataset layout
    features: list
    column_names: list
    target: str

    # Individual feature names
    buying: str
    maint: str
    doors: str
    persons: str
    lug_boot: str
    safety: str
    buying_and_maint: list

    # Mappings applied to the features and to the class labels
    buying_and_maint_mappings: dict
    doors_mappings: dict
    persons_mappings: dict
    lug_boot_mappings: dict
    safety_mappings: dict
    class_mappings: dict

    # Train/test split parameters
    random_state: int
    test_size: float
52 |
class Config(BaseModel):
    """Master config object bundling the application and model settings."""

    app_config: AppConfig
    # NOTE(review): pydantic v2 reserves ``model_config`` on BaseModel;
    # confirm the pinned pydantic version before upgrading.
    model_config: ModelConfig
57 |
def find_config_file():
    """Return the path of the packaged configuration file.

    :return: ``CONFIG_FILE_PATH`` when it points to an existing file.
    :raises Exception: if no file exists at ``CONFIG_FILE_PATH``.
    """
    if not CONFIG_FILE_PATH.is_file():
        raise Exception(f"Config not found at {CONFIG_FILE_PATH}")
    return CONFIG_FILE_PATH
63 |
def fetch_config_from_yaml(cfg_path=None):
    """Parse the YAML file containing the package configuration.

    :param cfg_path: location of the YAML file; when omitted (or otherwise
        falsy) the path returned by ``find_config_file`` is used.
    :return: the document parsed by ``strictyaml.load``.
    :raises OSError: if ``cfg_path`` does not point to an existing file.
    :raises Exception: propagated from ``find_config_file`` when no path is
        given and the packaged config file is missing.
    """
    if not cfg_path:
        cfg_path = find_config_file()

    # Fail with an explicit message instead of relying on open() to complain
    cfg_path = Path(cfg_path)
    if not cfg_path.is_file():
        raise OSError(f"Did not find config file at path: {cfg_path}")

    with open(cfg_path, "r") as conf_file:
        return strictyaml.load(conf_file.read())
75 |
def create_and_validate_config(parsed_config=None):
    """Build the validated ``Config`` object from parsed YAML.

    :param parsed_config: a parsed strictyaml document; when ``None`` it is
        obtained from ``fetch_config_from_yaml``.
    :return: a ``Config`` holding the ``AppConfig`` and ``ModelConfig`` sections.
    """
    yaml_document = (
        fetch_config_from_yaml() if parsed_config is None else parsed_config
    )

    # ``.data`` is the attribute of the strictyaml YAML type that holds the
    # values; both sections are built from the same mapping.
    raw_values = yaml_document.data
    return Config(
        app_config=AppConfig(**raw_values),
        model_config=ModelConfig(**raw_values),
    )


config = create_and_validate_config()
--------------------------------------------------------------------------------
/packages/car_evaluation_model/car_evaluation_model/processing/data_manager.py:
--------------------------------------------------------------------------------
1 | import joblib
2 | import pandas as pd
3 | from pathlib import Path
4 | from car_evaluation_model.config.core import INTERIM_DATA_DIR, TRAINED_MODEL_DIR, config
5 | from sklearn.model_selection import train_test_split
6 |
7 | from car_evaluation_model import __version__ as _version
8 |
9 |
def load_dataset(path_to_data, names=None):
    """Load a CSV dataset into memory.

    :param path_to_data: data file location (anything ``pd.read_csv`` accepts).
    :param names: column names to assign when the file has no header row.
        Passing ``True`` uses ``config.model_config.column_names``; that is
        kept for backward compatibility, because any truthy value used to
        select the configured names.
    :return: the dataset as a ``pandas.DataFrame``.
    """
    if not names:
        # No names given: the first row of the file is used as the header
        return pd.read_csv(path_to_data)

    # Honour the names supplied by the caller instead of silently replacing
    # them with the configured ones.
    if names is True:
        column_names = config.model_config.column_names
    else:
        column_names = list(names)

    return pd.read_csv(path_to_data, names=column_names)
29 |
def create_train_and_test(data):
    """Split the data into train and test sets and save both as interim data.

    :param data: the data to split; it is copied, so the caller's frame is
        left untouched.
    :return: ``X_train, X_test, y_train, y_test`` as produced by
        ``train_test_split``.
    """
    data = data.copy()
    target = config.model_config.target

    # Convert labels using the configured class mappings. The column is
    # replaced by plain assignment rather than through ``.loc[:, target]``:
    # with pandas >= 2 the ``.loc`` form writes into the existing column and
    # can leave it with its original object dtype.
    data[target] = data[target].map(config.model_config.class_mappings)

    # Split data into features and labels
    X = data[config.model_config.features]
    y = data[target]

    # Create train and test splits
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=config.model_config.test_size,
        random_state=config.model_config.random_state,
    )

    # Join the labels to the features to create one dataframe per split
    train_data = pd.concat([X_train, y_train], axis=1)
    test_data = pd.concat([X_test, y_test], axis=1)

    # Save data files.
    train_data.to_csv(INTERIM_DATA_DIR / config.app_config.train_data, index=False)
    test_data.to_csv(INTERIM_DATA_DIR / config.app_config.test_data, index=False)

    return X_train, X_test, y_train, y_test
70 |
71 |
def save_pipeline(*, pipeline_to_persist):
    """Persist the pipeline under a versioned file name.

    Every other saved pipeline is removed first, so only the model that
    belongs to the current package version remains in the models directory.

    :param pipeline_to_persist: the pipeline object to dump with joblib.
    """
    # File name is the configured prefix followed by the package version
    versioned_name = f"{config.app_config.pipeline_save_file}{_version}.pkl"

    remove_old_pipelines(files_to_keep=[versioned_name])
    joblib.dump(pipeline_to_persist, TRAINED_MODEL_DIR / versioned_name)
86 |
87 |
def load_pipeline(*, file_name):
    """Load a persisted pipeline from the trained-model directory.

    :param file_name: name of the file inside ``TRAINED_MODEL_DIR``.
    :return: the object restored by ``joblib.load``.
    """
    return joblib.load(filename=TRAINED_MODEL_DIR / file_name)
94 |
95 |
def remove_old_pipelines(*, files_to_keep) -> None:
    """Remove old model pipelines.

    This is to ensure there is a simple one-to-one mapping between the
    package version and the model version to be imported and used by
    other applications.

    :param files_to_keep: iterable of file names that must not be deleted;
        ``__init__.py`` is always kept as well.
    """
    do_not_delete = {*files_to_keep, "__init__.py"}
    for model_file in TRAINED_MODEL_DIR.iterdir():
        # Only regular files are candidates: ``unlink`` cannot remove a
        # directory (e.g. ``__pycache__``) and would raise instead.
        if model_file.is_file() and model_file.name not in do_not_delete:
            model_file.unlink()
107 |
--------------------------------------------------------------------------------