├── .env
├── Jenkinsfile
├── README.md
├── api
│   ├── README.md
│   ├── images
│   │   └── deployment_pipeline.png
│   ├── triton_client.py
│   └── upload_model_to_minio.py
├── constants.py
├── deployments
│   ├── mwt.yaml
│   ├── triton-isvc.yaml
│   └── triton-servingruntime.yaml
├── distributed_training
│   ├── Dockerfile
│   ├── README.md
│   ├── build.sh
│   ├── images
│   │   └── training_pipeline.png
│   ├── mwt.py
│   ├── nets
│   │   └── nn.py
│   ├── test
│   │   └── test.yaml
│   ├── utils
│   │   ├── config.py
│   │   ├── dataset.py
│   │   └── image_utils.py
│   └── weights
│       └── model.h5
├── docker-compose.yml
├── images
│   ├── PipelineAllcode.png
│   ├── add_credential.png
│   ├── add_credential_dockerhub.png
│   ├── add_token_dockerhub.png
│   ├── architecutre_overview.png
│   ├── bus.jpg
│   ├── check_request_github_jenkins.png
│   ├── connector.png
│   ├── data_pipeline.png
│   ├── diagram_pipe.gif
│   ├── error_log_pod.png
│   ├── false_modelmesh_deploy.png
│   ├── generate_token_docker_hub.png
│   ├── get_token_github.png
│   ├── github_tokens.png
│   ├── instal_docker_jenkins.png
│   ├── install_docker_success.png
│   ├── isvc.png
│   ├── jenkins_container.png
│   ├── jenkins_portal.png
│   ├── jenkins_ui.png
│   ├── messenger.png
│   ├── minio-credentials.png
│   ├── mlflow _modelregistry.png
│   ├── modelmesh-serving-installation.png
│   ├── ngrok.png
│   ├── ngrok_forwarding.png
│   ├── password_jenkins.png
│   ├── result.png
│   ├── result_connect_jenkins_github.png
│   ├── result_push_dockerhub.png
│   ├── result_train_pod.png
│   ├── strategy.png
│   ├── strategy_scope.png
│   ├── structure_data.png
│   ├── structure_training.png
│   ├── topic_tab.png
│   ├── train_process.png
│   ├── ui_build_jenkins.png
│   ├── validate_connect_repo.png
│   └── webhook_github.png
├── mlflow
│   └── Dockerfile
├── model_repo
│   └── yolov8n_car
│       ├── 1
│       │   └── model.onnx
│       └── config.pbtxt
├── notebooks
│   └── debug.ipynb
├── requirements.txt
└── streaming
    ├── Dockerfile
    ├── README.md
    ├── docker-compose.yml
    ├── images
    │   └── data-pipeline.png
    ├── kafka_connector
    │   └── connect-timescaledb-sink.json
    ├── produce.py
    └── run.sh
/.env:
--------------------------------------------------------------------------------
1 | MINIO_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE
2 | MINIO_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
--------------------------------------------------------------------------------
/Jenkinsfile:
--------------------------------------------------------------------------------
1 | pipeline {
2 | agent any
3 |
4 | options{
5 | buildDiscarder(logRotator(numToKeepStr: '5', daysToKeepStr: '5'))
6 | timestamps()
7 | }
8 |
9 | environment{
10 | registry = '6666688889/distributed_training'
11 | registryCredential = 'dockerhub'
12 | }
13 |
14 | stages {
15 | stage('Build') {
16 | steps {
17 | script {
18 | echo 'Building image for deployment..'
19 | def dockerImage = docker.build("${registry}:${BUILD_NUMBER}", "./distributed_training")
20 | echo 'Pushing image to dockerhub..'
21 | docker.withRegistry( '', registryCredential ) {
22 | dockerImage.push()
23 | dockerImage.push('latest')
24 | }
25 | }
26 | }
27 | }
28 | // stage('Deploy') {
29 | // steps {
30 | // echo 'Deploying models..'
31 | // echo 'Running a script to trigger pull and start a docker container'
32 | // }
33 | // }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Scalable ML System for Car Detection
3 |
4 | ## 📕 Table Of Contents
5 | - 🗣️ [Introduction](#introduction)
6 | - 🚀 [Challenge](#challenge)
7 | - 🌟 [System Architecture](#system-architecture)
8 | - 📁 [Repository Structure](#repository-structure)
9 | - 🔍 [How-to Guide](#how-to-guide)
10 |
11 | ## 🗣️ Introduction:
12 |
13 | This project implements an advanced car detection system using a comprehensive machine learning pipeline. Our solution leverages state-of-the-art technologies to process data, train models, and deploy them efficiently at scale.
14 |
15 | ## 🚀 **Challenge:**
16 | This project faced several challenges: ensuring data consistency and scalability during ingestion; managing resources and synchronization in distributed training; automating the CI/CD pipeline; converting and deploying models efficiently; safeguarding data privacy and security; optimizing performance; and debugging and troubleshooting a distributed system.
17 |
18 |
19 | ## 🌟 System Architecture
20 | 
21 |
22 | The pipeline consists of two main components:
23 |
24 | - **Data Pipeline**: This part of the system handles the ingestion, preprocessing, and feature extraction of car detection data. It includes steps like loading the dataset, performing preprocessing tasks, and extracting relevant features using tools like Apache Flink and Redis.
25 | - **Training and Deployment Pipeline**: The training and deployment pipeline focuses on the model development and deployment processes. It includes steps like saving the trained model and artifacts, evaluating the model, and deploying the model using tools like MLflow, Jenkins, and Kubernetes.
26 |
27 |
28 | **Key features of our pipeline include:**
29 |
30 | - Data ingestion and preprocessing using Airflow and Kafka for stream processing
31 | - Feature storage in Redis and an offline data store
32 | - Distributed model training with TensorFlow on Kubeflow
33 | - Model versioning and artifact management with MLflow
34 | - Automated deployment pipeline using Jenkins and Kubernetes
35 | - Scalable model serving with KServe API server
36 |
37 |
38 | ## 📁 Repository Structure
39 | ```
40 | 📦
41 | ├─ .env # Environment variables used across the project
42 | ├─ Jenkinsfile # Configuration for a Jenkins CI/CD pipeline
43 | ├─ README.md # General project documentation
44 | ├─ api # Contains code related to the API layer
45 | │ ├─ README.md # Documentation for the API serving component
46 | │ ├─ triton_client.py # Code for interacting with the Triton Inference Server
47 | │ └─ upload_model_to_minio.py # Script to upload the trained model to Minio storage
48 | ├─ constants.py # Shared constants and configurations used across the project
49 | ├─ deployments # Kubernetes configurations
50 | │ ├─ mwt.yaml # Configuration for the Multi-Worker Training (MWT) component
51 | │ ├─ triton-isvc.yaml # Configuration for the Triton Inference Service
52 | │ └─ triton-servingruntime.yaml # Configuration for the Triton Inference Server runtime
53 | ├─ distributed_training # Code and configuration for distributed training
54 | │ ├─ Dockerfile # Dockerfile for the distributed training component
55 | │ ├─ README.md # Documentation for the distributed training component
56 | │ ├─ build.sh # Script to build the distributed training Docker image
57 | │ ├─ mwt.py # Main logic for the Multi-Worker Training component
58 | │ ├─ nets # Neural network architecture definitions
59 | │ │ └─ nn.py # Neural network model implementation
60 | │ ├─ test # Test configuration for the distributed training
61 | │ │ └─ test.yaml # Test deployment configuration
62 | │ ├─ utils # Utility functions for the distributed training
63 | │ │ ├─ config.py # Configuration handling for the distributed training
64 | │ │ ├─ dataset.py # Dataset-related utilities
65 | │ │ └─ image_utils.py # Image processing utilities
66 | │ └─ weights # Folder containing a pre-trained model
67 | │ └─ model.h5 # Saved weights for the pre-trained model
68 | ├─ docker-compose.yml # Docker Compose configuration for the entire project
69 | ├─ images # Folder for storing project-related images
70 | ├─ mlflow # Code and configuration for the MLflow component
71 | │ └─ Dockerfile # Dockerfile for the MLflow component
72 | ├─ model_repo # Repository for storing the trained model
73 | │ └─ yolov8n_car # Folder for the YOLOv8 car detection model
74 | │ ├─ 1 # Version 1 of the model
75 | │ │ └─ model.onnx # ONNX format of the trained model
76 | │ └─ config.pbtxt # Triton Inference Server configuration for the model
77 | ├─ notebooks # Jupyter notebooks for debugging and exploration
78 | │ └─ debug.ipynb # Sample Jupyter Notebook for debugging
79 | ├─ requirements.txt # Python dependencies for the project
80 | └─ streaming # Code and configuration for the data streaming component
81 | ├─ Dockerfile # Dockerfile for the streaming component
82 | ├─ README.md # Documentation for the streaming component
83 | ├─ docker-compose.yml # Docker Compose configuration for the streaming component
84 | ├─ kafka_connector # Configuration for the Kafka connector
85 | │ └─ connect-timescaledb-sink.json # Kafka connector configuration for TimescaleDB sink
86 | ├─ produce.py # Script to produce sample data for the streaming component
87 | └─ run.sh # Script to run the streaming component
88 | ```
89 |
90 | ## 🔍 How-to Guide:
91 |
92 | ### 1. Data Pipeline:
93 | - The data pipeline starts with the car detection dataset source.
94 | - Images are loaded and preprocessed, and features are extracted using Airflow.
95 | - We also use Kafka to ingest simulated stream data, which is then processed by Apache Flink.
96 | - Data is stored in Redis (the online store) and synced to PostgreSQL (the offline store).
97 |
98 | To get started with the *Data pipeline* component:
99 | ```shell
100 | cd streaming
101 | ```
102 |
103 | And read the respective README file: [Data Pipeline Guide](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/streaming/README.md)
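
For orientation, `streaming/produce.py` is the authoritative producer of the simulated stream; below is a minimal sketch of the same idea using `kafka-python`. The broker address (`localhost:9092`) and topic name (`device_0`) are illustrative assumptions, not values taken from the repo:

```python
# pip install kafka-python -- a minimal sketch of a fake-data producer;
# broker address and topic name are assumptions for illustration only.
import json
import random
import time

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers="localhost:9092",
    value_serializer=lambda v: json.dumps(v).encode("utf-8"),
)

while True:
    # Emit one fake event per second for Flink to pick up downstream.
    event = {"ts": time.time(), "value": random.random()}
    producer.send("device_0", event)
    producer.flush()
    time.sleep(1)
```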
104 |
105 | ### 2. Training Pipeline:
106 | Our training pipeline utilizes Kubeflow and TensorFlow for distributed training. Here's an overview of the process:
107 |
108 | 1. Data Preparation: Features are pulled from the offline store and prepared using Kubeflow.
109 |
110 | 2. Distributed Training: We use TensorFlow for distributed training, which allows us to process large datasets efficiently across multiple nodes.
111 |
112 | 3. Model Evaluation: After training, the model is evaluated to ensure it meets performance criteria.
113 |
114 | 4. Artifact Management: The trained model and associated artifacts are saved to the MLflow model registry for versioning and easy retrieval.
115 |
116 | Key features of our distributed training approach:
117 | - Scalability: Easily scale training across multiple nodes using Kubeflow.
118 | - Efficiency: Utilize TensorFlow's distributed training capabilities for faster processing.
119 | - Version Control: Track experiments and models using MLflow for reproducibility.
120 |
121 | To get started with the training pipeline:
122 |
123 | ```shell
124 | cd distributed_training
125 | ```
126 |
127 | For detailed instructions on setting up and running the distributed training, please refer to our [Distributed Training Guide](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/distributed_training/README.md).
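
Under the hood, each worker in a multi-worker job discovers its peers through the `TF_CONFIG` environment variable, which the Kubeflow training operator injects into every TFJob replica automatically. A hand-rolled equivalent for worker 0 of a two-worker job would look roughly like this (addresses are illustrative; in a TFJob the operator fills in the pod DNS names for you):

```python
import json
import os

# Illustrative TF_CONFIG for worker 0 of a two-worker job; Kubeflow's
# training operator sets this automatically for each TFJob replica.
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {"worker": ["localhost:12345", "localhost:23456"]},
    "task": {"type": "worker", "index": 0},
})

import tensorflow as tf

# The strategy reads TF_CONFIG to set up collective ops across workers.
strategy = tf.distribute.MultiWorkerMirroredStrategy()
print("Replicas in sync:", strategy.num_replicas_in_sync)
```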
128 |
129 |
131 |
132 | ### 3. Deployment Pipeline:
133 | The serving pipeline deploys the trained model for inference, ensuring that it can handle various workloads efficiently. Below are the key aspects of our serving approach:
134 |
135 | 1. **Scalability**: ModelMesh scales the serving infrastructure dynamically to accommodate varying loads and large volumes of requests, ensuring reliable performance even under heavy demand.
136 |
137 | 2. **Multi-Model Support**: ModelMesh can manage and serve multiple models simultaneously, providing flexibility in deployment strategies and enabling seamless model updates.
138 |
139 | 3. **Efficient Resource Utilization**: By dynamically allocating resources based on the demand for different models, ModelMesh optimizes the use of computational resources, reducing costs and improving efficiency.
140 |
141 | To get started with the serving pipeline:
142 |
143 | ```shell
144 | cd api
145 | ```
146 |
147 | For detailed instructions on setting up and managing the serving infrastructure, please refer to our [Deployment Pipeline Guide](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/api/README.md).
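
Once everything is up, a quick way to confirm the served model is live is the KServe V2 readiness endpoint. A sketch, assuming the service is port-forwarded to `localhost:8008` and the model is registered under the name `onnx`, as in `api/triton_client.py`:

```python
import requests

# V2 protocol readiness probe; returns 200 once the model is loaded.
resp = requests.get("http://localhost:8008/v2/models/onnx/ready")
print(resp.status_code)
```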
--------------------------------------------------------------------------------
/api/README.md:
--------------------------------------------------------------------------------
1 | # Deployment Pipeline Guide
2 |
3 | ## Table of Contents
4 |
5 | 1. [Prerequisites](#prerequisites)
6 | - [Install kustomize](#install-kustomize)
7 | - [Install modelmesh-serving](#install-modelmesh-serving)
8 | 2. [Deployment Pipeline Overview](#deployment-pipeline-overview)
9 | 3. [Getting Started](#getting-started)
10 | 4. [Making Predictions](#making-predictions)
11 |
12 | ---
13 |
14 | ## Prerequisites
15 |
16 | Before getting started, ensure that your environment meets the following prerequisites:
17 |
18 | - GKE Version: Use GKE version 1.29
19 |
20 | ### Install kustomize
21 |
22 | [Kustomize](https://kubectl.docs.kubernetes.io/) is an alternative tool to Helm for installing applications on Kubernetes. Install it by running the following commands:
23 |
24 | ```shell
25 | curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
26 | sudo mv kustomize /usr/local/bin/
27 | ```
28 |
29 | ### Install modelmesh-serving
30 |
31 | Clone the modelmesh-serving repository:
32 |
33 | ```shell
34 | RELEASE=release-0.9
35 | git clone -b $RELEASE --depth 1 --single-branch https://github.com/kserve/modelmesh-serving.git
36 | cd modelmesh-serving
37 | ```
38 |
39 | Create a new namespace and install modelmesh-serving:
40 |
41 | ```shell
42 | kubectl create namespace modelmesh-serving
43 | ./scripts/install.sh --namespace modelmesh-serving --quickstart
44 |
45 | ```
46 |
47 | After a few minutes, you should see the following output:
48 |
49 | 
50 |
51 | ## Deployment Pipeline Overview
52 |
53 | The following diagram provides an overview of the deployment pipeline, detailing each step from model optimization to deployment and scaling.
54 |
55 | 
56 |
57 | ### Key Components:
58 |
59 | 1. Model Optimization (ONNX):
60 | - Optimizes the model for serving, converting it into ONNX format.
61 |
62 | 2. Model Testing:
63 |
64 | - Runs tests to ensure that the optimized model meets the necessary performance and accuracy criteria.
65 |
66 | 3. Runtime Containerization:
67 |
68 | - Packages the model into a containerized runtime environment.
69 |
70 | 4. Ingest Serving-Model to S3:
71 |
72 | - The containerized model is uploaded to an S3-compatible storage, such as MinIO.
73 | 5. Deployment and Scaling:
74 |
75 | - The model is deployed and scaled using Kubernetes (K8s), managed through `kubectl`.
76 |
77 | 6. Model Serving API:
78 |
79 | - The deployed model is accessible via an API, allowing users to make predictions.
80 |
81 | ## Getting Started
82 |
83 | ### Port-forward the `MinIO` Service
84 |
85 | To access MinIO locally, use the following command:
86 |
87 | ```shell
88 | kubectl port-forward svc/minio 9000:9000 -n modelmesh-serving
89 | ```
90 |
91 | ### Access MinIO Credentials
92 |
93 | Obtain the `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` required to sign in and upload your models to MinIO (replace the pod name below with your own MinIO pod, found via `kubectl get pods -n modelmesh-serving`):
94 |
95 | ```shell
96 | kubectl get po minio-676b8dcf45-nw2zw -o json | jq -r '.spec.containers[0].env[] | select(.name == "MINIO_ACCESS_KEY") | .value'
97 |
98 | kubectl get po minio-676b8dcf45-nw2zw -o json | jq -r '.spec.containers[0].env[] | select(.name == "MINIO_SECRET_KEY") | .value'
99 | ```
100 |
101 | You can see that in my case, `MINIO_ACCESS_KEY` is `AKIAIOSFODNN7EXAMPLE`, and `MINIO_SECRET_KEY` is `wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY`.
102 |
103 | 
104 |
105 | Open localhost:9000 to access the MinIO console and upload the model to the MinIO bucket. The expected layout for the ONNX model and its `config.pbtxt` file is shown below. Remember to use the model trained in the previous step for serving; the serving format is ONNX, so convert the weight file to ONNX before uploading it to the bucket.
106 |
107 | 
108 |
109 | 
110 |
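
The repository does not ship a conversion script. One possible route for the Keras weights produced by the training step is `tf2onnx` (a sketch only; the paths, input name, and input shape here are assumptions and must agree with your `config.pbtxt` and the client's request):

```python
# pip install tf2onnx -- a sketch; paths, input name, and shape are assumptions.
import tensorflow as tf
import tf2onnx

from nets import nn  # model definition from distributed_training/nets

# Rebuild the inference graph and load the trained weights.
model = nn.build_model(training=False)
model.load_weights("distributed_training/weights/model.h5")

# The input signature must match what config.pbtxt declares.
spec = (tf.TensorSpec((1, 640, 640, 3), tf.float32, name="images"),)
tf2onnx.convert.from_keras(
    model,
    input_signature=spec,
    output_path="model_repo/yolov8n_car/1/model.onnx",
)
```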
111 |
112 | ### Upload Model to MinIO:
113 |
114 | You can manually upload the model or use the following script:
115 | ``` shell
116 | python api/upload_model_to_minio.py
117 | ```
118 |
119 | ### Deploy the ONNX Model:
120 |
121 | Deploy the model using the following commands:
122 |
123 | ```shell
125 | kubectl apply -f deployments/triton-isvc.yaml
126 | kubectl apply -f deployments/triton-servingruntime.yaml
127 | ```
128 |
129 | ### Verify the Service Readiness:
130 |
131 | Check if the service is ready:
132 |
133 | ```shell
134 | kubectl get isvc
135 | ```
136 |
137 | Initially, the READY column may show `false`:
138 |
139 | 
140 |
141 | It may take several minutes for the service to become READY.
142 |
143 | If it doesn't, inspect the pod backing the `triton-2.x` runtime: check its events, then the logs of its `mm` container (substitute your pod name):
144 | ```shell
145 | kubectl describe pod modelmesh-serving-triton-2.x-6c4978d6db-5k59z
146 | kubectl logs modelmesh-serving-triton-2.x-6c4978d6db-5k59z -c mm
147 | ```
148 |
149 | 
150 |
151 |
152 | Once the service is ready, you should see the following result:
153 |
154 | 
155 | 
156 |
157 |
158 | ## Making Predictions:
159 |
160 | To make a prediction, follow these steps:
161 |
162 | 1. Port-forward `modelmesh-serving` service
163 | ```shell
164 | kubectl port-forward --address 0.0.0.0 service/modelmesh-serving 8008 -n modelmesh-serving
165 | ```
166 | 2. Test your newly created modelmesh-serving service
167 | ```shell
168 | python api/triton_client.py
169 | ```
170 |
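For reference, the V2 REST response that `postprocess` in `api/triton_client.py` consumes has roughly this shape. The output name, shape, and values below are illustrative placeholders, not guaranteed by the repo:

```python
# Illustrative KServe V2 inference response; all values are placeholders.
response = {
    "model_name": "yolov8n_car",
    "outputs": [
        {
            "name": "output0",          # assumed output tensor name
            "datatype": "FP32",
            "shape": [1, 84, 8400],     # e.g. 4 box coords + 80 class scores
            "data": [0.0] * (1 * 84 * 8400),  # flattened row-major values
        }
    ],
}
```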
--------------------------------------------------------------------------------
/api/images/deployment_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/api/images/deployment_pipeline.png
--------------------------------------------------------------------------------
/api/triton_client.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import numpy as np
4 | import wget
5 | import os
6 | import cv2
7 | from constants import CLASSES  # import the CLASSES list from constants.py
8 |
9 |
10 | def preprocess(cv2_image, model_shape=(640, 640)):
11 | image_rgb = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB)
12 | resized = cv2.resize(image_rgb, model_shape)
13 |
14 | # Scale input pixel value to 0 to 1
15 | input_image = resized / 255.0
16 | input_image = input_image.transpose(2, 0, 1)
17 | result = input_image[np.newaxis, :, :, :].astype(np.float32)
18 |
19 | return result
20 |
21 |
22 | def xywh2xyxy(x):
23 | # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2)
24 | y = np.copy(x)
25 | y[..., 0] = x[..., 0] - x[..., 2] / 2
26 | y[..., 1] = x[..., 1] - x[..., 3] / 2
27 | y[..., 2] = x[..., 0] + x[..., 2] / 2
28 | y[..., 3] = x[..., 1] + x[..., 3] / 2
29 | return y
30 |
31 |
32 | def nms(boxes, scores, iou_threshold):
33 | # Sort by score
34 | sorted_indices = np.argsort(scores)[::-1]
35 |
36 | keep_boxes = []
37 | while sorted_indices.size > 0:
38 | # Pick the box with the highest remaining score
39 | box_id = sorted_indices[0]
40 | keep_boxes.append(box_id)
41 |
42 | # Compute IoU of the picked box with the rest
43 | ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
44 |
45 | # Remove boxes with IoU over the threshold
46 | keep_indices = np.where(ious < iou_threshold)[0]
47 |
48 | sorted_indices = sorted_indices[keep_indices + 1]
49 |
50 | return keep_boxes
51 |
52 |
53 | def compute_iou(box, boxes):
54 | # Compute xmin, ymin, xmax, ymax for both boxes
55 | xmin = np.maximum(box[0], boxes[:, 0])
56 | ymin = np.maximum(box[1], boxes[:, 1])
57 | xmax = np.minimum(box[2], boxes[:, 2])
58 | ymax = np.minimum(box[3], boxes[:, 3])
59 |
60 | # Compute intersection area
61 | intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
62 |
63 | # Compute union area
64 | box_area = (box[2] - box[0]) * (box[3] - box[1])
65 | boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
66 | union_area = box_area + boxes_area - intersection_area
67 |
68 | # Compute IoU
69 | iou = intersection_area / union_area
70 |
71 | return iou
72 |
73 |
74 | def postprocess(outputs, original_shape, model_shape=(640, 640), threshold=0.8):
75 | model_height, model_width = model_shape
76 | original_height, original_width = original_shape[:2]
77 | outputs = np.array(outputs[0]["data"]).reshape(outputs[0]["shape"])
78 | predictions = np.squeeze(outputs).T
79 |
80 | # Filter out object confidence scores below threshold
81 | scores = np.max(predictions[:, 4:], axis=1)
82 | predictions = predictions[scores > threshold, :]
83 | scores = scores[scores > threshold]
84 | class_ids = np.argmax(predictions[:, 4:], axis=1)
85 |
86 | # Get bounding boxes for each object
87 | bboxes = predictions[:, :4]
88 |
89 | # Rescale bboxes
90 | model_shape = np.array([model_width, model_height, model_width, model_height])
91 | original_shape = np.array(
92 | [original_width, original_height, original_width, original_height]
93 | )
94 | bboxes = np.divide(bboxes, model_shape, dtype=np.float32)
95 | bboxes *= original_shape
96 | bboxes = bboxes.astype(np.int32)
97 |
98 | # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
99 | indices = nms(bboxes, scores, 0.3)
100 |
101 | return bboxes[indices], scores[indices], class_ids[indices]
102 |
103 |
104 | def draw_image(image, bboxes, scores, class_ids):
105 | image_draw = image.copy()
106 | for bbox, score, label in zip(xywh2xyxy(bboxes), scores, class_ids):
107 | bbox = bbox.round().astype(np.int32).tolist()
108 | cls_id = int(label)
109 | cls = CLASSES[cls_id]
110 | color = (0, 255, 0)
111 | cv2.rectangle(image_draw, tuple(bbox[:2]), tuple(bbox[2:]), color, 2)
112 | cv2.putText(
113 | image_draw,
114 | f"{cls}:{int(score*100)}",
115 | (bbox[0], bbox[1] - 2),
116 | cv2.FONT_HERSHEY_SIMPLEX,
117 | 0.60,
118 | [225, 255, 255],
119 | thickness=1,
120 | )
121 | cv2.imwrite("drawed.jpg", image_draw)
122 |
123 |
124 | def main():
125 | image_url = "https://ultralytics.com/images/bus.jpg"
126 | image_name = os.path.basename(image_url)
127 | if not os.path.exists(image_name):
128 | wget.download(image_url)
129 |
130 | original_image = cv2.imread(image_name)
131 | image = preprocess(original_image)
132 |
133 | request_data = {
134 | "inputs": [
135 | {
136 | "name": "images",
137 | "shape": image.shape,
138 | "datatype": "FP32",
139 | "data": image.flatten().tolist(), # Flatten the image and convert to list
140 | }
141 | ]
142 | }
143 |
144 | headers = {
145 | "Content-Type": "application/json", # Change content type to JSON
146 | }
147 |
148 | response = requests.post(
149 | "http://localhost:8008/v2/models/onnx/infer",
150 | headers=headers,
151 | data=json.dumps(request_data),
152 | verify=False,
153 | ).json()
154 |
155 | result = response["outputs"]
156 | bboxes, scores, class_ids = postprocess(result, original_image.shape)
157 | print(bboxes)
158 | print(scores)
159 | print(class_ids)
160 | draw_image(original_image, bboxes, scores, class_ids)
163 |
164 |
165 | if __name__ == "__main__":
166 | main()
167 |
--------------------------------------------------------------------------------
/api/upload_model_to_minio.py:
--------------------------------------------------------------------------------
1 | from minio import Minio
2 | from minio.error import S3Error
3 | from dotenv import load_dotenv
4 | import os
5 |
6 | def main():
7 |     # Load MINIO_ACCESS_KEY / MINIO_SECRET_KEY from the .env file.
8 |     load_dotenv()
9 |
10 |     # Create a client for the local MinIO server. The endpoint takes no
11 |     # scheme; secure=False because we talk to it over plain HTTP.
12 |     client = Minio(
13 |         "localhost:9000",
14 |         access_key=os.getenv("MINIO_ACCESS_KEY"),
15 |         secret_key=os.getenv("MINIO_SECRET_KEY"),
16 |         secure=False,
17 |     )
18 |
19 |     # Make the 'modelmesh-models' bucket if it does not exist.
20 |     bucket_name = "modelmesh-models"
21 |     if not client.bucket_exists(bucket_name):
22 |         client.make_bucket(bucket_name)
23 |     else:
24 |         print(f"Bucket {bucket_name} already exists")
25 |
26 |     # Upload every file under ./model_repo/yolov8n_car/ (model.onnx and
27 |     # config.pbtxt), keeping the relative path as the object name.
28 |     model_dir = "./model_repo/yolov8n_car/"
29 |     for root, _, files in os.walk(model_dir):
30 |         for file_name in files:
31 |             file_path = os.path.join(root, file_name)
32 |             object_name = os.path.relpath(file_path, "./model_repo")
33 |             client.fput_object(bucket_name, object_name, file_path)
34 |     print(f"Model and config are successfully uploaded to bucket '{bucket_name}'.")
35 |
36 |
37 | if __name__ == "__main__":
38 |     try:
39 |         main()
40 |     except S3Error as exc:
41 |         print("error occurred.", exc)
--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
1 | # constants.py
2 | CLASSES = [
3 | "person",
4 | "bicycle",
5 | "car",
6 | "motorcycle",
7 | "airplane",
8 | "bus",
9 | "train",
10 | "truck",
11 | "boat",
12 | "traffic light",
13 | "fire hydrant",
14 | "street sign",
15 | "stop sign",
16 | "parking meter",
17 | "bench",
18 | "bird",
19 | "cat",
20 | "dog",
21 | "horse",
22 | "sheep",
23 | "cow",
24 | "elephant",
25 | "bear",
26 | "zebra",
27 | "giraffe",
28 | "hat",
29 | "backpack",
30 | "umbrella",
31 | "shoe",
32 | "eye glasses",
33 | "handbag",
34 | "tie",
35 | "suitcase",
36 | "frisbee",
37 | "skis",
38 | "snowboard",
39 | "sports ball",
40 | "kite",
41 | "baseball bat",
42 | "baseball glove",
43 | "skateboard",
44 | "surfboard",
45 | "tennis racket",
46 | "bottle",
47 | "plate",
48 | "wine glass",
49 | "cup",
50 | "fork",
51 | "knife",
52 | "spoon",
53 | "bowl",
54 | "banana",
55 | "apple",
56 | "sandwich",
57 | "orange",
58 | "broccoli",
59 | "carrot",
60 | "hot dog",
61 | "pizza",
62 | "donut",
63 | "cake",
64 | "chair",
65 | "couch",
66 | "potted plant",
67 | "bed",
68 | "mirror",
69 | "dining table",
70 | "window",
71 | "desk",
72 | "toilet",
73 | "door",
74 | "tv",
75 | "laptop",
76 | "mouse",
77 | "remote",
78 | "keyboard",
79 | "cell phone",
80 | "microwave",
81 | "oven",
82 | "toaster",
83 | "sink",
84 | "refrigerator",
85 | "blender",
86 | "book",
87 | "clock",
88 | "vase",
89 | "scissors",
90 | "teddy bear",
91 | "hair drier",
92 | "toothbrush",
93 | ]
--------------------------------------------------------------------------------
/deployments/mwt.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kubeflow.org/v1
2 | kind: TFJob
3 | metadata:
4 | name: multi-worker
5 | namespace: distributed-training
6 | spec:
7 | tfReplicaSpecs:
8 | Worker:
9 | replicas: 2
10 | restartPolicy: Never
11 | template:
12 | spec:
13 | containers:
14 | - name: tensorflow
15 | image: 6666688889/distributed_training:0.0.13
16 | volumeMounts:
17 | - mountPath: /train
18 | name: training
19 | readOnly: true
20 | volumes:
21 | - name: training
22 | persistentVolumeClaim:
23 | claimName: mwt-volume
24 | readOnly: true
25 | ---
26 | apiVersion: v1
27 | kind: PersistentVolumeClaim
28 | metadata:
29 | name: mwt-volume
30 | namespace: distributed-training
31 | spec:
32 | accessModes:
33 | - ReadWriteMany
34 | resources:
35 | requests:
36 | storage: 10Gi
--------------------------------------------------------------------------------
/deployments/triton-isvc.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: serving.kserve.io/v1beta1
2 | kind: InferenceService
3 | metadata:
4 | name: cardetection-mm
5 | namespace: modelmesh-serving
6 | annotations:
7 | serving.kserve.io/deploymentMode: ModelMesh
8 | serving.kserve.io/secretKey: localMinIO
9 | spec:
10 | predictor:
11 | model:
12 | modelFormat:
13 | name: onnx
14 | runtime: triton-2.x
15 | storageUri: s3://modelmesh-example-models/cardetect/yolov8n_car
--------------------------------------------------------------------------------
/deployments/triton-servingruntime.yaml:
--------------------------------------------------------------------------------
1 | # Copyright 2021 IBM Corporation
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | apiVersion: serving.kserve.io/v1alpha1
15 | kind: ServingRuntime
16 | metadata:
17 | name: triton-2.x
18 | labels:
19 | name: modelmesh-serving-triton-2.x-SR
20 | annotations:
21 | maxLoadingConcurrency: "2"
22 | serving.kserve.io/autoscalerClass: hpa
23 | serving.kserve.io/targetUtilizationPercentage: "75"
24 | serving.kserve.io/metrics: "cpu"
25 | serving.kserve.io/min-scale: "2"
26 | serving.kserve.io/max-scale: "3"
27 | spec:
28 | supportedModelFormats:
29 | - name: keras
30 | version: "2" # 2.6.0
31 | autoSelect: true
32 | - name: onnx
33 | version: "1" # 1.5.3
34 | autoSelect: true
35 | - name: pytorch
36 | version: "1" # 1.8.0a0+17f8c32
37 | autoSelect: true
38 | - name: tensorflow
39 | version: "1" # 1.15.4
40 | autoSelect: true
41 | - name: tensorflow
42 | version: "2" # 2.3.1
43 | autoSelect: true
44 | - name: tensorrt
45 | version: "7" # 7.2.1
46 | autoSelect: true
47 |
48 | protocolVersions:
49 | - grpc-v2
50 | multiModel: true
51 | replicas: 1
52 | grpcEndpoint: "port:8085"
53 | grpcDataEndpoint: "port:8001"
54 |
55 | containers:
56 | - name: triton
57 | image: nvcr.io/nvidia/tritonserver:23.09-py3
58 | command: [/bin/sh]
59 | args:
60 | - -c
61 | - 'mkdir -p /models/_triton_models;
62 | chmod 777 /models/_triton_models;
63 | exec tritonserver
64 | "--model-repository=/models/_triton_models"
65 | "--model-control-mode=explicit"
66 | "--strict-model-config=false"
67 | "--strict-readiness=false"
68 | "--allow-http=true"
69 | "--allow-sagemaker=false"
70 | '
71 | resources:
72 | requests:
73 | cpu: 500m
74 | memory: 1Gi
75 | limits:
76 | cpu: "5"
77 | memory: 1Gi
78 | livenessProbe:
79 | # the server is listening only on 127.0.0.1, so an httpGet probe sent
80 | # from the kubelet running on the node cannot connect to the server
81 | # (not even with the Host header or host field)
82 | # exec a curl call to have the request originate from localhost in the
83 | # container
84 | exec:
85 | command:
86 | - curl
87 | - --fail
88 | - --silent
89 | - --show-error
90 | - --max-time
91 | - "9"
92 | - http://localhost:8000/v2/health/live
93 | initialDelaySeconds: 5
94 | periodSeconds: 30
95 | timeoutSeconds: 10
96 | builtInAdapter:
97 | serverType: triton
98 | runtimeManagementPort: 8001
99 | memBufferBytes: -134217728
100 | modelLoadingTimeoutMillis: 90000
--------------------------------------------------------------------------------
/distributed_training/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow:2.12.0
2 |
3 | # Update the package list
4 | RUN apt-get update
5 |
6 | # Install necessary packages
7 | RUN apt-get install -y libgl1-mesa-glx
8 |
9 | # Install Python dependencies
10 | RUN pip install opencv-python-headless==4.5.3.56
11 | RUN pip install mlflow==2.14.1
12 | RUN pip install tqdm
13 |
14 | # Copy the application code to the container
15 | COPY . /app
16 |
17 | # Set the working directory
18 | WORKDIR /app
19 |
20 | # Run mwt.py with the --train argument
21 | CMD ["python", "mwt.py", "--train"]
22 |
--------------------------------------------------------------------------------
/distributed_training/README.md:
--------------------------------------------------------------------------------
1 | # Distributed Training Pipeline
2 |
3 | ## Overview:
4 |
5 | This pipeline leverages a combination of Redis for online feature storage, PostgreSQL for offline storage, TensorFlow for distributed training, and MLflow for model tracking and registry. Kubeflow orchestrates the entire process, ensuring a seamless flow from data preparation to model deployment.
6 |
7 | ## Table of Contents
8 |
9 | - [Dataset Preparation](#dataset-preparation)
10 | - [Deploying Multi-Worker Training Jobs](#deploying-multi-worker-training-jobs)
11 | - [Monitoring and Investigating Models](#monitoring-and-investigating-models)
12 | - [Running MLflow with Docker Compose](#running-mlflow-with-docker-compose)
13 | - [Important Considerations](#important-considerations)
14 | - [Integrating Jenkins for Continuous Integration](#integrating-jenkins-for-continuous-integration)
15 | - [References](#references)
16 |
17 | ## Dataset Preparation:
18 |
19 | Begin by downloading the dataset required for the training job from the following link: [Download Dataset](https://drive.google.com/drive/folders/12ncEAoWT_kwuPT8YRdFysqgS54XJwre7?usp=drive_link). The folder structure should resemble the following:
20 |
21 |
22 | 
23 |
24 |
25 | ## Deploying Multi-Worker Training Jobs
26 |
27 | To deploy multi-worker training jobs, apply the configuration using Kubernetes:
28 |
29 | ``` shell
30 | kubectl apply -f deployments/mwt.yaml
31 | ```
32 |
33 | ## Monitoring and Investigating Models
34 |
35 | To monitor the training process and inspect the models, update the `persistentVolumeClaim` in the `test/test.yaml` file and apply it:
36 |
37 | ```shell
38 | kubectl apply -f test/test.yaml
39 | ```
40 |
41 | This setup creates a pod that shares a volume with other training pods, allowing them to write and read from a common source. This shared volume facilitates easy access to logs and other critical data.
42 |
43 | You can access the pod to check and read logs using the following command:
44 |
45 | ```shell
46 | kubectl exec -ti nginx bash
47 | ```
48 |
49 | ## Running MLflow with Docker Compose
50 |
51 | For a proof-of-concept (POC) or limited resource environments, you can opt to run the MLflow service using Docker:
52 |
53 | ```shell
54 | docker compose -f docker-compose.yml up -d --build
55 | ```
56 |
57 | ## Important Considerations:
58 |
59 | 👉 If multiple GPUs are not available, consider using an alternative strategy, as illustrated below:
60 |
61 | 
62 |
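A simple way to express that fallback in code is a heuristic sketch like the one below; adapt the condition to your cluster:

```python
import tensorflow as tf

# Heuristic sketch: mirror across local GPUs when there are several,
# otherwise fall back to multi-worker training across pods.
if len(tf.config.list_physical_devices("GPU")) > 1:
    strategy = tf.distribute.MirroredStrategy()
else:
    strategy = tf.distribute.MultiWorkerMirroredStrategy()
print("Replicas in sync:", strategy.num_replicas_in_sync)
```
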
63 | 👉 Customize the script that runs the training job to your requirements. If the job fails, start by listing the TFJobs:
64 |
65 | ```shell
66 | kubectl get tfjob
67 | ```
68 |
69 | Then check the failing pod's error log and fix the issue:
70 | 
71 |
72 | 👉 In your training script, ensure the model definition and dataset loading are encapsulated within the strategy scope:
73 |
74 | 
75 |
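A minimal toy example of that pattern (unrelated to the repo's model; it only shows where the scope goes):

```python
import tensorflow as tf

strategy = tf.distribute.MultiWorkerMirroredStrategy()

# Variables created inside the scope become distributed variables,
# so the model (and its optimizer) must be built here.
with strategy.scope():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation="relu", input_shape=(10,)),
        tf.keras.layers.Dense(1),
    ])
    model.compile(optimizer="adam", loss="mse")

# Toy dataset; model.fit handles the per-worker sharding.
dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.normal((256, 10)), tf.random.normal((256, 1)))
).batch(32)
model.fit(dataset, epochs=1)
```
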
76 | 👉 To monitor the training process, you can exec into the pod or container (if using Docker) to observe the training job in real-time:
77 |
78 | 
79 |
80 | 👉 The trained model versions will be stored and managed in MLflow:
81 |
82 | 
83 |
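To pull a logged model back out of MLflow for evaluation or ONNX conversion, a minimal sketch (the tracking URI and run ID are placeholders; match them to your deployment):

```python
import mlflow

# Point at your MLflow server; the address is an assumption.
mlflow.set_tracking_uri("http://localhost:5000")

# "<run_id>" is a placeholder for the run that logged the model.
model = mlflow.tensorflow.load_model("runs:/<run_id>/model")
print(type(model))
```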
84 |
85 | ## Integrating Jenkins for Continuous Integration
86 |
87 | For automated retraining when new data is available, you can integrate Jenkins into your CI/CD pipeline.
88 |
89 | 1. Install Ngrok:
90 |
91 | ```shell
92 | curl -s https://ngrok-agent.s3.amazonaws.com/ngrok.asc | sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null
93 | echo "deb https://ngrok-agent.s3.amazonaws.com buster main" | sudo tee /etc/apt/sources.list.d/ngrok.list
94 | sudo apt update
95 | sudo apt install ngrok
98 | ```
99 |
100 |
101 | 2. Test Ngrok Installation: Run `ngrok` in the terminal to verify the installation:
102 |
103 | 
104 |
105 | 3. Retrieve Jenkins Password: Access Jenkins by retrieving the password as shown below:
106 |
107 | 
108 |
109 | 4. Configure Jenkins:
110 |
111 | - Open `localhost:8081` in your browser to reach Jenkins, then go to `Manage Jenkins` -> `Plugins`, search for `Docker Pipeline` and `Docker`, and choose `Install without restart`.
112 | 
113 | - Install necessary plugins like `Docker Pipeline` and `Docker`.
114 | 
115 |
116 | 5. Expose Jenkins with Ngrok:
117 |
118 | - Run `ngrok http 8081` to expose Jenkins:
119 | 
120 |
121 | 6. Set Up GitHub Webhook:
122 |
123 | - Open your GitHub repository (in this case, Capstone-Model-Serving-pipeline) -> `Settings` -> `Webhooks` -> `Add webhook`. Paste the forwarding URL from the step above into Payload URL and append `/github-webhook/`. For Content type, choose `application/json`. Under "Which events would you like to trigger this webhook?", choose `Push` and `Pull`. Finally, wait for the webhook status to show a green check mark, indicating that it is working correctly.
124 |
125 | 
126 |
127 | - Check the connection. If Jenkins is successfully connected to GitHub, the webhook will show a green check mark in the GitHub UI:
128 |
129 | 
130 |
131 | 7. Configure Jenkins Multibranch Pipeline:
132 |
133 | - Back in Jenkins, choose `Dashboard` -> `New Item`, enter the name of your project, choose `Multibranch Pipeline`, and click `OK`.
134 |
135 | - Name the project, then under `Branch Sources` click `Add source` and choose GitHub.
136 |
137 | 
138 |
139 | - Under `GitHub Credentials`, select the project you created above. For the username, enter the GitHub account that hosts the repository. For the password, go back to GitHub -> `Developer settings` -> `Personal access tokens` -> `Tokens (classic)`, generate a new classic token (for a friction-free demo, grant all scopes), copy it into the Jenkins password field, and click `Add`.
140 |
141 | 
142 |
143 | - Point the pipeline at the repository using its HTTPS URL.
144 | - Check all the information, `Validate` it, and if everything looks correct, `Save`.
145 |
146 | 
147 |
148 | - Choose `Credentials`, select the scope of the project, and add a new credential. For the username, enter your DockerHub user.
149 | 
150 |
151 |
152 | - For the password: go to DockerHub (where you store your Docker images), navigate to `Account Settings` -> `Security`, and generate a new token. Copy this token and paste it into the Jenkins credential, using `dockerhub` as the ID.
153 |
154 | 
155 |
156 | - Choose `Manage Jenkins` -> `System`, scroll to the GitHub section, set the GitHub API usage rate limiting strategy to `Never check rate limit (NOT RECOMMENDED)`, and `Save`.
157 | - Finally, open the repository in Jenkins, go to `Configure`, select the GitHub credential you created above, then `Save`.
158 | - Click `Scan Repository Now` to check that all connections are correct. If they are not, restart Jenkins and try again.
159 |
160 | - The result of the build on Jenkins will look like this
161 | 
162 |
163 | - As you can see, the application version will increase
164 | 
165 |
166 | # References
167 |
168 | For more information, please take a look at examples [here](https://github.com/kubeflow/training-operator/tree/master/examples) and [here](https://github.com/kubeflow/examples/tree/master/github_issue_summarization).
169 |
170 | Some other useful examples:
171 | - https://henning.kropponline.de/2017/03/19/distributing-tensorflow/
172 | - https://www.cs.cornell.edu/courses/cs4787/2019sp/notes/lecture22.pdf
173 | - https://web.eecs.umich.edu/~mosharaf/Readings/Parameter-Server.pdf
174 | - https://s3.us.cloud-object-storage.appdomain.cloud/developer/default/series/os-kubeflow-2020/static/kubeflow06.pdf
175 | - https://xzhu0027.gitbook.io/blog/ml-system/sys-ml-index/parameter-servers
176 | - http://www.juyang.co/distributed-model-training-ii-parameter-server-and-allreduce/
177 |
--------------------------------------------------------------------------------
/distributed_training/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | IMAGE=6666688889/distributed_training:0.0.13
3 | docker build -t $IMAGE .
4 | docker push $IMAGE
--------------------------------------------------------------------------------
/distributed_training/images/training_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/distributed_training/images/training_pipeline.png
--------------------------------------------------------------------------------
/distributed_training/mwt.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import multiprocessing
3 | import os
4 | import sys
5 |
6 | import cv2
7 | import numpy as np
8 |
9 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
10 | os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
11 |
12 | import tensorflow as tf
13 | import tqdm
14 |
15 | from nets import nn
16 | from utils import config
17 | from utils import image_utils
18 | from utils.dataset import input_fn, DataLoader
19 | import posixpath
20 |
21 | np.random.seed(12345)
22 | tf.random.set_seed(12345)
23 |
24 | tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
25 |
26 | import mlflow
27 | import mlflow.tensorflow
28 |
29 | # Set the MLflow tracking URI
30 | mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "http://mlflow:5000"))
31 |
32 |
33 | def train():
34 | strategy = tf.distribute.MultiWorkerMirroredStrategy()
35 |
36 | image_path = posixpath.join(config.data_dir, config.image_dir, "train")
37 | label_path = posixpath.join(config.data_dir, config.label_dir, "train")
38 |
39 | image_files = [
40 | os.path.splitext(file_name)[0]
41 | for file_name in os.listdir(image_path)
42 | if file_name.lower().endswith(".jpg")
43 | ]
44 | label_files = [
45 | os.path.splitext(file_name)[0]
46 | for file_name in os.listdir(label_path)
47 | if file_name.lower().endswith(".txt")
48 | ]
49 |
50 | file_names = list(set(image_files) & set(label_files))
51 |
52 | steps = len(file_names) // config.batch_size
53 | if os.path.exists(os.path.join(config.data_dir, "TF")):
54 | dataset = DataLoader().input_fn(file_names)
55 | else:
56 | dataset = input_fn(file_names)
57 | dataset = strategy.experimental_distribute_dataset(dataset)
58 |
59 | with strategy.scope():
60 | model = nn.build_model()
61 | model.summary()
62 | optimizer = tf.keras.optimizers.Adam(nn.CosineLR(steps), 0.937)
63 |
64 | with strategy.scope():
65 | loss_object = nn.ComputeLoss()
66 |
67 | def compute_loss(y_true, y_pred):
68 | total_loss = loss_object(y_pred, y_true)
69 | return tf.reduce_sum(total_loss) / config.batch_size
70 |
71 | with strategy.scope():
72 |
73 | def train_step(image, y_true):
74 | with tf.GradientTape() as tape:
75 | y_pred = model(image, training=True)
76 | loss = compute_loss(y_true, y_pred)
77 | variables = model.trainable_variables
78 | gradients = tape.gradient(loss, variables)
79 | optimizer.apply_gradients(zip(gradients, variables))
80 | return loss
81 |
82 | with strategy.scope():
83 |
84 | @tf.function
85 | def distributed_train_step(image, y_true):
86 | per_replica_losses = strategy.run(train_step, args=(image, y_true))
87 | return strategy.reduce(
88 | tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None
89 | )
90 |
91 | def train_fn():
92 | if not os.path.exists("weights"):
93 | os.makedirs("weights")
94 | pb = tf.keras.utils.Progbar(steps, stateful_metrics=["loss"])
95 | print(f"[INFO] {len(file_names)} data points")
96 |
97 | # Start MLflow run
98 | with mlflow.start_run():
99 | mlflow.log_param("batch_size", config.batch_size)
100 | mlflow.log_param("num_epochs", config.num_epochs)
101 |
102 | for step, inputs in enumerate(dataset):
103 | if step % steps == 0:
104 | print(f"Epoch {step // steps + 1}/{config.num_epochs}")
105 | pb = tf.keras.utils.Progbar(steps, stateful_metrics=["loss"])
106 | step += 1
107 | image, y_true_1, y_true_2, y_true_3 = inputs
108 | y_true = (y_true_1, y_true_2, y_true_3)
109 | loss = distributed_train_step(image, y_true)
110 | pb.add(1, [("loss", loss.numpy())])
111 |
112 | # Log loss to MLflow
113 | mlflow.log_metric("loss", loss.numpy(), step=step)
114 |
115 | if step % steps == 0:
116 | model.save_weights(
117 | os.path.join("weights", f"model_{config.version}.h5")
118 | )
119 | # Log model checkpoint to MLflow
120 | mlflow.log_artifact(
121 | os.path.join("weights", f"model_{config.version}.h5")
122 | )
123 | if step // steps == config.num_epochs:
124 | mlflow.tensorflow.log_model(model, "model")
125 | sys.exit("--- Stop Training ---")
126 |
127 | train_fn()
128 |
129 |
131 | def test():
132 | def draw_bbox(image, boxes):
133 | for box in boxes:
134 | coordinate = np.array(box[:4], dtype=np.int32)
135 | c1, c2 = (coordinate[0], coordinate[1]), (coordinate[2], coordinate[3])
136 | cv2.rectangle(image, c1, c2, (255, 0, 0), 1)
137 | return image
138 |
139 | def test_fn():
140 | if not os.path.exists("results"):
141 | os.makedirs("results")
142 | image_path = posixpath.join(config.data_dir, config.image_dir, "valid")
143 | label_path = posixpath.join(config.data_dir, config.label_dir, "valid")
144 |
145 | image_files = [
146 | os.path.splitext(file_name)[0]
147 | for file_name in os.listdir(image_path)
148 | if file_name.lower().endswith(".jpg")
149 | ]
150 | label_files = [
151 | os.path.splitext(file_name)[0]
152 | for file_name in os.listdir(label_path)
153 | if file_name.lower().endswith(".txt")
154 | ]
155 |
156 | file_names = list(set(image_files) & set(label_files))
157 |
158 | model = nn.build_model(training=False)
159 | model.load_weights(f"weights/model_{config.version}.h5", True)
160 |
161 | for file_name in tqdm.tqdm(file_names):
162 | image = cv2.imread(
163 | posixpath.join(
164 | config.data_dir, config.image_dir, "valid", file_name + ".jpg"
165 | )
166 | )
167 | image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
168 |
169 | image_np, scale, dw, dh = image_utils.resize(image_np)
170 | image_np = image_np.astype(np.float32) / 255.0
171 |
172 | boxes, scores, labels = model.predict(image_np[np.newaxis, ...])
173 |
174 | boxes, scores, labels = (
175 | np.squeeze(boxes, 0),
176 | np.squeeze(scores, 0),
177 | np.squeeze(labels, 0),
178 | )
179 |
180 | boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dw) / scale
181 | boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dh) / scale
182 | image = draw_bbox(image, boxes)
183 | cv2.imwrite(f"results/{file_name}.jpg", image)
184 |
185 | test_fn()
186 |
187 |
188 | def write_tf_record(queue, sentinel):
189 | def byte_feature(value):
190 | if not isinstance(value, bytes):
191 | if not isinstance(value, list):
192 | value = value.encode("utf-8")
193 | else:
194 | value = [val.encode("utf-8") for val in value]
195 | if not isinstance(value, list):
196 | value = [value]
197 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
198 |
199 | while True:
200 | file_name = queue.get()
201 |
202 | if file_name == sentinel:
203 | break
204 | in_image = image_utils.load_image(file_name)[:, :, ::-1]
205 | boxes, label = image_utils.load_label(file_name)
206 |
207 | in_image, boxes = image_utils.resize(in_image, boxes)
208 |
209 | y_true_1, y_true_2, y_true_3 = image_utils.process_box(boxes, label)
210 |
211 | in_image = in_image.astype("float32")
212 | y_true_1 = y_true_1.astype("float32")
213 | y_true_2 = y_true_2.astype("float32")
214 | y_true_3 = y_true_3.astype("float32")
215 |
216 | in_image = in_image.tobytes()
217 | y_true_1 = y_true_1.tobytes()
218 | y_true_2 = y_true_2.tobytes()
219 | y_true_3 = y_true_3.tobytes()
220 |
221 | features = tf.train.Features(
222 | feature={
223 | "in_image": byte_feature(in_image),
224 | "y_true_1": byte_feature(y_true_1),
225 | "y_true_2": byte_feature(y_true_2),
226 | "y_true_3": byte_feature(y_true_3),
227 | }
228 | )
229 | tf_example = tf.train.Example(features=features)
230 | opt = tf.io.TFRecordOptions("GZIP")
231 | with tf.io.TFRecordWriter(
232 | os.path.join(config.data_dir, "TF", file_name + ".tf"), opt
233 | ) as writer:
234 | writer.write(tf_example.SerializeToString())
235 |
236 |
237 | def generate_tf_record():
238 | if not os.path.exists(os.path.join(config.data_dir, "TF")):
239 | os.makedirs(os.path.join(config.data_dir, "TF"))
240 | file_names = []
241 | with open(os.path.join(config.data_dir, "train.txt")) as reader:
242 | for line in reader.readlines():
243 | file_names.append(line.rstrip().split(" ")[0])
244 | sentinel = ("", [])
245 | queue = multiprocessing.Manager().Queue()
246 | for file_name in tqdm.tqdm(file_names):
247 | queue.put(file_name)
248 | for _ in range(os.cpu_count()):
249 | queue.put(sentinel)
250 | print("[INFO] generating TF record")
251 | process_pool = []
252 | for i in range(os.cpu_count()):
253 | process = multiprocessing.Process(
254 | target=write_tf_record, args=(queue, sentinel)
255 | )
256 | process_pool.append(process)
257 | process.start()
258 | for process in process_pool:
259 | process.join()
260 |
261 |
262 | class AnchorGenerator:
263 | def __init__(self, num_cluster):
264 | self.num_cluster = num_cluster
265 |
266 | def iou(self, boxes, clusters): # 1 box -> k clusters
267 | n = boxes.shape[0]
268 | k = self.num_cluster
269 |
270 | box_area = boxes[:, 0] * boxes[:, 1]
271 | box_area = box_area.repeat(k)
272 | box_area = np.reshape(box_area, (n, k))
273 |
274 | cluster_area = clusters[:, 0] * clusters[:, 1]
275 | cluster_area = np.tile(cluster_area, [1, n])
276 | cluster_area = np.reshape(cluster_area, (n, k))
277 |
278 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k))
279 | cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k))
280 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix)
281 |
282 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k))
283 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k))
284 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix)
285 | inter_area = np.multiply(min_w_matrix, min_h_matrix)
286 |
287 | return inter_area / (box_area + cluster_area - inter_area)
288 |
289 | def avg_iou(self, boxes, clusters):
290 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)])
291 | return accuracy
292 |
293 | def generator(self, boxes, k, dist=np.median):
294 | box_number = boxes.shape[0]
295 | last_nearest = np.zeros((box_number,))
296 | clusters = boxes[
297 | np.random.choice(box_number, k, replace=False)
298 | ] # init k clusters
299 | while True:
300 | distances = 1 - self.iou(boxes, clusters)
301 |
302 | current_nearest = np.argmin(distances, axis=1)
303 | if (last_nearest == current_nearest).all():
304 | break # clusters won't change
305 | for cluster in range(k):
306 | clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0)
307 | last_nearest = current_nearest
308 |
309 | return clusters
310 |
311 | def generate_anchor(self):
312 | boxes = self.get_boxes()
313 | result = self.generator(boxes, k=self.num_cluster)
314 | result = result[np.lexsort(result.T[0, None])]
315 | print("\nAnchors: \n{}".format(result))
316 | print("\nFitness: {:.4f}".format(self.avg_iou(boxes, result)))
317 |
318 | @staticmethod
319 | def get_boxes():
320 | boxes = []
321 | file_names = [
322 | file_name[:-4]
323 | for file_name in os.listdir(
324 | posixpath.join(config.data_dir, config.label_dir)
325 | )
326 | ]
327 | for file_name in file_names:
328 | for box in image_utils.load_label(file_name)[0]:
329 | boxes.append([box[2] - box[0], box[3] - box[1]])
330 | return np.array(boxes)
331 |
332 |
333 | if __name__ == "__main__":
334 | parser = argparse.ArgumentParser()
335 | parser.add_argument("--anchor", action="store_true")
336 | parser.add_argument("--record", action="store_true")
337 | parser.add_argument("--train", action="store_true")
338 | parser.add_argument("--test", action="store_true")
339 |
340 | args = parser.parse_args()
341 | if args.anchor:
342 | AnchorGenerator(9).generate_anchor()
343 | if args.record:
344 | generate_tf_record()
345 | if args.train:
346 | train()
347 | if args.test:
348 | test()
349 |
--------------------------------------------------------------------------------
/distributed_training/nets/nn.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import tensorflow as tf
4 | from tensorflow.keras import backend
5 | from tensorflow.keras import layers
6 |
7 | from utils import config
8 |
9 | initializer = tf.random_normal_initializer(stddev=0.01)
10 | l2 = tf.keras.regularizers.l2(4e-5)
11 |
12 |
13 | def conv(x, filters, k=1, s=1):
14 | if s == 2:
15 | x = layers.ZeroPadding2D(((1, 0), (1, 0)))(x)
16 | padding = "valid"
17 | else:
18 | padding = "same"
19 | x = layers.Conv2D(
20 | filters,
21 | k,
22 | s,
23 | padding,
24 | use_bias=False,
25 | kernel_initializer=initializer,
26 | kernel_regularizer=l2,
27 | )(x)
28 | x = layers.BatchNormalization(momentum=0.03)(x)
29 | x = layers.Activation(tf.nn.swish)(x)
30 | return x
31 |
32 |
33 | def residual(x, filters, add=True):
34 | inputs = x
35 | if add:
36 | x = conv(x, filters, 1)
37 | x = conv(x, filters, 3)
38 | x = inputs + x
39 | else:
40 | x = conv(x, filters, 1)
41 | x = conv(x, filters, 3)
42 | return x
43 |
44 |
45 | def csp(x, filters, n, add=True):
46 | y = conv(x, filters // 2)
47 | for _ in range(n):
48 | y = residual(y, filters // 2, add)
49 |
50 | x = conv(x, filters // 2)
51 | x = layers.concatenate([x, y])
52 |
53 | x = conv(x, filters)
54 | return x
55 |
56 |
57 | def build_model(training=True):
58 | depth = config.depth[config.versions.index(config.version)]
59 | width = config.width[config.versions.index(config.version)]
60 |
61 | inputs = layers.Input([config.image_size, config.image_size, 3])
62 | x = tf.nn.space_to_depth(inputs, 2)
63 | x = conv(x, int(round(width * 64)), 3)
64 | x = conv(x, int(round(width * 128)), 3, 2)
65 | x = csp(x, int(round(width * 128)), int(round(depth * 3)))
66 |
67 | x = conv(x, int(round(width * 256)), 3, 2)
68 | x = csp(x, int(round(width * 256)), int(round(depth * 9)))
69 | x1 = x
70 |
71 | x = conv(x, int(round(width * 512)), 3, 2)
72 | x = csp(x, int(round(width * 512)), int(round(depth * 9)))
73 | x2 = x
74 |
75 | x = conv(x, int(round(width * 1024)), 3, 2)
76 | x = conv(x, int(round(width * 512)), 1, 1)
77 | x = layers.concatenate(
78 | [
79 | x,
80 | tf.nn.max_pool(x, 5, 1, "SAME"),
81 | tf.nn.max_pool(x, 9, 1, "SAME"),
82 | tf.nn.max_pool(x, 13, 1, "SAME"),
83 | ]
84 | )
85 | x = conv(x, int(round(width * 1024)), 1, 1)
86 | x = csp(x, int(round(width * 1024)), int(round(depth * 3)), False)
87 |
88 | x = conv(x, int(round(width * 512)), 1)
89 | x3 = x
90 | x = layers.UpSampling2D()(x)
91 | x = layers.concatenate([x, x2])
92 | x = csp(x, int(round(width * 512)), int(round(depth * 3)), False)
93 |
94 | x = conv(x, int(round(width * 256)), 1)
95 | x4 = x
96 | x = layers.UpSampling2D()(x)
97 | x = layers.concatenate([x, x1])
98 | x = csp(x, int(round(width * 256)), int(round(depth * 3)), False)
99 | p3 = layers.Conv2D(
100 | 3 * (len(config.class_dict) + 5),
101 | 1,
102 | name=f"p3_{len(config.class_dict)}",
103 | kernel_initializer=initializer,
104 | kernel_regularizer=l2,
105 | )(x)
106 |
107 | x = conv(x, int(round(width * 256)), 3, 2)
108 | x = layers.concatenate([x, x4])
109 | x = csp(x, int(round(width * 512)), int(round(depth * 3)), False)
110 | p4 = layers.Conv2D(
111 | 3 * (len(config.class_dict) + 5),
112 | 1,
113 | name=f"p4_{len(config.class_dict)}",
114 | kernel_initializer=initializer,
115 | kernel_regularizer=l2,
116 | )(x)
117 |
118 | x = conv(x, int(round(width * 512)), 3, 2)
119 | x = layers.concatenate([x, x3])
120 | x = csp(x, int(round(width * 1024)), int(round(depth * 3)), False)
121 | p5 = layers.Conv2D(
122 | 3 * (len(config.class_dict) + 5),
123 | 1,
124 | name=f"p5_{len(config.class_dict)}",
125 | kernel_initializer=initializer,
126 | kernel_regularizer=l2,
127 | )(x)
128 |
129 | if training:
130 | return tf.keras.Model(inputs, [p5, p4, p3])
131 | else:
132 | return tf.keras.Model(inputs, Predict()([p5, p4, p3]))
133 |
134 |
135 | def process_layer(feature_map, anchors):
136 | grid_size = tf.shape(feature_map)[1:3]
137 | ratio = tf.cast(
138 | tf.constant([config.image_size, config.image_size]) / grid_size, tf.float32
139 | )
140 | rescaled_anchors = [
141 | (anchor[0] / ratio[1], anchor[1] / ratio[0]) for anchor in anchors
142 | ]
143 |
144 | feature_map = tf.reshape(
145 | feature_map, [-1, grid_size[0], grid_size[1], 3, 5 + len(config.class_dict)]
146 | )
147 |
148 | box_centers, box_sizes, conf, prob = tf.split(
149 | feature_map, [2, 2, 1, len(config.class_dict)], axis=-1
150 | )
151 | box_centers = tf.nn.sigmoid(box_centers)
152 |
153 | grid_x = tf.range(grid_size[1], dtype=tf.int32)
154 | grid_y = tf.range(grid_size[0], dtype=tf.int32)
155 | grid_x, grid_y = tf.meshgrid(grid_x, grid_y)
156 | x_offset = tf.reshape(grid_x, (-1, 1))
157 | y_offset = tf.reshape(grid_y, (-1, 1))
158 | x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
159 | x_y_offset = tf.cast(
160 | tf.reshape(x_y_offset, [grid_size[0], grid_size[1], 1, 2]), tf.float32
161 | )
162 |
163 | box_centers = box_centers + x_y_offset
164 | box_centers = box_centers * ratio[::-1]
165 |
166 | box_sizes = tf.exp(box_sizes) * rescaled_anchors
167 | box_sizes = box_sizes * ratio[::-1]
168 |
169 | boxes = tf.concat([box_centers, box_sizes], axis=-1)
170 |
171 | return x_y_offset, boxes, conf, prob
172 |
173 |
174 | def box_iou(pred_boxes, valid_true_boxes):
175 | pred_box_xy = pred_boxes[..., 0:2]
176 | pred_box_wh = pred_boxes[..., 2:4]
177 |
178 | pred_box_xy = tf.expand_dims(pred_box_xy, -2)
179 | pred_box_wh = tf.expand_dims(pred_box_wh, -2)
180 |
181 | true_box_xy = valid_true_boxes[:, 0:2]
182 | true_box_wh = valid_true_boxes[:, 2:4]
183 |
184 | intersect_min = tf.maximum(
185 | pred_box_xy - pred_box_wh / 2.0, true_box_xy - true_box_wh / 2.0
186 | )
187 | intersect_max = tf.minimum(
188 | pred_box_xy + pred_box_wh / 2.0, true_box_xy + true_box_wh / 2.0
189 | )
190 |
191 | intersect_wh = tf.maximum(intersect_max - intersect_min, 0.0)
192 |
193 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
194 | pred_box_area = pred_box_wh[..., 0] * pred_box_wh[..., 1]
195 | true_box_area = true_box_wh[..., 0] * true_box_wh[..., 1]
196 | true_box_area = tf.expand_dims(true_box_area, axis=0)
197 |
198 | return intersect_area / (pred_box_area + true_box_area - intersect_area + 1e-10)
199 |
200 |
201 | def compute_nms(args):
202 | boxes, classification = args
203 |
204 | def nms_fn(score, label):
205 | score_indices = tf.where(backend.greater(score, config.threshold))
206 |
207 | filtered_boxes = tf.gather_nd(boxes, score_indices)
208 | filtered_scores = backend.gather(score, score_indices)[:, 0]
209 |
210 | nms_indices = tf.image.non_max_suppression(
211 | filtered_boxes, filtered_scores, config.max_boxes, 0.1
212 | )
213 | score_indices = backend.gather(score_indices, nms_indices)
214 |
215 | label = tf.gather_nd(label, score_indices)
216 | score_indices = backend.stack([score_indices[:, 0], label], axis=1)
217 |
218 | return score_indices
219 |     # Run per-class NMS, keep the global top-k by score, then pad results to a fixed max_boxes
220 | all_indices = []
221 | for c in range(int(classification.shape[1])):
222 | scores = classification[:, c]
223 | labels = c * tf.ones((backend.shape(scores)[0],), dtype="int64")
224 | all_indices.append(nms_fn(scores, labels))
225 | indices = backend.concatenate(all_indices, axis=0)
226 |
227 | scores = tf.gather_nd(classification, indices)
228 | labels = indices[:, 1]
229 | scores, top_indices = tf.nn.top_k(
230 | scores, k=backend.minimum(config.max_boxes, backend.shape(scores)[0])
231 | )
232 |
233 | indices = backend.gather(indices[:, 0], top_indices)
234 | boxes = backend.gather(boxes, indices)
235 | labels = backend.gather(labels, top_indices)
236 |
237 | pad_size = backend.maximum(0, config.max_boxes - backend.shape(scores)[0])
238 |
239 | boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
240 | scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
241 | labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
242 | labels = backend.cast(labels, "int32")
243 |
244 | boxes.set_shape([config.max_boxes, 4])
245 | scores.set_shape([config.max_boxes])
246 | labels.set_shape([config.max_boxes])
247 |
248 | return [boxes, scores, labels]
249 |
250 |
251 | class ComputeLoss(object):
252 | def __init__(self):
253 | super().__init__()
254 |
255 | @staticmethod
256 | def compute_loss(y_pred, y_true, anchors):
257 | grid_size = tf.shape(y_pred)[1:3]
258 | ratio = tf.cast(
259 | tf.constant([config.image_size, config.image_size]) / grid_size, tf.float32
260 | )
261 | batch_size = tf.cast(tf.shape(y_pred)[0], tf.float32)
262 |
263 | x_y_offset, pred_boxes, pred_conf, pred_prob = process_layer(y_pred, anchors)
264 |
265 | object_mask = y_true[..., 4:5]
266 |         # Ignore mask: cells whose best IoU with any ground-truth box is >= 0.2 are excluded from the background confidence loss
267 | def cond(idx, _):
268 | return tf.less(idx, tf.cast(batch_size, tf.int32))
269 |
270 | def body(idx, mask):
271 | valid_true_boxes = tf.boolean_mask(
272 | y_true[idx, ..., 0:4], tf.cast(object_mask[idx, ..., 0], "bool")
273 | )
274 | iou = box_iou(pred_boxes[idx], valid_true_boxes)
275 | return idx + 1, mask.write(
276 | idx, tf.cast(tf.reduce_max(iou, axis=-1) < 0.2, tf.float32)
277 | )
278 |
279 | ignore_mask = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
280 |
281 | _, ignore_mask = tf.while_loop(cond=cond, body=body, loop_vars=[0, ignore_mask])
282 | ignore_mask = ignore_mask.stack()
283 | ignore_mask = tf.expand_dims(ignore_mask, -1)
284 |
285 | true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
286 | pred_xy = pred_boxes[..., 0:2] / ratio[::-1] - x_y_offset
287 |
288 | true_tw_th = y_true[..., 2:4] / anchors
289 | pred_tw_th = pred_boxes[..., 2:4] / anchors
290 | true_tw_th = tf.where(
291 | tf.equal(true_tw_th, 0), tf.ones_like(true_tw_th), true_tw_th
292 | )
293 | pred_tw_th = tf.where(
294 | tf.equal(pred_tw_th, 0), tf.ones_like(pred_tw_th), pred_tw_th
295 | )
296 | true_tw_th = tf.math.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
297 | pred_tw_th = tf.math.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))
298 |
299 | box_loss_scale = y_true[..., 2:3] * y_true[..., 3:4]
300 | box_loss_scale = 2.0 - box_loss_scale / tf.cast(
301 | config.image_size**2, tf.float32
302 | )
303 |
304 | xy_loss = tf.reduce_sum(
305 | tf.square(true_xy - pred_xy) * object_mask * box_loss_scale
306 | )
307 | wh_loss = tf.reduce_sum(
308 | tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale
309 | )
310 |
311 | conf_pos_mask = object_mask
312 | conf_neg_mask = (1 - object_mask) * ignore_mask
313 | conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(
314 | labels=object_mask, logits=pred_conf
315 | )
316 | conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(
317 | labels=object_mask, logits=pred_conf
318 | )
319 |
320 | conf_loss = tf.reduce_sum((conf_loss_pos + conf_loss_neg))
321 |
322 | true_conf = y_true[..., 5:]
323 |
324 | class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
325 | true_conf, pred_prob
326 | )
327 | class_loss = tf.reduce_sum(class_loss)
328 |
329 | return xy_loss + wh_loss + conf_loss + class_loss
330 |
331 | def __call__(self, y_pred, y_true):
332 | loss = 0.0
333 | anchor_group = [config.anchors[6:9], config.anchors[3:6], config.anchors[0:3]]
334 |
335 | for i in range(len(y_pred)):
336 | loss += self.compute_loss(y_pred[i], y_true[i], anchor_group[i])
337 | return loss
338 |
339 |
340 | class CosineLR(tf.optimizers.schedules.LearningRateSchedule):
341 | def __init__(self, steps):
342 | super().__init__()
343 | self.lr = 0.008 * config.batch_size / 64
344 | self.warmup_init = 0.0008
345 | self.warmup_step = steps
346 | self.decay_steps = tf.cast(
347 | (config.num_epochs - 1) * self.warmup_step, tf.float32
348 | )
349 |
350 | def __call__(self, step):
351 | linear_warmup = (
352 | tf.cast(step, dtype=tf.float32)
353 | / self.warmup_step
354 | * (self.lr - self.warmup_init)
355 | )
356 | cosine_lr = (
357 | 0.5
358 | * self.lr
359 | * (1 + tf.cos(math.pi * tf.cast(step, tf.float32) / self.decay_steps))
360 | )
361 | return tf.where(
362 | step < self.warmup_step, self.warmup_init + linear_warmup, cosine_lr
363 | )
364 |
365 | def get_config(self):
366 |         return {"steps": self.warmup_step}
367 |
368 |
369 | class Predict(layers.Layer):
370 | def __init__(self):
371 | super().__init__()
372 |
373 | def call(self, inputs, **kwargs):
374 | y_pred = [
375 | (inputs[0], config.anchors[6:9]),
376 | (inputs[1], config.anchors[3:6]),
377 | (inputs[2], config.anchors[0:3]),
378 | ]
379 |
380 | boxes_list, conf_list, prob_list = [], [], []
381 | for result in [
382 | process_layer(feature_map, anchors) for (feature_map, anchors) in y_pred
383 | ]:
384 | x_y_offset, box, conf, prob = result
385 | grid_size = tf.shape(x_y_offset)[:2]
386 | box = tf.reshape(box, [-1, grid_size[0] * grid_size[1] * 3, 4])
387 | conf = tf.reshape(conf, [-1, grid_size[0] * grid_size[1] * 3, 1])
388 | prob = tf.reshape(
389 | prob, [-1, grid_size[0] * grid_size[1] * 3, len(config.class_dict)]
390 | )
391 | boxes_list.append(box)
392 | conf_list.append(tf.sigmoid(conf))
393 | prob_list.append(tf.sigmoid(prob))
394 |
395 | boxes = tf.concat(boxes_list, axis=1)
396 | conf = tf.concat(conf_list, axis=1)
397 | prob = tf.concat(prob_list, axis=1)
398 |
399 | center_x, center_y, w, h = tf.split(boxes, [1, 1, 1, 1], axis=-1)
400 | x_min = center_x - w / 2
401 | y_min = center_y - h / 2
402 | x_max = center_x + w / 2
403 | y_max = center_y + h / 2
404 |
405 | boxes = tf.concat([x_min, y_min, x_max, y_max], axis=-1)
406 |
407 | outputs = tf.map_fn(
408 | fn=compute_nms,
409 | elems=[boxes, conf * prob],
410 | dtype=["float32", "float32", "int32"],
411 | parallel_iterations=100,
412 | )
413 |
414 | return outputs
415 |
416 | def compute_output_shape(self, input_shape):
417 | return [
418 | (input_shape[0][0], config.max_boxes, 4),
419 | (input_shape[1][0], config.max_boxes),
420 | (input_shape[1][0], config.max_boxes),
421 | ]
422 |
423 | def compute_mask(self, inputs, mask=None):
424 | return (len(inputs) + 1) * [None]
425 |
426 | def get_config(self):
427 | return super().get_config()
428 |
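429 | # NOTE (editor): minimal sanity-check sketch, not part of the original module;
430 | # it assumes TensorFlow 2.x and only prints the warmup/decay behavior of the
431 | # CosineLR schedule defined above (100 steps per epoch is hypothetical).
432 | if __name__ == "__main__":
433 |     schedule = CosineLR(steps=100)
434 |     # linear warmup below step 100, cosine decay afterwards
435 |     print([float(schedule(step)) for step in (0, 50, 150)])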
--------------------------------------------------------------------------------
/distributed_training/test/test.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: nginx
5 | spec:
6 | containers:
7 | - name: nginx
8 | image: nginx
9 | volumeMounts:
10 | - mountPath: /train
11 | name: training
12 | volumes:
13 | - name: training
14 | persistentVolumeClaim:
15 | claimName: mwt-volume
16 |
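17 | # NOTE (editor): this pod only verifies that the "mwt-volume" PVC mounts; a
18 | # quick check, assuming the repository layout above, would be:
19 | #   kubectl apply -f distributed_training/test/test.yaml
20 | #   kubectl exec nginx -- ls /train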
--------------------------------------------------------------------------------
/distributed_training/utils/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy
3 | import posixpath
4 |
5 | width = [0.50, 0.75, 1.0, 1.25]
6 | depth = [0.33, 0.67, 1.0, 1.33]
7 |
8 | versions = ["s", "m", "l", "x"]
9 | # data_dir = os.path.join('..', 'Dataset')
10 | data_dir = posixpath.join(".", "Dataset")
11 |
12 | threshold = 0.3
13 | max_boxes = 150
14 | image_dir = "images"
15 | label_dir = "labels"
16 |
17 | num_epochs = 2
18 | batch_size = 32
19 | image_size = 640
20 | class_dict = {
21 | "person": 0,
22 | "bicycle": 1,
23 | "car": 2,
24 | "motorcycle": 3,
25 | "airplane": 4,
26 | "bus": 5,
27 | "train": 6,
28 | "truck": 7,
29 | "boat": 8,
30 | "traffic light": 9,
31 | "fire hydrant": 10,
32 | "stop sign": 11,
33 | "parking meter": 12,
34 | "bench": 13,
35 | "bird": 14,
36 | "cat": 15,
37 | "dog": 16,
38 | "horse": 17,
39 | "sheep": 18,
40 | "cow": 19,
41 | "elephant": 20,
42 | "bear": 21,
43 | "zebra": 22,
44 | "giraffe": 23,
45 | "backpack": 24,
46 | "umbrella": 25,
47 | "handbag": 26,
48 | "tie": 27,
49 | "suitcase": 28,
50 | "frisbee": 29,
51 | "skis": 30,
52 | "snowboard": 31,
53 | "sports ball": 32,
54 | "kite": 33,
55 | "baseball bat": 34,
56 | "baseball glove": 35,
57 | "skateboard": 36,
58 | "surfboard": 37,
59 | "tennis racket": 38,
60 | "bottle": 39,
61 | "wine glass": 40,
62 | "cup": 41,
63 | "fork": 42,
64 | "knife": 43,
65 | "spoon": 44,
66 | "bowl": 45,
67 | "banana": 46,
68 | "apple": 47,
69 | "sandwich": 48,
70 | "orange": 49,
71 | "broccoli": 50,
72 | "carrot": 51,
73 | "hot dog": 52,
74 | "pizza": 53,
75 | "donut": 54,
76 | "cake": 55,
77 | "chair": 56,
78 | "couch": 57,
79 | "potted plant": 58,
80 | "bed": 59,
81 | "dining table": 60,
82 | "toilet": 61,
83 | "tv": 62,
84 | "laptop": 63,
85 | "mouse": 64,
86 | "remote": 65,
87 | "keyboard": 66,
88 | "cell phone": 67,
89 | "microwave": 68,
90 | "oven": 69,
91 | "toaster": 70,
92 | "sink": 71,
93 | "refrigerator": 72,
94 | "book": 73,
95 | "clock": 74,
96 | "vase": 75,
97 | "scissors": 76,
98 | "teddy bear": 77,
99 | "hair drier": 78,
100 | "toothbrush": 79,
101 | }
102 |
103 | version = "s"
104 | anchors = numpy.array(
105 | [
106 | [8.0, 9.0],
107 | [16.0, 24.0],
108 | [28.0, 58.0],
109 | [41.0, 25.0],
110 | [58.0, 125.0],
111 | [71.0, 52.0],
112 | [129.0, 97.0],
113 | [163.0, 218.0],
114 | [384.0, 347.0],
115 | ],
116 | numpy.float32,
117 | )
118 |
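119 | # NOTE (editor): quick sanity-check sketch, not part of the original module;
120 | # the chosen version letter indexes the width/depth multipliers, and the nine
121 | # anchors split into three per detection scale.
122 | if __name__ == "__main__":
123 |     i = versions.index(version)
124 |     print(f"version={version} width={width[i]} depth={depth[i]}")
125 |     assert anchors.shape == (9, 2)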
--------------------------------------------------------------------------------
/distributed_training/utils/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy
3 | import tensorflow as tf
4 | from tensorflow.keras import utils
5 | import posixpath
6 | from utils import image_utils
7 | from utils import config
8 |
9 |
10 | class Generator(utils.Sequence):
11 | def __init__(self, file_names):
12 | self.file_names = file_names
13 |
14 | def __len__(self):
15 | return int(numpy.floor(len(self.file_names) / config.batch_size))
16 |
17 | def __getitem__(self, index):
18 | image = image_utils.load_image(self.file_names[index])
19 | boxes = image_utils.load_label(self.file_names[index])
20 | image, boxes = image_utils.resize(image, boxes)
21 | # image, boxes = util.random_flip(image, boxes)
22 |
23 | image = image[:, :, ::-1].astype(numpy.float32)
24 | image = image / 255.0
25 | y_true_1, y_true_2, y_true_3 = image_utils.process_box(boxes)
26 | return image, y_true_1, y_true_2, y_true_3
27 |
28 | def on_epoch_end(self):
29 | numpy.random.shuffle(self.file_names)
30 |
31 |
32 | def input_fn(file_names):
33 | def generator_fn():
34 |         enqueuer = utils.OrderedEnqueuer(Generator(file_names), True)
35 |         enqueuer.start(workers=min(os.cpu_count(), config.batch_size))
36 |         batches = enqueuer.get()  # get() returns the shared output generator; fetch it once
37 |         while True:
38 |             yield next(batches)
39 |
40 | output_types = (tf.float32, tf.float32, tf.float32, tf.float32)
41 | output_shapes = (
42 | (config.image_size, config.image_size, 3),
43 | (
44 | config.image_size // 32,
45 | config.image_size // 32,
46 | 3,
47 | len(config.class_dict) + 5,
48 | ),
49 | (
50 | config.image_size // 16,
51 | config.image_size // 16,
52 | 3,
53 | len(config.class_dict) + 5,
54 | ),
55 | (config.image_size // 8, config.image_size // 8, 3, len(config.class_dict) + 5),
56 | )
57 |
58 | dataset = tf.data.Dataset.from_generator(
59 | generator=generator_fn, output_types=output_types, output_shapes=output_shapes
60 | )
61 |
62 | dataset = dataset.repeat(config.num_epochs + 1)
63 | dataset = dataset.batch(config.batch_size)
64 | dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
65 | return dataset
66 |
67 |
68 | class DataLoader:
69 | def __init__(self):
70 | super().__init__()
71 | self.description = {
72 | "in_image": tf.io.FixedLenFeature([], tf.string),
73 | "y_true_1": tf.io.FixedLenFeature([], tf.string),
74 | "y_true_2": tf.io.FixedLenFeature([], tf.string),
75 | "y_true_3": tf.io.FixedLenFeature([], tf.string),
76 | }
77 |
78 | def parse_data(self, tf_record):
79 | features = tf.io.parse_single_example(tf_record, self.description)
80 |
81 | in_image = tf.io.decode_raw(features["in_image"], tf.float32)
82 | in_image = tf.reshape(in_image, (config.image_size, config.image_size, 3))
83 | in_image = in_image / 255.0
84 |
85 | y_true_1 = tf.io.decode_raw(features["y_true_1"], tf.float32)
86 | y_true_1 = tf.reshape(
87 | y_true_1,
88 | (
89 | config.image_size // 32,
90 | config.image_size // 32,
91 | 3,
92 | 5 + len(config.class_dict),
93 | ),
94 | )
95 |
96 | y_true_2 = tf.io.decode_raw(features["y_true_2"], tf.float32)
97 | y_true_2 = tf.reshape(
98 | y_true_2,
99 | (
100 | config.image_size // 16,
101 | config.image_size // 16,
102 | 3,
103 | 5 + len(config.class_dict),
104 | ),
105 | )
106 |
107 | y_true_3 = tf.io.decode_raw(features["y_true_3"], tf.float32)
108 | y_true_3 = tf.reshape(
109 | y_true_3,
110 | (
111 | config.image_size // 8,
112 | config.image_size // 8,
113 | 3,
114 | 5 + len(config.class_dict),
115 | ),
116 | )
117 |
118 | return in_image, y_true_1, y_true_2, y_true_3
119 |
120 | def input_fn(self, file_names):
121 | dataset = tf.data.TFRecordDataset(file_names, "GZIP")
122 | dataset = dataset.map(self.parse_data, os.cpu_count())
123 | dataset = dataset.repeat(config.num_epochs + 1)
124 | dataset = dataset.batch(config.batch_size)
125 | dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
126 | return dataset
127 |
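128 | # NOTE (editor): usage sketch for the TFRecord path; the shard name below is a
129 | # hypothetical placeholder rather than a file shipped with the repository.
130 | if __name__ == "__main__":
131 |     loader = DataLoader()
132 |     data = loader.input_fn(["Dataset/TF/train-0000.tf"])  # hypothetical shard
133 |     for image, y_true_1, y_true_2, y_true_3 in data.take(1):
134 |         print(image.shape, y_true_1.shape, y_true_2.shape, y_true_3.shape)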
--------------------------------------------------------------------------------
/distributed_training/utils/image_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import xml.etree.ElementTree
3 | import posixpath
4 | import cv2
5 | import numpy
6 |
7 | from utils import config
8 |
9 |
10 | def load_image(file_name, split="train"):
11 | path = posixpath.join(config.data_dir, config.image_dir, split, file_name + ".jpg")
12 | image = cv2.imread(path)
13 | return image
14 |
15 |
16 | def load_label(file_name, split="train"):
17 | # Construct the new path for the label file
18 | path = posixpath.join(config.data_dir, config.label_dir, split, file_name + ".txt")
19 |
20 | boxes = []
21 |
22 | # Read the text file line by line
23 | with open(path, "r") as f:
24 | for line in f:
25 | # Split the line into coordinates
26 | _, x_min, y_min, x_max, y_max = line.strip().split()
27 | x_min = float(x_min)
28 | y_min = float(y_min)
29 | x_max = float(x_max)
30 | y_max = float(y_max)
31 |
32 | boxes.append([x_min, y_min, x_max, y_max])
33 |
34 | boxes = numpy.asarray(boxes, numpy.float32)
35 | return boxes
36 |
37 |
38 | def resize(image, boxes=None):
39 | shape = image.shape[:2]
40 | scale = min(config.image_size / shape[1], config.image_size / shape[0])
41 | image = cv2.resize(image, (int(scale * shape[1]), int(scale * shape[0])))
42 |     # Letterbox: scale while preserving aspect ratio, then center-pad to a square image_size canvas
43 | image_padded = numpy.zeros([config.image_size, config.image_size, 3], numpy.uint8)
44 |
45 | dw = (config.image_size - int(scale * shape[1])) // 2
46 | dh = (config.image_size - int(scale * shape[0])) // 2
47 |
48 | image_padded[
49 | dh : int(scale * shape[0]) + dh, dw : int(scale * shape[1]) + dw, :
50 | ] = image.copy()
51 |
52 | if boxes is None:
53 | return image_padded, scale, dw, dh
54 |
55 | else:
56 | boxes[:, [0, 2]] = boxes[:, [0, 2]] * scale + dw
57 | boxes[:, [1, 3]] = boxes[:, [1, 3]] * scale + dh
58 |
59 | return image_padded, boxes
60 |
61 |
62 | def random_flip(image, boxes):
63 | if numpy.random.uniform() < 0.5:
64 | image = cv2.flip(image, 1)
65 |         # Mirror x-coordinates in one assignment; the RHS is evaluated fully first, so neither column is read after being overwritten
66 |         boxes[:, [0, 2]] = image.shape[1] - boxes[:, [2, 0]]
67 | return image, boxes
68 |
69 |
70 | def process_box(boxes):
71 | anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
72 | anchors = config.anchors
73 | box_centers = (boxes[:, 0:2] + boxes[:, 2:4]) / 2
74 | box_size = boxes[:, 2:4] - boxes[:, 0:2]
75 |
76 | y_true_1 = numpy.zeros(
77 | (
78 | config.image_size // 32,
79 | config.image_size // 32,
80 | 3,
81 | 5 + len(config.class_dict),
82 | ),
83 | numpy.float32,
84 | )
85 | y_true_2 = numpy.zeros(
86 | (
87 | config.image_size // 16,
88 | config.image_size // 16,
89 | 3,
90 | 5 + len(config.class_dict),
91 | ),
92 | numpy.float32,
93 | )
94 | y_true_3 = numpy.zeros(
95 | (config.image_size // 8, config.image_size // 8, 3, 5 + len(config.class_dict)),
96 | numpy.float32,
97 | )
98 |
99 | y_true = [y_true_1, y_true_2, y_true_3]
100 |
101 | box_size = numpy.expand_dims(box_size, 1)
102 |
103 | min_np = numpy.maximum(-box_size / 2, -anchors / 2)
104 | max_np = numpy.minimum(box_size / 2, anchors / 2)
105 |
106 | whs = max_np - min_np
107 |
108 | overlap = whs[:, :, 0] * whs[:, :, 1]
109 | union = (
110 | box_size[:, :, 0] * box_size[:, :, 1]
111 | + anchors[:, 0] * anchors[:, 1]
112 | - whs[:, :, 0] * whs[:, :, 1]
113 | + 1e-10
114 | )
115 |
116 | iou = overlap / union
117 | best_match_idx = numpy.argmax(iou, axis=1)
118 |
119 | ratio_dict = {1.0: 8.0, 2.0: 16.0, 3.0: 32.0}
120 | for i, idx in enumerate(best_match_idx):
121 | feature_map_group = 2 - idx // 3
122 | ratio = ratio_dict[numpy.ceil((idx + 1) / 3.0)]
123 | x = int(numpy.floor(box_centers[i, 0] / ratio))
124 | y = int(numpy.floor(box_centers[i, 1] / ratio))
125 | k = anchors_mask[feature_map_group].index(idx)
126 | # c = labels[i]
127 |
128 | y_true[feature_map_group][y, x, k, :2] = box_centers[i]
129 | y_true[feature_map_group][y, x, k, 2:4] = box_size[i]
130 | y_true[feature_map_group][y, x, k, 4] = 1.0
131 | # y_true[feature_map_group][y, x, k, 5 + c] = 1.
132 |
133 | return y_true_1, y_true_2, y_true_3
134 |
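135 | # NOTE (editor): end-to-end preprocessing sketch; "vid_4_700" is a sample name
136 | # borrowed from the debug notebook and assumes the Dataset/ layout described
137 | # in utils/config.py.
138 | if __name__ == "__main__":
139 |     image = load_image("vid_4_700", split="valid")
140 |     boxes = load_label("vid_4_700", split="valid")
141 |     image, boxes = resize(image, boxes)
142 |     y_true_1, y_true_2, y_true_3 = process_box(boxes)
143 |     print(image.shape, y_true_1.shape, y_true_2.shape, y_true_3.shape)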
--------------------------------------------------------------------------------
/distributed_training/weights/model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/distributed_training/weights/model.h5
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.8'
2 |
3 | services:
4 | mlflow:
5 | container_name: mlflow
6 | build:
7 | context: ./mlflow
8 | dockerfile: Dockerfile
9 | ports:
10 | - "5000:5000"
11 | volumes:
12 | - mlflow-artifacts:/mlflow
13 | environment:
14 | - MLFLOW_TRACKING_URI=http://0.0.0.0:5000
15 | - MLFLOW_ARTIFACT_ROOT=/mlflow
16 |
17 | train-service:
18 | container_name: train
19 | build:
20 | context: ./train
21 | dockerfile: Dockerfile
22 | volumes:
23 | - ./train:/app
24 | depends_on:
25 | - mlflow
26 | environment:
27 | - MLFLOW_TRACKING_URI=http://mlflow:5000
28 |
29 |
30 | jenkins:
31 | image: fullstackdatascience/jenkins:lts
32 | container_name: jenkins
33 | restart: unless-stopped
34 | privileged: true
35 | user: root
36 | ports:
37 | - 8081:8080
38 | - 50000:50000
39 | volumes:
40 | - jenkins_home:/var/jenkins_home
41 | - /var/run/docker.sock:/var/run/docker.sock
42 |
43 |
44 | volumes:
45 | mlflow-artifacts:
46 | jenkins_home:
47 |
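48 | # NOTE (editor): `docker compose up -d` brings the stack up; MLflow is then
49 | # reachable on http://localhost:5000 and Jenkins on http://localhost:8081
50 | # (host port 8081 maps to the container's 8080).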
--------------------------------------------------------------------------------
/images/PipelineAllcode.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/PipelineAllcode.png
--------------------------------------------------------------------------------
/images/add_credential.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/add_credential.png
--------------------------------------------------------------------------------
/images/add_credential_dockerhub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/add_credential_dockerhub.png
--------------------------------------------------------------------------------
/images/add_token_dockerhub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/add_token_dockerhub.png
--------------------------------------------------------------------------------
/images/architecutre_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/architecutre_overview.png
--------------------------------------------------------------------------------
/images/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/bus.jpg
--------------------------------------------------------------------------------
/images/check_request_github_jenkins.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/check_request_github_jenkins.png
--------------------------------------------------------------------------------
/images/connector.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/connector.png
--------------------------------------------------------------------------------
/images/data_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/data_pipeline.png
--------------------------------------------------------------------------------
/images/diagram_pipe.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/diagram_pipe.gif
--------------------------------------------------------------------------------
/images/error_log_pod.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/error_log_pod.png
--------------------------------------------------------------------------------
/images/false_modelmesh_deploy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/false_modelmesh_deploy.png
--------------------------------------------------------------------------------
/images/generate_token_docker_hub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/generate_token_docker_hub.png
--------------------------------------------------------------------------------
/images/get_token_github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/get_token_github.png
--------------------------------------------------------------------------------
/images/github_tokens.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/github_tokens.png
--------------------------------------------------------------------------------
/images/instal_docker_jenkins.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/instal_docker_jenkins.png
--------------------------------------------------------------------------------
/images/install_docker_success.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/install_docker_success.png
--------------------------------------------------------------------------------
/images/isvc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/isvc.png
--------------------------------------------------------------------------------
/images/jenkins_container.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/jenkins_container.png
--------------------------------------------------------------------------------
/images/jenkins_portal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/jenkins_portal.png
--------------------------------------------------------------------------------
/images/jenkins_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/jenkins_ui.png
--------------------------------------------------------------------------------
/images/messenger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/messenger.png
--------------------------------------------------------------------------------
/images/minio-credentials.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/minio-credentials.png
--------------------------------------------------------------------------------
/images/mlflow _modelregistry.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/mlflow _modelregistry.png
--------------------------------------------------------------------------------
/images/modelmesh-serving-installation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/modelmesh-serving-installation.png
--------------------------------------------------------------------------------
/images/ngrok.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/ngrok.png
--------------------------------------------------------------------------------
/images/ngrok_forwarding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/ngrok_forwarding.png
--------------------------------------------------------------------------------
/images/password_jenkins.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/password_jenkins.png
--------------------------------------------------------------------------------
/images/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/result.png
--------------------------------------------------------------------------------
/images/result_connect_jenkins_github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/result_connect_jenkins_github.png
--------------------------------------------------------------------------------
/images/result_push_dockerhub.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/result_push_dockerhub.png
--------------------------------------------------------------------------------
/images/result_train_pod.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/result_train_pod.png
--------------------------------------------------------------------------------
/images/strategy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/strategy.png
--------------------------------------------------------------------------------
/images/strategy_scope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/strategy_scope.png
--------------------------------------------------------------------------------
/images/structure_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/structure_data.png
--------------------------------------------------------------------------------
/images/structure_training.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/structure_training.png
--------------------------------------------------------------------------------
/images/topic_tab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/topic_tab.png
--------------------------------------------------------------------------------
/images/train_process.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/train_process.png
--------------------------------------------------------------------------------
/images/ui_build_jenkins.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/ui_build_jenkins.png
--------------------------------------------------------------------------------
/images/validate_connect_repo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/validate_connect_repo.png
--------------------------------------------------------------------------------
/images/webhook_github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/webhook_github.png
--------------------------------------------------------------------------------
/mlflow/Dockerfile:
--------------------------------------------------------------------------------
1 | # Dockerfile.mlflow
2 | FROM python:3.9-slim
3 |
4 | RUN pip install mlflow
5 |
6 | CMD ["mlflow", "server", "--host", "0.0.0.0", "--port", "5000"]
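7 |
8 | # NOTE (editor): build-and-run sketch, assuming the ./mlflow build context used
9 | # by docker-compose.yml:
10 | #   docker build -t mlflow-server ./mlflow
11 | #   docker run -p 5000:5000 mlflow-server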
--------------------------------------------------------------------------------
/model_repo/yolov8n_car/1/model.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/model_repo/yolov8n_car/1/model.onnx
--------------------------------------------------------------------------------
/model_repo/yolov8n_car/config.pbtxt:
--------------------------------------------------------------------------------
1 | # Model configuration file (optional)
2 | # https://github.com/triton-inference-server/tutorials/blob/main/Conceptual_Guide/Part_1-model_deployment/README.md#model-configuration
3 | name: "yolov8n_car"
4 | backend: "onnxruntime" # Select the backend to run the model https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton
5 | max_batch_size: 2 # Max batch size the model can support
6 | # In most cases, Triton can infer `input` and `output` automatically,
7 | # but we declare them explicitly here
8 | input [
9 | {
10 | name: "images"
11 | data_type: TYPE_FP32
12 |     dims: [ 3, 640, 640 ] # If batching is disabled, use [ 1, 640, 640 ]
13 | }
14 | ]
15 | output [
16 | {
17 | name: "output0"
18 | data_type: TYPE_FP32
19 |     dims: [ -1, -1 ] # If batching is disabled, use [ 84, 8400 ]
20 | }
21 | ]
22 |
23 | instance_group [ { kind: KIND_CPU } ]
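24 |
25 | # NOTE (editor): minimal client sketch, assuming Triton's HTTP endpoint on
26 | # localhost:8000 and the tritonclient[http] pin from requirements.txt:
27 | #   import numpy as np, tritonclient.http as httpclient
28 | #   client = httpclient.InferenceServerClient("localhost:8000")
29 | #   inp = httpclient.InferInput("images", [1, 3, 640, 640], "FP32")
30 | #   inp.set_data_from_numpy(np.zeros((1, 3, 640, 640), np.float32))
31 | #   print(client.infer("yolov8n_car", [inp]).as_numpy("output0").shape)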
--------------------------------------------------------------------------------
/notebooks/debug.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import xml.etree.ElementTree\n",
11 | "# import cv2\n",
12 | "import numpy\n",
13 | "from utils import config\n",
14 | "import posixpath"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "def load_image(file_name):\n",
24 | " path = posixpath.join(config.data_dir, config.image_dir, file_name + '.jpg')\n",
25 | " image = cv2.imread(path)\n",
26 | " return image\n",
27 | "\n",
28 | "\n",
29 | "def load_label(file_name, split='train'):\n",
30 | " # Construct the new path for the label file\n",
31 | " path = posixpath.join(config.data_dir, config.label_dir, split, file_name + '.txt')\n",
32 | " \n",
33 | " boxes = []\n",
34 | " \n",
35 | " # Read the text file line by line\n",
36 | " with open(path, 'r') as f:\n",
37 | " for line in f:\n",
38 | " # Split the line into coordinates\n",
39 | " _, x_min, y_min, x_max, y_max = line.strip().split()\n",
40 | " x_min = float(x_min)\n",
41 | " y_min = float(y_min)\n",
42 | " x_max = float(x_max)\n",
43 | " y_max = float(y_max)\n",
44 | "\n",
45 | " boxes.append([x_min, y_min, x_max, y_max])\n",
46 | " \n",
47 | " boxes = numpy.asarray(boxes, numpy.float32) \n",
48 | " return boxes"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 3,
54 | "metadata": {},
55 | "outputs": [
56 | {
57 | "name": "stdout",
58 | "output_type": "stream",
59 | "text": [
60 | "0.562590448668639 0.6324806949999999 0.38422575976331363 0.2265122263157895\n",
61 | "0.9337916063609467 0.564913127368421 0.13241678727810652 0.17631917631578944\n"
62 | ]
63 | }
64 | ],
65 | "source": [
66 | "path = \"Dataset/labels/valid/vid_4_700.txt\"\n",
67 | "with open(path, 'r') as f:\n",
68 | " for line in f:\n",
69 | " # print(line)\n",
70 | " _, x_min, y_min, x_max, y_max = line.strip().split()\n",
71 | " print(x_min, y_min, x_max, y_max)"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 4,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "# file_names = []\n",
81 | "# with open(os.path.join(config.data_dir, 'train.txt')) as f:\n",
82 | "# for file_name in f.readlines():\n",
83 | "# image_path = os.path.join(config.data_dir, config.image_dir, file_name.rstrip() + '.jpg')\n",
84 | "# label_path = os.path.join(config.data_dir, config.label_dir, file_name.rstrip() + '.xml')\n",
85 | "# if os.path.exists(image_path) and os.path.exists(label_path):\n",
86 | "# if os.path.exists(os.path.join(config.data_dir, 'TF')):\n",
87 | "# file_names.append(os.path.join(config.data_dir, 'TF', file_name.rstrip() + '.tf'))\n",
88 | "# else:\n",
89 | "# file_names.append(file_name.rstrip())\n",
90 | " \n",
91 | "# print(file_names)"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 5,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "data": {
101 | "text/plain": [
102 | "(284, 338, 284)"
103 | ]
104 | },
105 | "execution_count": 5,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "\n",
112 | "image_path = posixpath.join(config.data_dir, config.image_dir, 'train')\n",
113 | "label_path = posixpath.join(config.data_dir, config.label_dir, 'train')\n",
114 | "\n",
115 | "image_files = [os.path.splitext(file_name)[0] for file_name in os.listdir(image_path) if file_name.lower().endswith('.jpg')]\n",
116 | "label_files = [os.path.splitext(file_name)[0] for file_name in os.listdir(label_path) if file_name.lower().endswith('.txt')]\n",
117 | "\n",
118 | "file_names = list(set(image_files) & set(label_files))\n",
119 | "len(file_names), len(image_files), len(label_files)"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 6,
125 | "metadata": {},
126 | "outputs": [
127 | {
128 | "data": {
129 | "text/plain": [
130 | ""
131 | ]
132 | },
133 | "execution_count": 6,
134 | "metadata": {},
135 | "output_type": "execute_result"
136 | }
137 | ],
138 | "source": [
139 | "from utils import config, util\n",
140 | "from tensorflow.keras import utils\n",
141 | "import tensorflow as tf\n",
142 | "\n",
143 | "\n",
144 | "class Generator(utils.Sequence):\n",
145 | " def __init__(self, file_names):\n",
146 | " self.file_names = file_names\n",
147 | "\n",
148 | " def __len__(self):\n",
149 | " return int(numpy.floor(len(self.file_names) / config.batch_size))\n",
150 | "\n",
151 | " def __getitem__(self, index):\n",
152 | " image = util.load_image(self.file_names[index])\n",
153 | " boxes = util.load_label(self.file_names[index])\n",
154 | " image, boxes = util.resize(image, boxes)\n",
155 | " # image, boxes = util.random_flip(image, boxes)\n",
156 | "\n",
157 | " image = image[:, :, ::-1].astype(numpy.float32)\n",
158 | " image = image / 255.0\n",
159 | " y_true_1, y_true_2, y_true_3 = util.process_box(boxes)\n",
160 | " return image, y_true_1, y_true_2, y_true_3\n",
161 | "\n",
162 | " def on_epoch_end(self):\n",
163 | " numpy.random.shuffle(self.file_names)\n",
164 | "\n",
165 | "\n",
166 | "def input_fn(file_names):\n",
167 | " # def generator_fn():\n",
168 | " generator = utils.OrderedEnqueuer(Generator(file_names), True)\n",
169 | " # generator.start(workers=min(os.cpu_count(), config.batch_size))\n",
170 | " while True:\n",
171 | " image, y_true_1, y_true_2, y_true_3 = generator.get().__next__()\n",
172 | " print(image, y_true_1, y_true_2, y_true_3)\n",
173 | " yield image, y_true_1, y_true_2, y_true_3\n",
174 | " \n",
175 | " \n",
176 | "input_fn(file_names)"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 7,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "name": "stdout",
186 | "output_type": "stream",
187 | "text": [
188 | "vid_4_6380\n",
189 |     "(large all-zero printout of the image and y_true arrays omitted for brevity)\n"
724 | ]
725 | }
726 | ],
727 | "source": [
728 | "index = 0\n",
729 | "print(file_names[index])\n",
730 | "image = util.load_image(file_names[index])\n",
731 | "boxes = util.load_label(file_names[index])\n",
732 | "image, boxes = util.resize(image, boxes)\n",
733 | "# image, boxes = util.random_flip(image, boxes)\n",
734 | "\n",
735 | "image = image[:, :, ::-1].astype(numpy.float32)\n",
736 | "image = image / 255.0\n",
737 | "y_true_1, y_true_2, y_true_3 = util.process_box(boxes)\n",
738 | "print(image, y_true_1, y_true_2, y_true_3)\n"
739 | ]
740 | },
741 | {
742 | "cell_type": "code",
743 | "execution_count": 8,
744 | "metadata": {},
745 | "outputs": [],
746 | "source": [
747 | "# # Create an iterator for the dataset\n",
748 | "# iterator = iter(dataset)\n",
749 | "\n",
750 | "# # Get one sample from the dataset\n",
751 | "# sample = next(iterator)\n",
752 | "\n",
753 | "# # Unpack the sample\n",
754 | "# image, y_true_1, y_true_2, y_true_3 = sample"
755 | ]
756 | }
757 | ],
758 | "metadata": {
759 | "kernelspec": {
760 | "display_name": "Python 3",
761 | "language": "python",
762 | "name": "python3"
763 | },
764 | "language_info": {
765 | "codemirror_mode": {
766 | "name": "ipython",
767 | "version": 3
768 | },
769 | "file_extension": ".py",
770 | "mimetype": "text/x-python",
771 | "name": "python",
772 | "nbconvert_exporter": "python",
773 | "pygments_lexer": "ipython3",
774 | "version": "3.10.13"
775 | }
776 | },
777 | "nbformat": 4,
778 | "nbformat_minor": 2
779 | }
780 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | minio==7.1.17
2 | ultralytics==8.0.202
3 | onnx==1.15.0
4 | tritonclient[http]==2.39.0
5 | onnxsim==0.4.35
6 | onnxruntime-gpu==1.16.1
7 | tensorrt==8.6.1
8 | cuda-python==12.3.0
9 |
--------------------------------------------------------------------------------
/streaming/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8-slim
2 |
5 | # Install dependencies
6 | RUN pip3 install kafka-python==2.0.2
7 | RUN pip3 install avro==1.11.1
8 | RUN pip3 install pandas==1.5.1
9 | RUN pip3 install pyarrow==10.0.1
10 | RUN pip3 install python-schema-registry-client==2.4.1
11 | RUN pip3 install pymongo==4.5.0
12 |
13 | # Copy app handler code
14 | COPY produce.py produce.py
15 | # COPY kafka_producer/generate_schemas.py generate_schemas.py
16 | # COPY run.sh .
17 | # # Uncomment this to generate a random schema
18 | # RUN chmod +x /run.sh && ./run.sh generate_schemas
19 |
20 | CMD [ "python", "-u", "produce.py", "--mode", "setup", "--bootstrap_servers", "broker:29092"]
--------------------------------------------------------------------------------
/streaming/README.md:
--------------------------------------------------------------------------------
1 | # Data Pipeline Guide
2 | ## Table of Contents
3 | 1. [Introduction](#introduction)
4 | 2. [Dataset Setup](#dataset-setup)
5 | - [Downloading the Dataset](#downloading-the-dataset)
6 | - [Folder Structure](#folder-structure)
7 | 3. [Kafka and Flink Setup](#kafka-and-flink-setup)
8 | - [Starting Docker Compose](#starting-docker-compose)
9 | - [Accessing Kafka Control Center](#accessing-kafka-control-center)
10 | 4. [Viewing Kafka Topics](#viewing-kafka-topics)
11 | - [Accessing the Topics Tab](#accessing-the-topics-tab)
12 | - [Viewing Topic Messages](#viewing-topic-messages)
13 | 5. [Adding Kafka Connector](#adding-kafka-connector)
14 | 6. [Verifying Data in PostgreSQL](#verifying-data-in-postgresql)
15 | 7. [Note](#note)
16 |
17 | ---
18 | 
19 | ## Introduction
20 | In this step, we focus on the real-time data processing component of our pipeline. The goal is to simulate and process streaming data to enhance the robustness of our data pipeline.
21 |
22 | 👉 We achieve this by leveraging **Kafka** for stream ingestion and **Apache Flink** for stream processing.
23 |
24 | **Note:** In this project, Kafka acts as the backbone for stream data ingestion, handling both real and simulated (fake) stream data. **Apache Flink** processes this data in real-time, ensuring that the processed data is available in our **Redis** online store. Redis, in turn, is synced with our **PostgreSQL** offline store, providing a unified data storage solution.
25 |
26 | Key steps in this process include:
27 |
28 | 1. **Kafka Producer Setup**: The Kafka producer service is responsible for continuously sending data streams. You can customize the message format, bind data to messages, and specify the Kafka topic for message distribution.
29 |
30 | 2. **Stream Processing with Apache Flink**: Flink processes the incoming data streams, transforming them into a format suitable for storage in Redis. This ensures that data is available for immediate use in both real-time and batch processing scenarios.
31 |
32 | 3. **Data Syncing**: Redis, serving as the online store, is synced with PostgreSQL to maintain consistency between real-time and offline data.
33 |
34 | For more details on setting up the Kafka producer and configuring Flink for stream processing, refer to the [Confluent PostgreSQL Sink Guide](https://docs.confluent.io/cloud/current/connectors/cc-postgresql-sink.html#step-6-check-the-results-in-postgresql).
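35 |
36 | For reference, the producer in `produce.py` wraps each image in a Connect-style JSON envelope, declaring the schema inline so the JDBC sink can map fields to table columns (the values below are illustrative):
37 |
38 | ```json
39 | {
40 |   "schema": {
41 |     "type": "struct",
42 |     "fields": [
43 |       {"type": "int64", "optional": false, "field": "image_id"},
44 |       {"type": "bytes", "optional": false, "field": "image_data"}
45 |     ]
46 |   },
47 |   "payload": {
48 |     "image_id": 1,
49 |     "image_data": "<base64-encoded image bytes>"
50 |   }
51 | }
52 | ```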
35 |
36 |
37 | # Dataset Setup
38 |
39 | ## Downloading the Dataset
40 | To begin, download the dataset required for streaming from the following link: [Dataset Link](https://drive.google.com/drive/folders/12ncEAoWT_kwuPT8YRdFysqgS54XJwre7?usp=drive_link)
41 |
42 | ## Folder Structure
43 | The structure of the folder will be like this:
44 |
45 |
46 | ![Folder Structure](images/structure_data.png)
47 |
48 |
49 | # Kafka and Flink Setup
50 |
51 | ## Starting Docker Compose
52 | If you haven't already done so in previous steps, start the Docker Compose setup to launch the necessary services.
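53 |
54 | A minimal sketch, assuming you run the commands from the `streaming/` directory of this repository:
55 |
56 | ```bash
57 | docker compose up -d
58 |
59 | # Wait until the broker, schema registry, and connect services report "healthy"
60 | docker compose ps
61 | ```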
53 |
54 | ## Accessing Kafka Control Center
55 | Once Docker Compose is running, you can access the Kafka Control Center by navigating to `http://localhost:9021`. This interface allows you to manage and monitor your Kafka cluster.
56 |
57 | # Viewing Kafka Topics
58 |
59 | ## Accessing the Topics Tab
60 | To view the available Kafka topics, click on the `Topics` tab within the Kafka Control Center. You can follow the steps outlined in the image below:
61 |
62 | 
63 |
64 | ## Viewing Topic Messages
65 | Select a specific topic (e.g., `image_0`) to view the messages being transmitted:
66 |
67 | 
68 |
69 | # Adding Kafka Connector
70 | To ensure that messages are forwarded to PostgreSQL, you will need to add a Kafka connector. An example configuration file, `connect-timescaledb-sink.json`, is provided in this repository for your reference:
71 |
72 | 
73 |
74 | # Verifying Data in PostgreSQL
75 | Finally, after setting up the Kafka connector, verify that the data has been successfully transferred to PostgreSQL. You can do this by querying the database using SQL to confirm that the data is correctly stored and ready for training.
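76 |
77 | A quick check, assuming the sink auto-created a table named after the `sink_images_0` topic (the JDBC sink's default) and the TimescaleDB credentials from `docker-compose.yml`:
78 |
79 | ```bash
80 | docker exec -it flink-timescaledb psql -U k6 -d k6 \
81 |   -c "SELECT image_id, octet_length(image_data) FROM sink_images_0 LIMIT 5;"
82 | ```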
76 |
77 | # Note
78 | Before proceeding to data verification, ensure that the Kafka connector is properly configured and operational. This step is crucial for the successful transmission of data from Kafka to PostgreSQL.
79 |
--------------------------------------------------------------------------------
/streaming/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | zookeeper:
3 | image: confluentinc/cp-zookeeper:7.5.0
4 | # hostname: zookeeper
5 | container_name: flink-zookeeper
6 | ports:
7 | - "2181:2181"
8 | healthcheck:
9 | test: echo srvr | nc zookeeper 2181 || exit 1
10 | start_period: 10s
11 | retries: 20
12 | interval: 10s
13 | environment:
14 | ZOOKEEPER_CLIENT_PORT: 2181
15 | ZOOKEEPER_TICK_TIME: 2000
16 |
17 | # Kafka broker
18 | broker:
19 | image: confluentinc/cp-server:7.5.0
20 | # hostname: broker
21 | container_name: flink-broker
22 | depends_on:
23 | - zookeeper
24 | ports:
25 | - "9092:9092"
26 | - "9101:9101"
27 | healthcheck:
28 | test: nc -z localhost 9092 || exit -1
29 | start_period: 15s
30 | interval: 5s
31 | timeout: 10s
32 | retries: 10
33 | environment:
34 | KAFKA_BROKER_ID: 1
35 | KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
36 | KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
37 | KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
38 | KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
39 |
40 | # For managing Avro schemas
41 | schema-registry:
42 | image: confluentinc/cp-schema-registry:7.5.0
43 | # hostname: schema-registry
44 | container_name: flink-schema-registry
45 | depends_on:
46 | - broker
47 | ports:
48 | - "8081:8081"
49 | healthcheck:
50 | start_period: 10s
51 | interval: 10s
52 | retries: 20
53 | test: curl --user superUser:superUser --fail --silent --insecure http://localhost:8081/subjects --output /dev/null || exit 1
54 | environment:
55 | SCHEMA_REGISTRY_HOST_NAME: schema-registry
56 | SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092'
57 | SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
58 |
59 | # For connecting to offline store
60 | connect:
61 | image: confluentinc/cp-kafka-connect:7.5.0
62 | # hostname: connect
63 | container_name: flink-connect
64 | depends_on:
65 | broker:
66 | condition: service_healthy
67 | schema-registry:
68 | condition: service_healthy
69 | zookeeper:
70 | condition: service_healthy
71 | ports:
72 | - "8083:8083"
73 | environment:
74 | CONNECT_BOOTSTRAP_SERVERS: 'broker:29092'
75 | CONNECT_REST_ADVERTISED_HOST_NAME: connect
76 | CONNECT_REST_PORT: 8083
77 | CONNECT_GROUP_ID: compose-connect-group
78 | CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs
79 | CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1
80 | CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000
81 | CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets
82 | CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1
83 | CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status
84 | CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1
85 | CONNECT_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter
86 | CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
87 | CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: true
88 | CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: true
89 | CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
90 | CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
91 | CONNECT_PLUGIN_PATH: '/usr/share/java,/etc/kafka-connect/jars'
92 |
93 | volumes:
94 |      - ./kafka_connector/jars:/etc/kafka-connect/jars  # mount connector jars into the Connect plugin path
95 |
96 | # Confluent control center to manage Kafka
97 | control-center:
98 | image: confluentinc/cp-enterprise-control-center:7.5.0
99 | # hostname: control-center
100 | container_name: flink-control-center
101 | depends_on:
102 | - broker
103 | - schema-registry
104 | - connect
105 | ports:
106 | - "9021:9021"
107 | healthcheck:
108 | test: ["CMD", "curl", "-f", "http://localhost:9021/healthcheck"] # Adjust the URL and options as needed
109 | interval: 30s
110 | timeout: 10s
111 | retries: 3
112 | environment:
113 | CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092'
114 | CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'connect:8083'
115 | # CONTROL_CENTER_KSQL_KSQLDB1_URL: "http://ksqldb-server:8088"
116 | # CONTROL_CENTER_KSQL_KSQLDB1_ADVERTISED_URL: "http://localhost:8088"
117 | CONTROL_CENTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
118 | CONTROL_CENTER_REPLICATION_FACTOR: 1
119 | CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1
120 | # CONTROL_CENTER_MONITORING_INTERCEPTOR_TOPIC_PARTITIONS: 1
121 | CONTROL_CENTER_CONNECT_HEALTHCHECK_ENDPOINT: '/connectors'
122 | CONFLUENT_METRICS_TOPIC_REPLICATION: 1
123 | # PORT: 9021
124 |
125 | # Offline store
126 | timescaledb:
127 | image: timescale/timescaledb:latest-pg13
128 | command: postgres -c shared_preload_libraries=timescaledb
129 | container_name: flink-timescaledb
130 | ports:
131 | - "5432:5432"
132 | healthcheck:
133 | test: ['CMD', 'psql', '-U', 'k6', '-c', 'SELECT 1']
134 | interval: 10s
135 | timeout: 5s
136 | retries: 5
137 | environment:
138 | - PGDATA=/var/lib/postgresql/data/timescaledb
139 | - POSTGRES_DB=k6
140 | - POSTGRES_USER=k6
141 | - POSTGRES_PASSWORD=k6
142 |
143 | # Simulation of sending messages to Kafka topics
144 | kafka_producer:
145 | build:
146 | context: .
147 |       dockerfile: Dockerfile  # relative to the build context above
148 | depends_on:
149 | broker:
150 | condition: service_healthy
151 | timescaledb:
152 | condition: service_healthy
153 | container_name: flink-kafka-producer
154 | volumes:
155 |       - ./Dataset/images/train:/images  # dataset downloaded in the Dataset Setup step
156 |
--------------------------------------------------------------------------------
/streaming/images/data-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/streaming/images/data-pipeline.png
--------------------------------------------------------------------------------
/streaming/kafka_connector/connect-timescaledb-sink.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "images-sink",
3 | "config": {
4 | "connector.class": "io.confluent.connect.jdbc.JdbcSinkConnector",
5 | "tasks.max": "1",
6 | "topics": "sink_images_0",
7 | "connection.url": "jdbc:postgresql://host.docker.internal:5432/k6",
8 | "connection.user": "k6",
9 | "connection.password": "k6",
10 | "auto.create": true
11 | }
12 | }
--------------------------------------------------------------------------------
/streaming/produce.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import base64
3 | import json
4 | import os
5 | from time import sleep
6 |
7 | from bson import json_util
8 | from kafka import KafkaAdminClient, KafkaProducer
9 | from kafka.admin import NewTopic
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument(
16 | "-m",
17 | "--mode",
18 | default="setup",
19 | choices=["setup", "teardown"],
20 | help="Whether to setup or teardown a Kafka topic with driver stats events. Setup will teardown before beginning emitting events.",
21 | )
22 | parser.add_argument(
23 | "-b",
24 | "--bootstrap_servers",
25 | default="localhost:9092",
26 | help="Where the bootstrap server is",
27 | )
28 | parser.add_argument(
29 | "-c",
30 | "--schemas_path",
31 | default="./avro_schemas",
32 | help="Folder containing all generated avro schemas",
33 | )
34 | parser.add_argument(
35 | "-i",
36 | "--image_dir",
37 | default="./images",
38 | help="Directory containing the images to send",
39 | )
40 |
41 | args = parser.parse_args()
42 |
43 | image_id_counter = 1
44 | NUM_IMAGE = 1  # number of image topics; the producer currently writes only to image_0
44 |
45 | def create_topic(admin, topic_name):
46 | # Create topic if not exists
47 | try:
48 | # Create Kafka topic
49 | topic = NewTopic(name=topic_name, num_partitions=1, replication_factor=1)
50 | admin.create_topics([topic])
51 | print(f"A new topic {topic_name} has been created!")
52 | except Exception:
53 | print(f"Topic {topic_name} already exists. Skipping creation!")
54 | pass
55 |
56 |
57 | def create_streams(servers, schemas_path, image_dir):
58 |     global image_id_counter  # module-level counter, incremented for every message sent
59 |     producer = None
59 | admin = None
60 | for _ in range(10):
61 | try:
62 | producer = KafkaProducer(bootstrap_servers=servers)
63 | admin = KafkaAdminClient(bootstrap_servers=servers)
64 | print("SUCCESS: instantiated Kafka admin and producer")
65 | break
66 | except Exception as e:
67 | print(
68 | f"Trying to instantiate admin and producer with bootstrap servers {servers} with error {e}"
69 | )
70 | sleep(10)
71 | pass
72 |
73 | image_files = [
74 | os.path.join(image_dir, f)
75 | for f in os.listdir(image_dir)
76 | if os.path.isfile(os.path.join(image_dir, f))
77 | ]
78 | image_index = 0
79 |
80 | while True:
81 | image_file = image_files[image_index]
82 | image_index = (image_index + 1) % len(image_files)
83 |
84 | with open(image_file, "rb") as img_file:
85 | image_data = img_file.read()
86 |
87 | record = {
88 | "schema": {
89 | "type": "struct",
90 | "fields": [
91 | {"type": "int64", "optional": False, "field": "image_id"},
92 | {"type": "bytes", "optional": False, "field": "image_data"},
93 | ],
94 | }
95 | }
96 | record["payload"] = {}
97 |
98 |         record["payload"]["image_id"] = image_id_counter
99 |         image_id_counter += 1  # increment the image id
100 |         # Connect's JsonConverter expects "bytes" fields as base64-encoded strings
101 |         record["payload"]["image_data"] = base64.b64encode(image_data).decode("utf-8")
101 |
102 |
103 |         # All images are sent to a single topic
104 |         topic_name = "image_0"
105 |
106 | # Create a new topic for this image if not exists
107 | create_topic(admin, topic_name=topic_name)
108 |
109 | # Send messages to this topic
110 | producer.send(
111 | topic_name, json.dumps(record, default=json_util.default).encode("utf-8")
112 | )
113 | print(record)
114 | sleep(2)
115 |
116 |
117 | def teardown_stream(topic_name, servers=["localhost:9092"]):
118 | try:
119 | admin = KafkaAdminClient(bootstrap_servers=servers)
120 | print(admin.delete_topics([topic_name]))
121 | print(f"Topic {topic_name} deleted")
122 | except Exception as e:
123 | print(str(e))
124 | pass
125 |
126 |
127 | if __name__ == "__main__":
128 | parsed_args = vars(args)
129 | mode = parsed_args["mode"]
130 | servers = parsed_args["bootstrap_servers"]
131 | image_dir = parsed_args["image_dir"]
132 |
133 | # Tear down all previous streams
134 | print("Tearing down all existing topics!")
135 | for image_id in range(NUM_IMAGE):
136 | try:
137 | teardown_stream(f"image_{image_id}", [servers])
138 | except Exception as e:
139 | print(f"Topic image_{image_id} does not exist. Skipping...!")
140 |
141 | if mode == "setup":
142 | schemas_path = parsed_args["schemas_path"]
143 | create_streams([servers], schemas_path, image_dir)
144 |
--------------------------------------------------------------------------------
/streaming/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cmd=$1
3 |
4 | usage() {
5 |     echo "run.sh <command> [arguments]"
6 |     echo "Available commands:"
7 |     echo " register_connector      register a new Kafka connector"
8 |     echo " generate_schemas        generate random Avro schemas for the producer"
9 |     echo "Available arguments:"
10 |     echo " [connector config path] path to connector config, for command register_connector only"
11 | }
13 |
14 | if [[ -z "$cmd" ]]; then
15 | echo "Missing command"
16 | usage
17 | exit 1
18 | fi
19 |
20 | case $cmd in
21 | register_connector)
22 | if [[ -z "$2" ]]; then
23 | echo "Missing connector config path"
24 | usage
25 | exit 1
26 | else
27 | echo "Registering a new connector from $2"
28 |       # Pass a connector config path such as: kafka_connector/connect-timescaledb-sink.json
29 | curl -s -X POST -H 'Content-Type: application/json' --data @$2 http://localhost:8083/connectors
30 | fi
31 | ;;
32 | generate_schemas)
33 | # Generate data for 1 device with number of features in the range from 2 to 10
34 | python generate_schemas.py --min_features 2 --max_features 10 --num_schemas 1
35 | ;;
36 | *)
37 | echo -n "Unknown command: $cmd"
38 | usage
39 | exit 1
40 | ;;
41 | esac
--------------------------------------------------------------------------------