├── .env
├── Jenkinsfile
├── README.md
├── api
│   ├── README.md
│   ├── images
│   │   └── deployment_pipeline.png
│   ├── triton_client.py
│   └── upload_model_to_minio.py
├── constants.py
├── deployments
│   ├── mwt.yaml
│   ├── triton-isvc.yaml
│   └── triton-servingruntime.yaml
├── distributed_training
│   ├── Dockerfile
│   ├── README.md
│   ├── build.sh
│   ├── images
│   │   └── training_pipeline.png
│   ├── mwt.py
│   ├── nets
│   │   └── nn.py
│   ├── test
│   │   └── test.yaml
│   ├── utils
│   │   ├── config.py
│   │   ├── dataset.py
│   │   └── image_utils.py
│   └── weights
│       └── model.h5
├── docker-compose.yml
├── images
│   ├── PipelineAllcode.png
│   ├── add_credential.png
│   ├── add_credential_dockerhub.png
│   ├── add_token_dockerhub.png
│   ├── architecutre_overview.png
│   ├── bus.jpg
│   ├── check_request_github_jenkins.png
│   ├── connector.png
│   ├── data_pipeline.png
│   ├── diagram_pipe.gif
│   ├── error_log_pod.png
│   ├── false_modelmesh_deploy.png
│   ├── generate_token_docker_hub.png
│   ├── get_token_github.png
│   ├── github_tokens.png
│   ├── instal_docker_jenkins.png
│   ├── install_docker_success.png
│   ├── isvc.png
│   ├── jenkins_container.png
│   ├── jenkins_portal.png
│   ├── jenkins_ui.png
│   ├── messenger.png
│   ├── minio-credentials.png
│   ├── mlflow _modelregistry.png
│   ├── modelmesh-serving-installation.png
│   ├── ngrok.png
│   ├── ngrok_forwarding.png
│   ├── password_jenkins.png
│   ├── result.png
│   ├── result_connect_jenkins_github.png
│   ├── result_push_dockerhub.png
│   ├── result_train_pod.png
│   ├── strategy.png
│   ├── strategy_scope.png
│   ├── structure_data.png
│   ├── structure_training.png
│   ├── topic_tab.png
│   ├── train_process.png
│   ├── ui_build_jenkins.png
│   ├── validate_connect_repo.png
│   └── webhook_github.png
├── mlflow
│   └── Dockerfile
├── model_repo
│   └── yolov8n_car
│       ├── 1
│       │   └── model.onnx
│       └── config.pbtxt
├── notebooks
│   └── debug.ipynb
├── requirements.txt
└── streaming
    ├── Dockerfile
    ├── README.md
    ├── docker-compose.yml
    ├── images
    │   └── data-pipeline.png
    ├── kafka_connector
    │   └── connect-timescaledb-sink.json
    ├── produce.py
    └── run.sh
-------------------------------------------------------------------------------- /.env: --------------------------------------------------------------------------------
1 | MINIO_ACCESS_KEY=AKIAIOSFODNN7EXAMPLE
2 | MINIO_SECRET_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
-------------------------------------------------------------------------------- /Jenkinsfile: --------------------------------------------------------------------------------
1 | pipeline {
2 |     agent any
3 | 
4 |     options{
5 |         buildDiscarder(logRotator(numToKeepStr: '5', daysToKeepStr: '5'))
6 |         timestamps()
7 |     }
8 | 
9 |     environment{
10 |         registry = '6666688889/distributed_training'
11 |         registryCredential = 'dockerhub'
12 |     }
13 | 
14 |     stages {
15 |         stage('Build') {
16 |             steps {
17 |                 script {
18 |                     echo 'Building image for deployment..'
19 |                     def dockerImage = docker.build("${registry}:${BUILD_NUMBER}", "distributed_training/.")
20 |                     echo 'Pushing image to dockerhub..'
21 |                     docker.withRegistry( '', registryCredential ) {
22 |                         dockerImage.push()
23 |                         dockerImage.push('latest')
24 |                     }
25 |                 }
26 |             }
27 |         }
28 |         // stage('Deploy') {
29 |         //     steps {
30 |         //         echo 'Deploying models..'
31 |         //         echo 'Running a script to trigger pull and start a docker container'
32 |         //     }
33 |         // }
34 |     }
35 | }
36 | 
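The `Deploy` stage above is left commented out. A minimal sketch of what it could look like — an assumption, not part of the original pipeline; it presumes `kubectl` is configured on the Jenkins agent and simply reuses the repository's own manifest:

```groovy
// Hypothetical Deploy stage (stubbed out in the original Jenkinsfile).
stage('Deploy') {
    steps {
        echo 'Deploying models..'
        // Roll the freshly pushed image out via the repo's TFJob manifest.
        sh 'kubectl apply -f deployments/mwt.yaml'
    }
}
```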
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | 
2 | # Scalable ML System for Car Detection
3 | 
4 | ## 📕 Table Of Contents
5 | - 🗣️ [Introduction](#introduction)
6 | - 🚀 [Challenge](#challenge)
7 | - 🌟 [System Architecture](#system-architecture)
8 | - 📁 [Repository Structure](#repository-structure)
9 | - 🔍 [How to Guide](#how-to-guide)
10 | 
11 | ## 🗣️ Introduction:
12 | 
13 | This project implements an advanced car detection system using a comprehensive machine learning pipeline. Our solution leverages state-of-the-art technologies to process data, train models, and deploy them efficiently at scale.
14 | 
15 | ## 🚀 Challenge:
16 | This project faced several challenges: ensuring data consistency and scalability during ingestion; managing resources and synchronization in distributed training; automating the CI/CD pipeline; converting and deploying models efficiently; ensuring data privacy and security; optimizing performance; and debugging and troubleshooting a distributed system.
17 | 
18 | 
19 | ## 🌟 System Architecture
20 | ![systemoverview](images/architecutre_overview.png)
21 | 
22 | The pipeline consists of two main components:
23 | 
24 | - **Data Pipeline**: This part of the system handles the ingestion, preprocessing, and feature extraction of car detection data. It includes steps like loading the dataset, performing preprocessing tasks, and extracting relevant features using tools like Apache Flink and Redis.
25 | - **Training and Deployment Pipeline**: The training and deployment pipeline focuses on the model development and deployment processes. It includes steps like saving the trained model and artifacts, evaluating the model, and deploying the model using tools like MLflow, Jenkins, and Kubernetes.
26 | 
27 | 
28 | **Key features of our pipeline include:**
29 | 
30 | - Data ingestion and preprocessing using Airflow and Kafka for stream processing
31 | - Feature storage in Redis and an offline data store
32 | - Distributed model training with TensorFlow on Kubeflow
33 | - Model versioning and artifact management with MLflow
34 | - Automated deployment pipeline using Jenkins and Kubernetes
35 | - Scalable model serving with the KServe API server
36 | 
37 | 
38 | ## 📁 Repository Structure
39 | ```
40 | 📦
41 | ├─ .env # Environment variables used across the project
42 | ├─ Jenkinsfile # Configuration for a Jenkins CI/CD pipeline
43 | ├─ README.md # General project documentation
44 | ├─ api # Contains code related to the API layer
45 | │  ├─ README.md # Documentation for the API serving component
46 | │  ├─ triton_client.py # Code for interacting with the Triton Inference Server
47 | │  └─ upload_model_to_minio.py # Script to upload the trained model to MinIO storage
48 | ├─ constants.py # Shared constants and configurations used across the project
49 | ├─ deployments # Kubernetes configurations
50 | │  ├─ mwt.yaml # Configuration for the Multi-Worker Training (MWT) component
51 | │  ├─ triton-isvc.yaml # Configuration for the Triton Inference Service
52 | │  └─ triton-servingruntime.yaml # Configuration for the Triton Inference Server runtime
53 | ├─ distributed_training # Code and configuration for distributed training
54 | │  ├─ Dockerfile # Dockerfile for the distributed training component
55 | │  ├─ README.md # Documentation for the distributed training component
56 | │  ├─ build.sh # Script to build the distributed training Docker image
57 | │  ├─ mwt.py # Main logic for the Multi-Worker Training component
58 | │  ├─ nets # Neural network architecture definitions
59 | │  │  └─ nn.py # Neural network model implementation
60 | │  ├─ test # Test configuration for the distributed training
61 | │  │  └─ test.yaml # Test deployment configuration
62 | │  ├─ utils # Utility functions for the distributed training
63 | │  │  ├─ config.py # Configuration handling for the distributed training
64 | │  │  ├─ dataset.py # Dataset-related utilities
65 | │  │  └─ image_utils.py # Image processing utilities
66 | │  └─ weights # Folder containing a pre-trained model
67 | │     └─ model.h5 # Saved weights for the pre-trained model
68 | ├─ docker-compose.yml # Docker Compose configuration for the entire project
69 | ├─ images # Folder for storing project-related images
70 | ├─ mlflow # Code and configuration for the MLflow component
71 | │  └─ Dockerfile # Dockerfile for the MLflow component
72 | ├─ model_repo # Repository for storing the trained model
73 | │  └─ yolov8n_car # Folder for the YOLOv8 car detection model
74 | │     ├─ 1 # Version 1 of the model
75 | │     │  └─ model.onnx # ONNX format of the trained model
76 | │     └─ config.pbtxt # Triton Inference Server configuration for the model
77 | ├─ notebooks # Jupyter notebooks for debugging and exploration
78 | │  └─ debug.ipynb # Sample Jupyter Notebook for debugging
79 | ├─ requirements.txt # Python dependencies for the project
80 | └─ streaming # Code and configuration for the data streaming component
81 |    ├─ Dockerfile # Dockerfile for the streaming component
82 |    ├─ README.md # Documentation for the streaming component
83 |    ├─ docker-compose.yml # Docker Compose configuration for the streaming component
84 |    ├─ kafka_connector # Configuration for the Kafka connector
85 |    │  └─ connect-timescaledb-sink.json # Kafka connector configuration for the TimescaleDB sink
86 |    ├─ produce.py # Script to produce sample data for the streaming component
87 |    └─ run.sh # Script to run the streaming component
88 | ```
89 | 
90 | ## 🔍 How to Guide:
91 | 
92 | ### 1. Data Pipeline:
93 | - The data pipeline starts with the Car Detection Dataset Source.
94 | - Images are loaded and preprocessed, and features are extracted, using Airflow.
95 | - We also use Kafka to ingest a simulated data stream, which is then processed by Apache Flink.
96 | - Data is stored in Redis (the online store) and synced to PostgreSQL (the offline store).
97 | 
98 | To get started with the *Data Pipeline* component:
99 | ```shell
100 | cd streaming
101 | ```
102 | 
103 | And read the respective README file: [Data Pipeline Guide](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/streaming/README.md)
104 | 
105 | ### 2. Training Pipeline:
106 | Our training pipeline utilizes Kubeflow and TensorFlow for distributed training. Here's an overview of the process:
107 | 
108 | 1. Data Preparation: Features are pulled from the offline store and prepared using Kubeflow.
109 | 
110 | 2. Distributed Training: We use TensorFlow for distributed training, which allows us to process large datasets efficiently across multiple nodes.
111 | 
112 | 3. Model Evaluation: After training, the model is evaluated to ensure it meets performance criteria.
113 | 
114 | 4. Artifact Management: The trained model and associated artifacts are saved to the MLflow model registry for versioning and easy retrieval.
115 | 
116 | Key features of our distributed training approach:
117 | - Scalability: Easily scale training across multiple nodes using Kubeflow.
118 | - Efficiency: Utilize TensorFlow's distributed training capabilities for faster processing.
119 | - Version Control: Track experiments and models using MLflow for reproducibility.
120 | 
121 | To get started with the training pipeline:
122 | 
123 | ```shell
124 | cd distributed_training
125 | ```
126 | 
127 | For detailed instructions on setting up and running the distributed training, please refer to our [Distributed Training Guide](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/distributed_training/README.md).
128 | 
129 | 
130 | 
131 | 
132 | ### 3. Deployment Pipeline:
133 | The serving pipeline deploys the trained model for inference, ensuring that it can handle various workloads efficiently. Below are the key aspects of our serving approach:
134 | 
135 | 1. **Scalability**: ModelMesh scales the serving infrastructure dynamically to accommodate varying loads and large volumes of requests, ensuring reliable performance even under heavy demand.
136 | 
137 | 2. **Multi-Model Support**: ModelMesh can manage and serve multiple models simultaneously, providing flexibility in deployment strategies and enabling seamless model updates.
138 | 
139 | 3. **Efficient Resource Utilization**: By dynamically allocating resources based on the demand for different models, ModelMesh optimizes the use of computational resources, reducing costs and improving efficiency.
140 | 
141 | To get started with the serving pipeline:
142 | 
143 | ```shell
144 | cd api
145 | ```
146 | 
147 | For detailed instructions on setting up and managing the serving infrastructure, please refer to our [Deployment Pipeline Guide](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/api/README.md).
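Once every component is up, a quick end-to-end sanity check looks like this (commands taken from the deployment guide below; they assume the ModelMesh service is already running in the `modelmesh-serving` namespace):

```shell
# Expose the serving endpoint locally, then run one image through it.
kubectl port-forward --address 0.0.0.0 service/modelmesh-serving 8008 -n modelmesh-serving &
python api/triton_client.py   # downloads bus.jpg, runs inference, writes drawed.jpg
```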
-------------------------------------------------------------------------------- /api/README.md: --------------------------------------------------------------------------------
1 | # Deployment Pipeline Guide
2 | 
3 | ## Table of Contents
4 | 
5 | 1. [Prerequisites](#prerequisites)
6 |    - [Install kustomize](#install-kustomize)
7 |    - [Install modelmesh-serving](#install-modelmesh-serving)
8 | 2. [Deployment Pipeline Overview](#deployment-pipeline-overview)
9 | 3. [Getting Started](#getting-started)
10 | 4. [Making Prediction](#making-prediction)
11 | 
12 | ---
13 | 
14 | ## Prerequisites
15 | 
16 | Before getting started, ensure that your environment meets the following prerequisites:
17 | 
18 | - GKE version: use GKE version 1.29
19 | 
20 | ### Install kustomize
21 | 
22 | [Kustomize](https://kubectl.docs.kubernetes.io/) is an alternative tool to Helm for installing applications on Kubernetes. Install it by running the following commands:
23 | 
24 | ```shell
25 | curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
26 | sudo mv kustomize /usr/local/bin/
27 | ```
28 | 
29 | ### Install modelmesh-serving
30 | 
31 | Clone the modelmesh-serving repository:
32 | 
33 | ```shell
34 | RELEASE=release-0.9
35 | git clone -b $RELEASE --depth 1 --single-branch https://github.com/kserve/modelmesh-serving.git
36 | cd modelmesh-serving
37 | ```
38 | 
39 | Create a new namespace and install modelmesh-serving:
40 | 
41 | ```shell
42 | kubectl create namespace modelmesh-serving
43 | ./scripts/install.sh --namespace modelmesh-serving --quickstart
44 | ```
45 | 
46 | After a few minutes, you should see the following output:
47 | 
48 | ![modelmesh-serving](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/modelmesh-serving-installation.png)
49 | 
50 | ## Deployment Pipeline Overview
51 | 
52 | The following diagram provides an overview of the deployment pipeline, detailing each step from model optimization to deployment and scaling.
53 | 
54 | ![deploymentOverview](images/deployment_pipeline.png)
55 | 
56 | ### Key Components:
57 | 
58 | 1. Model Optimization (ONNX):
59 |    - Optimizes the model for serving by converting it into ONNX format.
60 | 
61 | 2. Model Testing:
62 |    - Runs tests to ensure that the optimized model meets the necessary performance and accuracy criteria.
63 | 
64 | 3. Runtime Containerization:
65 |    - Packages the model into a containerized runtime environment.
66 | 
67 | 4. Ingest Serving-Model to S3:
68 |    - The containerized model is uploaded to S3-compatible storage, such as MinIO.
69 | 
70 | 5. Deployment and Scaling:
71 |    - The model is deployed and scaled using Kubernetes (K8s), managed through `kubectl`.
72 | 
73 | 6. Model Serving API:
74 |    - The deployed model is accessible via an API, allowing users to make predictions.
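For reference, the serving API speaks the KServe v2 inference protocol over REST. A sketch of the request shape — the model name `onnx` and the input tensor name `images` follow `api/triton_client.py`, and the `data` array must hold all 1×3×640×640 float values (elided here):

```shell
curl -X POST http://localhost:8008/v2/models/onnx/infer \
  -H "Content-Type: application/json" \
  -d '{"inputs": [{"name": "images", "shape": [1, 3, 640, 640], "datatype": "FP32", "data": [...]}]}'
```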
80 | 
81 | ## Getting Started
82 | 
83 | ### Port-forward the `MinIO` Service
84 | 
85 | To access MinIO locally, use the following command:
86 | 
87 | ```shell
88 | kubectl port-forward svc/minio 9000:9000 -n modelmesh-serving
89 | ```
90 | 
91 | ### Access MinIO Credentials
92 | 
93 | Obtain the `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` required to sign in and upload your models to MinIO (replace `minio-676b8dcf45-nw2zw` with your own MinIO pod name, as listed by `kubectl get pods -n modelmesh-serving`):
94 | 
95 | ```shell
96 | kubectl get po minio-676b8dcf45-nw2zw -o json | jq -r '.spec.containers[0].env[] | select(.name == "MINIO_ACCESS_KEY") | .value'
97 | 
98 | kubectl get po minio-676b8dcf45-nw2zw -o json | jq -r '.spec.containers[0].env[] | select(.name == "MINIO_SECRET_KEY") | .value'
99 | ```
100 | 
101 | You can see that in my case, `MINIO_ACCESS_KEY` is `AKIAIOSFODNN7EXAMPLE`, and `MINIO_SECRET_KEY` is `wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY`.
102 | 
103 | ![minio-credentials](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/minio-credentials.png)
104 | 
105 | Open `localhost:9000` to access MinIO and upload the model to the MinIO bucket. The structure for storing our ONNX model and the `config.pbtxt` file should look as shown below. Remember to use the model trained in the previous step for serving; the format must be ONNX, so convert the weight file to ONNX before uploading it to the bucket.
106 | 
107 | ![Screenshot from 2024-05-11 17-01-13](https://github.com/HungNguyenDev1511/Capstone-Project-Model-Serving/assets/69066161/adc4b65c-a51c-4e64-9a1a-377f680810ed)
108 | 
109 | ![Screenshot from 2024-05-11 17-01-19](https://github.com/HungNguyenDev1511/Capstone-Project-Model-Serving/assets/69066161/8461cdc0-1fcd-491e-9b24-8d8d9b5bfc58)
110 | 
111 | 
112 | ### Upload Model to MinIO:
113 | 
114 | You can manually upload the model or use the following script:
115 | ``` shell
116 | python api/upload_model_to_minio.py
117 | ```
118 | 
119 | ### Deploy the ONNX Model:
120 | 
121 | Deploy the model using the following commands:
122 | 
123 | ```shell
124 | kubectl apply -f deployments/triton-isvc.yaml
125 | kubectl apply -f deployments/triton-servingruntime.yaml
126 | ```
127 | 
128 | ### Verify the Service Readiness:
129 | 
130 | Check if the service is ready:
131 | 
132 | ```shell
133 | kubectl get isvc
134 | ```
135 | 
136 | At first, the `READY` column shows `false`:
137 | 
138 | ![Error](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/false_modelmesh_deploy.png)
139 | 
140 | It may take several minutes for the service to become `READY`.
141 | 
142 | If it doesn't, inspect the pod corresponding to Triton and check the logs of its `mm` container:
143 | 
144 | ```shell
145 | kubectl describe pod modelmesh-serving-triton-2.x-6c4978d6db-5k59z
146 | kubectl logs modelmesh-serving-triton-2.x-6c4978d6db-5k59z -c mm
147 | ```
148 | 
149 | ![Error Log Pod Describe](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/error_log_pod.png)
150 | 
151 | 
152 | Once the service is ready, you should see the following result:
153 | 
154 | ![Result](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/result.png)
155 | ![Result Inference Service](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/isvc.png)
156 | 
157 | 
158 | ## Making Prediction:
159 | 
160 | To make a prediction, follow these steps:
161 | 
162 | 1. Port-forward the `modelmesh-serving` service:
163 | ```shell
164 | kubectl port-forward --address 0.0.0.0 service/modelmesh-serving 8008 -n modelmesh-serving
165 | ```
166 | 2. Test your newly created `modelmesh-serving` service:
167 | ```shell
168 | python api/triton_client.py
169 | ```
170 | 
-------------------------------------------------------------------------------- /api/images/deployment_pipeline.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/api/images/deployment_pipeline.png
-------------------------------------------------------------------------------- /api/triton_client.py: --------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import numpy as np
4 | import wget
5 | import os
6 | import cv2
7 | from constants import CLASSES  # Import the CLASSES list from constants.py
8 | 
9 | 
10 | def preprocess(cv2_image, model_shape=(640, 640)):
11 |     image_rgb = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB)
12 |     resized = cv2.resize(image_rgb, model_shape)
13 | 
14 |     # Scale pixel values to the range [0, 1]
15 |     input_image = resized / 255.0
16 |     input_image = input_image.transpose(2, 0, 1)
17 |     result = input_image[np.newaxis, :, :, :].astype(np.float32)
18 | 
19 |     return result
20 | 
21 | 
22 | def xywh2xyxy(x):
23 |     # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2)
24 |     y = np.copy(x)
25 |     y[..., 0] = x[..., 0] - x[..., 2] / 2
26 |     y[..., 1] = x[..., 1] - x[..., 3] / 2
27 |     y[..., 2] = x[..., 0] + x[..., 2] / 2
28 |     y[..., 3] = x[..., 1] + x[..., 3] / 2
29 |     return y
30 | 
31 | 
32 | def nms(boxes, scores, iou_threshold):
33 |     # Sort by score
34 |     sorted_indices = np.argsort(scores)[::-1]
35 | 
36 |     keep_boxes = []
37 |     while sorted_indices.size > 0:
38 |         # Pick the box with the highest remaining score
39 |         box_id = sorted_indices[0]
40 |         keep_boxes.append(box_id)
41 | 
42 |         # Compute IoU of the picked box with the rest
43 |         ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])
44 | 
45 |         # Remove boxes with IoU over the threshold
46 |         keep_indices = np.where(ious < iou_threshold)[0]
47 | 
48 |         sorted_indices = sorted_indices[keep_indices + 1]
49 | 
50 |     return keep_boxes
51 | 
52 | 
53 | def compute_iou(box, boxes):
54 |     # Compute xmin, ymin, xmax, ymax for both boxes
55 |     xmin = np.maximum(box[0], boxes[:, 0])
56 |     ymin = np.maximum(box[1], boxes[:, 1])
57 |     xmax = np.minimum(box[2], boxes[:, 2])
58 |     ymax = np.minimum(box[3], boxes[:, 3])
59 | 
60 |     # Compute intersection area
61 |     intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)
62 | 
63 |     # Compute union area
64 |     box_area = (box[2] - box[0]) * (box[3] - box[1])
65 |     boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
66 |     union_area = box_area + boxes_area - intersection_area
67 | 
68 |     # Compute IoU
69 |     iou = intersection_area / union_area
70 | 
71 |     return iou
72 | 
73 | 
74 | def postprocess(outputs, original_shape, model_shape=(640, 640), threshold=0.8):
75 |     model_height, model_width = model_shape
76 |     original_height, original_width = original_shape[:2]
77 |     outputs = np.array(outputs[0]["data"]).reshape(outputs[0]["shape"])
78 |     predictions = np.squeeze(outputs).T
79 | 
80 |     # Filter out object confidence scores below threshold
81 |     scores = np.max(predictions[:, 4:], axis=1)
82 |     predictions = predictions[scores > threshold, :]
83 |     scores = scores[scores > threshold]
84 |     class_ids = np.argmax(predictions[:, 4:], axis=1)
85 | 
86 |     # Get bounding boxes for each object
87 |     bboxes = predictions[:, :4]
88 | 
89 |     # Rescale bboxes from model-input coordinates to the original image size
90 |     model_shape = np.array([model_width, model_height, model_width, model_height])
91 |     original_shape = np.array(
92 |         [original_width, original_height, original_width, original_height]
93 |     )
94 |     bboxes = np.divide(bboxes, model_shape, dtype=np.float32)
95 |     bboxes *= original_shape
96 |     bboxes = bboxes.astype(np.int32)
97 | 
98 |     # Convert (cx, cy, w, h) to corner format (x1, y1, x2, y2) before NMS,
99 |     # since compute_iou expects corner coordinates, then apply non-maxima
100 |     # suppression to suppress weak, overlapping bounding boxes
101 |     bboxes = xywh2xyxy(bboxes)
102 |     indices = nms(bboxes, scores, 0.3)
103 | 
104 |     return bboxes[indices], scores[indices], class_ids[indices]
105 | 
106 | 
107 | def draw_image(image, bboxes, scores, class_ids):
108 |     image_draw = image.copy()
109 |     # Boxes are already in corner format after postprocess()
110 |     for bbox, score, label in zip(bboxes, scores, class_ids):
111 |         bbox = bbox.round().astype(np.int32).tolist()
112 |         cls_id = int(label)
113 |         cls = CLASSES[cls_id]
114 |         color = (0, 255, 0)
115 |         cv2.rectangle(image_draw, tuple(bbox[:2]), tuple(bbox[2:]), color, 2)
116 |         cv2.putText(
117 |             image_draw,
118 |             f"{cls}:{int(score*100)}",
119 |             (bbox[0], bbox[1] - 2),
120 |             cv2.FONT_HERSHEY_SIMPLEX,
121 |             0.60,
122 |             [225, 255, 255],
123 |             thickness=1,
124 |         )
125 |     cv2.imwrite("drawed.jpg", image_draw)
126 | 
127 | 
128 | def main():
129 |     image_url = "https://ultralytics.com/images/bus.jpg"
130 |     image_name = os.path.basename(image_url)
131 |     if not os.path.exists(image_name):
132 |         wget.download(image_url)
133 | 
134 |     original_image = cv2.imread(image_name)
135 |     image = preprocess(original_image)
136 | 
137 |     request_data = {
138 |         "inputs": [
139 |             {
140 |                 "name": "images",
141 |                 "shape": image.shape,
142 |                 "datatype": "FP32",
143 |                 "data": image.flatten().tolist(),  # Flatten the image and convert to list
144 |             }
145 |         ]
146 |     }
147 | 
148 |     headers = {
149 |         "Content-Type": "application/json",
150 |     }
151 | 
152 |     response = requests.post(
153 |         "http://localhost:8008/v2/models/onnx/infer",
154 |         headers=headers,
155 |         data=json.dumps(request_data),
156 |         verify=False,
157 |     ).json()
158 | 
159 |     result = response["outputs"]
160 |     bboxes, scores, class_ids = postprocess(result, original_image.shape)
161 |     print(bboxes)
162 |     print(scores)
163 |     print(class_ids)
164 |     draw_image(original_image, bboxes, scores, class_ids)
165 | 
166 | 
167 | if __name__ == "__main__":
168 |     main()
-------------------------------------------------------------------------------- /api/upload_model_to_minio.py: --------------------------------------------------------------------------------
1 | from minio import Minio
2 | from minio.error import S3Error
3 | from dotenv import load_dotenv
4 | import os
5 | 
6 | 
7 | def main():
8 |     # Load MINIO_ACCESS_KEY / MINIO_SECRET_KEY from the project's .env file.
9 |     load_dotenv()
10 | 
11 |     # Create a client for the local MinIO server. The endpoint is host:port
12 |     # (no scheme); secure=False because the port-forward speaks plain HTTP.
13 |     client = Minio(
14 |         "localhost:9000",
15 |         access_key=os.getenv("MINIO_ACCESS_KEY"),
16 |         secret_key=os.getenv("MINIO_SECRET_KEY"),
17 |         secure=False,
18 |     )
19 | 
20 |     # Make the bucket referenced by deployments/triton-isvc.yaml if it does
21 |     # not exist yet.
22 |     bucket_name = "modelmesh-example-models"
23 |     if not client.bucket_exists(bucket_name):
24 |         client.make_bucket(bucket_name)
25 |     else:
26 |         print(f"Bucket {bucket_name} already exists")
27 | 
28 |     # Upload the Triton model repository (model.onnx and config.pbtxt),
29 |     # preserving the directory layout. The bucket and prefix must match the
30 |     # storageUri in deployments/triton-isvc.yaml.
31 |     local_repo = "./model_repo/yolov8n_car"
32 |     prefix = "cardetect/yolov8n_car"
33 |     for root, _, files in os.walk(local_repo):
34 |         for file_name in files:
35 |             file_path = os.path.join(root, file_name)
36 |             rel_path = os.path.relpath(file_path, local_repo)
37 |             object_name = f"{prefix}/{rel_path}".replace(os.sep, "/")
38 |             client.fput_object(bucket_name, object_name, file_path)
39 |     print(f"Model and config are successfully uploaded to bucket '{bucket_name}'.")
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     try:
44 |         main()
45 |     except S3Error as exc:
46 |         print("error occurred.", exc)
-------------------------------------------------------------------------------- /constants.py: --------------------------------------------------------------------------------
1 | # constants.py
2 | CLASSES = [
3 |     "person",
4 |     "bicycle",
5 |     "car",
6 |     "motorcycle",
7 |     "airplane",
8 |     "bus",
9 |     "train",
10 |     "truck",
11 |     "boat",
12 |     "traffic light",
13 |     "fire hydrant",
14 |     "street sign",
15 |     "stop sign",
16 |     "parking meter",
17 |     "bench",
18 |     "bird",
19 |     "cat",
20 |     "dog",
21 |     "horse",
22 |     "sheep",
23 |     "cow",
24 |     "elephant",
25 |     "bear",
26 |     "zebra",
27 |     "giraffe",
28 |     "hat",
29 |     "backpack",
30 |     "umbrella",
31 |     "shoe",
32 |     "eye glasses",
33 |     "handbag",
34 |     "tie",
35 |     "suitcase",
36 |     "frisbee",
37 |     "skis",
38 |     "snowboard",
39 |     "sports ball",
40 |     "kite",
41 |     "baseball bat",
42 |     "baseball glove",
43 |     "skateboard",
44 |     "surfboard",
45 |     "tennis racket",
46 |     "bottle",
47 |     "plate",
48 |     "wine glass",
49 |     "cup",
50 |     "fork",
51 |     "knife",
52 |     "spoon",
53 |     "bowl",
54 |     "banana",
55 |     "apple",
56 |     "sandwich",
57 |     "orange",
58 |     "broccoli",
59 |     "carrot",
60 |     "hot dog",
61 |     "pizza",
62 |     "donut",
63 |     "cake",
64 |     "chair",
65 |     "couch",
66 |     "potted plant",
67 |     "bed",
68 |     "mirror",
69 |     "dining table",
70 |     "window",
71 |     "desk",
72 |     "toilet",
73 |     "door",
74 |     "tv",
75 |     "laptop",
76 |     "mouse",
77 |     "remote",
78 |     "keyboard",
79 |     "cell phone",
80 |     "microwave",
81 |     "oven",
82 |     "toaster",
83 |     "sink",
84 |     "refrigerator",
85 |     "blender",
86 |     "book",
87 |     "clock",
88 |     "vase",
89 |     "scissors",
90 |     "teddy bear",
91 |     "hair drier",
92 |     "toothbrush",
93 | ]
-------------------------------------------------------------------------------- /deployments/mwt.yaml: --------------------------------------------------------------------------------
1 | apiVersion: kubeflow.org/v1
2 | kind: TFJob
3 | metadata:
4 |   name: multi-worker
5 |   namespace: distributed-training
6 | spec:
7 |   tfReplicaSpecs:
8 |     Worker:
9 |       replicas: 2
10 |       restartPolicy: Never
11 |       template:
12 |         spec:
13 |           containers:
14 |             - name: tensorflow
15 |               image: 6666688889/distributed_training:0.0.13
16 |               volumeMounts:
17 |                 - mountPath: /train
18 |                   name: training
19 |                   readOnly: true
20 |           volumes:
21 |             - name: training
22 |               persistentVolumeClaim:
23 |                 claimName: mwt-volume
24 |                 readOnly: true
25 | ---
26 | apiVersion: v1
27 | kind: PersistentVolumeClaim
28 | metadata:
29 |   name: mwt-volume
30 |   namespace: distributed-training
31 | spec:
32 |   accessModes:
33 |     - ReadWriteMany
34 |   resources:
35 |     requests:
36 |       storage: 10Gi
-------------------------------------------------------------------------------- /deployments/triton-isvc.yaml: --------------------------------------------------------------------------------
1 | apiVersion: serving.kserve.io/v1beta1
2 | kind: InferenceService
3 | metadata:
4 |   name: cardetection-mm
5 |   namespace: modelmesh-serving
6 |   annotations:
7 |     serving.kserve.io/deploymentMode: ModelMesh
8 |     serving.kserve.io/secretKey: localMinIO
9 | spec:
10 |   predictor:
11 |     model:
12 |       modelFormat:
13 |         name: onnx
14 |       runtime: triton-2.x
15 |       storageUri: s3://modelmesh-example-models/cardetect/yolov8n_car
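The `storageUri` above must point at a Triton-style model repository (`<name>/<version>/model.onnx` plus a `config.pbtxt`, mirroring the `model_repo/yolov8n_car` folder in this repo). A minimal `config.pbtxt` sketch for a YOLOv8n ONNX export — the input tensor name `images` follows `api/triton_client.py`, while the output name `output0` and the 84 = 4 box coordinates + 80 class scores layout are assumptions based on the default Ultralytics export:

```
name: "yolov8n_car"
platform: "onnxruntime_onnx"
max_batch_size: 0
input [
  {
    name: "images"
    data_type: TYPE_FP32
    dims: [ 1, 3, 640, 640 ]
  }
]
output [
  {
    name: "output0"
    data_type: TYPE_FP32
    dims: [ 1, 84, 8400 ]
  }
]
```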
-------------------------------------------------------------------------------- /deployments/triton-servingruntime.yaml: --------------------------------------------------------------------------------
1 | # Copyright 2021 IBM Corporation
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | apiVersion: serving.kserve.io/v1alpha1
15 | kind: ServingRuntime
16 | metadata:
17 |   name: triton-2.x
18 |   labels:
19 |     name: modelmesh-serving-triton-2.x-SR
20 |   annotations:
21 |     maxLoadingConcurrency: "2"
22 |     serving.kserve.io/autoscalerClass: hpa
23 |     serving.kserve.io/targetUtilizationPercentage: "75"
24 |     serving.kserve.io/metrics: "cpu"
25 |     serving.kserve.io/min-scale: "2"
26 |     serving.kserve.io/max-scale: "3"
27 | spec:
28 |   supportedModelFormats:
29 |     - name: keras
30 |       version: "2" # 2.6.0
31 |       autoSelect: true
32 |     - name: onnx
33 |       version: "1" # 1.5.3
34 |       autoSelect: true
35 |     - name: pytorch
36 |       version: "1" # 1.8.0a0+17f8c32
37 |       autoSelect: true
38 |     - name: tensorflow
39 |       version: "1" # 1.15.4
40 |       autoSelect: true
41 |     - name: tensorflow
42 |       version: "2" # 2.3.1
43 |       autoSelect: true
44 |     - name: tensorrt
45 |       version: "7" # 7.2.1
46 |       autoSelect: true
47 | 
48 |   protocolVersions:
49 |     - grpc-v2
50 |   multiModel: true
51 |   replicas: 1
52 |   grpcEndpoint: "port:8085"
53 |   grpcDataEndpoint: "port:8001"
54 | 
55 |   containers:
56 |     - name: triton
57 |       image: nvcr.io/nvidia/tritonserver:23.09-py3
58 |       command: [/bin/sh]
59 |       args:
60 |         - -c
61 |         - 'mkdir -p /models/_triton_models;
62 |           chmod 777 /models/_triton_models;
63 |           exec tritonserver
64 |           "--model-repository=/models/_triton_models"
65 |           "--model-control-mode=explicit"
66 |           "--strict-model-config=false"
67 |           "--strict-readiness=false"
68 |           "--allow-http=true"
69 |           "--allow-sagemaker=false"
70 |           '
71 |       resources:
72 |         requests:
73 |           cpu: 500m
74 |           memory: 1Gi
75 |         limits:
76 |           cpu: "5"
77 |           memory: 1Gi
78 |       livenessProbe:
79 |         # the server is listening only on 127.0.0.1, so an httpGet probe sent
80 |         # from the kubelet running on the node cannot connect to the server
81 |         # (not even with the Host header or host field)
82 |         # exec a curl call to have the request originate from localhost in the
83 |         # container
84 |         exec:
85 |           command:
86 |             - curl
87 |             - --fail
88 |             - --silent
89 |             - --show-error
90 |             - --max-time
91 |             - "9"
92 |             - http://localhost:8000/v2/health/live
93 |         initialDelaySeconds: 5
94 |         periodSeconds: 30
95 |         timeoutSeconds: 10
96 |   builtInAdapter:
97 |     serverType: triton
98 |     runtimeManagementPort: 8001
99 |     memBufferBytes: 134217728
100 |     modelLoadingTimeoutMillis: 90000
-------------------------------------------------------------------------------- /distributed_training/Dockerfile: --------------------------------------------------------------------------------
1 | FROM tensorflow/tensorflow:2.12.0
2 | 
3 | # Update the package list
4 | RUN apt-get update
5 | 
6 | # Install necessary packages
7 | RUN apt-get install -y libgl1-mesa-glx
8 | 
9 | # Install Python dependencies
10 | RUN pip install opencv-python-headless==4.5.3.56
11 | RUN pip install mlflow==2.14.1
12 | RUN pip install tqdm
13 | 
14 | # Copy the application code to the container
15 | COPY . /app
16 | 
17 | # Set the working directory
18 | WORKDIR /app
19 | 
20 | # Run mwt.py with the --train argument
21 | CMD ["python", "mwt.py", "--train"]
22 | 
-------------------------------------------------------------------------------- /distributed_training/README.md: --------------------------------------------------------------------------------
1 | # Distributed Training Pipeline
2 | 
3 | ## Overview:
4 | 
5 | This pipeline leverages a combination of Redis for online feature storage, PostgreSQL for offline storage, TensorFlow for distributed training, and MLflow for model tracking and registry. Kubeflow orchestrates the entire process, ensuring a seamless flow from data preparation to model deployment.
6 | 
7 | ## Table of Contents
8 | 
9 | - [Dataset Preparation](#dataset-preparation)
10 | - [Deploying Multi-Worker Training Jobs](#deploying-multi-worker-training-jobs)
11 | - [Monitoring and Investigating Models](#monitoring-and-investigating-models)
12 | - [Running MLflow with Docker Compose](#running-mlflow-with-docker-compose)
13 | - [Important Considerations](#important-considerations)
14 | - [Integrating Jenkins for Continuous Integration](#integrating-jenkins-for-continuous-integration)
15 | - [References](#references)
16 | 
17 | ## Dataset Preparation:
18 | 
19 | Begin by downloading the dataset required for the training job from the following link: [Download Dataset](https://drive.google.com/drive/folders/12ncEAoWT_kwuPT8YRdFysqgS54XJwre7?usp=drive_link). The folder structure should resemble the following:
20 | 
21 | <p align="center">
22 |   <img src="images/training_pipeline.png" alt="Training Job Structure">
23 | </p>
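Concretely, `mwt.py` expects the configured data directory to contain image and label folders with matching basenames, split into `train`/`valid`. A layout sketch — the folder names come from `utils/config.py`, which is not shown here, so treat the bracketed names as placeholders:

```
<data_dir>/
├── <image_dir>/
│   ├── train/        # *.jpg training images
│   └── valid/        # *.jpg validation images
├── <label_dir>/
│   ├── train/        # *.txt YOLO-format labels, one per image
│   └── valid/
└── train.txt         # file list used when generating TF records
```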
24 | 
25 | ## Deploying Multi-Worker Training Jobs
26 | 
27 | To deploy multi-worker training jobs, apply the configuration using Kubernetes:
28 | 
29 | ``` shell
30 | kubectl apply -f deployments/mwt.yaml
31 | ```
32 | 
33 | ## Monitoring and Investigating Models
34 | 
35 | To monitor the training process and inspect the models, update the `persistentVolumeClaim` in the `test/test.yaml` file and apply it:
36 | 
37 | ```shell
38 | kubectl apply -f test/test.yaml
39 | ```
40 | 
41 | This setup creates a pod that shares a volume with the training pods, allowing them to write to and read from a common source. The shared volume gives you easy access to logs and other critical data; a sketch of such a pod follows this section.
42 | 
43 | You can access the pod to check and read logs using the following command:
44 | 
45 | ```shell
46 | kubectl exec -ti nginx bash
47 | ```
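A minimal sketch of such a debug pod — hypothetical, since `test/test.yaml` itself is not shown here; it simply mounts the same `mwt-volume` PVC that the TFJob workers use:

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: nginx
  namespace: distributed-training
spec:
  containers:
    - name: nginx
      image: nginx
      volumeMounts:
        - mountPath: /train        # same path the training workers mount
          name: training
  volumes:
    - name: training
      persistentVolumeClaim:
        claimName: mwt-volume      # PVC defined in deployments/mwt.yaml
```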
48 | 
49 | ## Running MLflow with Docker Compose
50 | 
51 | For a proof of concept or a limited-resource environment, you can opt to run the MLflow service using Docker:
52 | 
53 | ```shell
54 | docker compose -f docker-compose.yml up -d --build
55 | ```
56 | 
57 | ## Important Considerations:
58 | 
59 | 👉 If multiple GPUs are not available, consider using an alternative strategy, as illustrated below:
60 | 
61 | ![Strategy Scope](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/strategy.png)
62 | 
63 | 👉 Customize the script to run the training job according to your requirements. If the job fails, you can diagnose the issue by listing the jobs and checking the pod error logs:
64 | 
65 | ```shell
66 | kubectl get tfjob
67 | ```
68 | 
69 | Please check the pod error log and fix any reported issue.
70 | ![Result Train](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/result_train_pod.png)
71 | 
72 | 👉 In your training script, ensure the model definition and dataset loading are encapsulated within the strategy scope:
73 | 
74 | ![Strategy Scope](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/strategy_scope.png)
75 | 
76 | 👉 To monitor the training process, you can exec into the pod or container (if using Docker) to observe the training job in real time:
77 | 
78 | ![Train Process](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/train_process.png)
79 | 
80 | 👉 The trained model versions will be stored and managed in MLflow:
81 | 
82 | ![Result](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/mlflow%20_modelregistry.png)
83 | 
84 | 
85 | ## Integrating Jenkins for Continuous Integration
86 | 
87 | For automated retraining when new data is available, you can integrate Jenkins into your CI/CD pipeline.
88 | 
89 | 1. Install Ngrok:
90 | 
91 | ```shell
92 | curl -s https://ngrok-agent.s3.amazonaws.com/ngrok.asc | \
93 |   sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null
94 | echo "deb https://ngrok-agent.s3.amazonaws.com buster main" | \
95 |   sudo tee /etc/apt/sources.list.d/ngrok.list
96 | sudo apt update
97 | sudo apt install ngrok
98 | ```
99 | 
100 | 2. Test Ngrok Installation: Run `ngrok` in the terminal to verify the installation:
101 | 
102 | ![CurlNgrok](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/ngrok.png)
103 | 
104 | 3. Retrieve Jenkins Password: Access Jenkins by retrieving the admin password as shown below:
105 | 
106 | ![JenkinsPassword](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/password_jenkins.png)
107 | 
108 | 4. Configure Jenkins:
109 | 
110 |    - Open `localhost:8081` in a browser to reach Jenkins -> `Manage Jenkins` -> `Plugins`, search for `Docker Pipeline` and `Docker`, and choose `Install without restart`.
111 |    ![JenkinsPlugin](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/instal_docker_jenkins.png)
112 |    - Once installed, the `Docker Pipeline` and `Docker` plugins should show up as successful:
113 |    ![DowloadPlugin](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/install_docker_success.png)
114 | 
115 | 5. Expose Jenkins with Ngrok:
116 | 
117 |    - Run `ngrok http 8081` to expose Jenkins:
118 |    ![NgrokForwardingPort](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/ngrok_forwarding.png)
119 | 
120 | 6. Set Up GitHub Webhook:
121 | 
122 |    - Open your GitHub repository (in this case, Capstone-Model-Serving-pipeline) -> go to the repository `Settings` -> `Webhooks` -> `Add webhook`. Paste the forwarding URL from the step above into `Payload URL` and append `/github-webhook/`. For `Content type`, choose `application/json`. Under "Which events would you like to trigger this webhook?", select `Pushes` and `Pull requests`. Finally, wait for the webhook status to show a green check mark, indicating that it is working correctly.
123 | 
124 |    ![WebhookGithub](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/webhook_github.png)
125 | 
126 |    - Check the connection. If Jenkins is successfully connected to GitHub, the webhook appears with a green check mark in the GitHub UI:
127 | 
128 |    ![Webhookconnect](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/result_connect_jenkins_github.png)
129 | 
130 | 7. Configure Jenkins Multibranch Pipeline:
131 | 
132 |    - Back in Jenkins, choose `Dashboard` -> `New Item`, enter the name of your project, choose `Multibranch Pipeline`, and click `OK`.
133 | 
134 |    - Under `Branch Sources`, click `Add Source` and choose GitHub.
135 | 
136 |    ![UiConnectToRepository](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/add_credential.png)
137 | 
138 |    - For the GitHub credential, choose the project scope you created above and enter the username of the GitHub account that owns the repository. For the password, go back to GitHub -> `Developer settings` -> `Personal access tokens` -> `Tokens (classic)`, generate a new classic token (for a quick demo you can tick all scopes), copy the generated token into the Jenkins `Password` field, and click `Add`.
139 | 
140 |    ![TokenGithub](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/github_tokens.png)
141 | 
142 |    - Point the source at the repository we are working on using the repository's HTTPS URL.
143 |    - Check all the information, `Validate` it, and if everything looks correct, `Save`.
144 | 
145 |    ![Validate](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/validate_connect_repo.png)
146 | 
147 |    - Choose `Credentials`, select the scope of our project, and add a new project credential. In `Username`, type your DockerHub username.
148 |    ![UiDockerhub](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/add_credential_dockerhub.png)
149 | 
150 | 
151 |    - For the password: go to DockerHub (where you store your Docker images), then navigate to `Account Settings` -> `Security` -> Generate new token.
152 |    Copy this token and paste it into the Jenkins credential, using `dockerhub` as the credential ID — this must match `registryCredential` in the Jenkinsfile.
153 | 
154 |    ![TokenDockerhub](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/generate_token_docker_hub.png)
155 | 
156 |    - Choose `Manage Jenkins` -> `System`, go to the GitHub section, set `GitHub API usage rate limiting strategy` to `Never check rate limit (NOT RECOMMENDED)`, and `Save`.
157 |    - Finally, go to the repository in Jenkins -> `Configure` -> `GitHub Credential`, select the GitHub credential you created in the step above, then `Save`.
158 |    - Click `Scan Repository Now` to check that all connections are correct. If they are not, restart Jenkins and try again.
159 | 
160 |    - The result of the build on Jenkins will look like this:
161 |    ![JenkinsBuild](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/ui_build_jenkins.png)
162 | 
163 |    - As you can see, the application version increases with each build:
164 |    ![Version](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/result_push_dockerhub.png)
165 | 
166 | ## References
167 | 
168 | For more information, please take a look at the examples [here](https://github.com/kubeflow/training-operator/tree/master/examples) and [here](https://github.com/kubeflow/examples/tree/master/github_issue_summarization).
169 | 
170 | Some other useful examples:
171 | - https://henning.kropponline.de/2017/03/19/distributing-tensorflow/
172 | - https://www.cs.cornell.edu/courses/cs4787/2019sp/notes/lecture22.pdf
173 | - https://web.eecs.umich.edu/~mosharaf/Readings/Parameter-Server.pdf
174 | - https://s3.us.cloud-object-storage.appdomain.cloud/developer/default/series/os-kubeflow-2020/static/kubeflow06.pdf
175 | - https://xzhu0027.gitbook.io/blog/ml-system/sys-ml-index/parameter-servers
176 | - http://www.juyang.co/distributed-model-training-ii-parameter-server-and-allreduce/
177 | 
-------------------------------------------------------------------------------- /distributed_training/build.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | IMAGE=6666688889/distributed_training:0.0.13
3 | docker build -t $IMAGE .
4 | docker push $IMAGE
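Each TFJob replica runs the image built above. The Kubeflow training-operator injects a `TF_CONFIG` environment variable into every worker pod, which is how `MultiWorkerMirroredStrategy` in `mwt.py` discovers its peers. A sketch of what worker 0 of the two-replica job in `deployments/mwt.yaml` would see — host names and the default port 2222 are illustrative:

```json
{
  "cluster": {
    "worker": [
      "multi-worker-worker-0.distributed-training.svc:2222",
      "multi-worker-worker-1.distributed-training.svc:2222"
    ]
  },
  "task": {"type": "worker", "index": 0}
}
```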
-------------------------------------------------------------------------------- /distributed_training/images/training_pipeline.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/distributed_training/images/training_pipeline.png
-------------------------------------------------------------------------------- /distributed_training/mwt.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import multiprocessing
3 | import os
4 | import sys
5 | 
6 | import cv2
7 | import numpy as np
8 | 
9 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
10 | os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
11 | 
12 | import tensorflow as tf
13 | import tqdm
14 | 
15 | from nets import nn
16 | from utils import config
17 | from utils import image_utils
18 | from utils.dataset import input_fn, DataLoader
19 | import posixpath
20 | 
21 | np.random.seed(12345)
22 | tf.random.set_seed(12345)
23 | 
24 | tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
25 | 
26 | import mlflow
27 | import mlflow.tensorflow
28 | 
29 | # Set the MLflow tracking URI
30 | mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "http://mlflow:5000"))
31 | 
32 | 
33 | def train():
34 |     strategy = tf.distribute.MultiWorkerMirroredStrategy()
35 | 
36 |     image_path = posixpath.join(config.data_dir, config.image_dir, "train")
37 |     label_path = posixpath.join(config.data_dir, config.label_dir, "train")
38 | 
39 |     image_files = [
40 |         os.path.splitext(file_name)[0]
41 |         for file_name in os.listdir(image_path)
42 |         if file_name.lower().endswith(".jpg")
43 |     ]
44 |     label_files = [
45 |         os.path.splitext(file_name)[0]
46 |         for file_name in os.listdir(label_path)
47 |         if file_name.lower().endswith(".txt")
48 |     ]
49 | 
50 |     file_names = list(set(image_files) & set(label_files))
51 | 
52 |     steps = len(file_names) // config.batch_size
53 |     if os.path.exists(os.path.join(config.data_dir, "TF")):
54 |         dataset = DataLoader().input_fn(file_names)
55 |     else:
56 |         dataset = input_fn(file_names)
57 |     dataset = strategy.experimental_distribute_dataset(dataset)
58 | 
59 |     with strategy.scope():
60 |         model = nn.build_model()
61 |         model.summary()
62 |         optimizer = tf.keras.optimizers.Adam(nn.CosineLR(steps), 0.937)
63 | 
64 |     with strategy.scope():
65 |         loss_object = nn.ComputeLoss()
66 | 
67 |         def compute_loss(y_true, y_pred):
68 |             total_loss = loss_object(y_pred, y_true)
69 |             return tf.reduce_sum(total_loss) / config.batch_size
70 | 
71 |     with strategy.scope():
72 | 
73 |         def train_step(image, y_true):
74 |             with tf.GradientTape() as tape:
75 |                 y_pred = model(image, training=True)
76 |                 loss = compute_loss(y_true, y_pred)
77 |             variables = model.trainable_variables
78 |             gradients = tape.gradient(loss, variables)
79 |             optimizer.apply_gradients(zip(gradients, variables))
80 |             return loss
81 | 
82 |     with strategy.scope():
83 | 
84 |         @tf.function
85 |         def distributed_train_step(image, y_true):
86 |             per_replica_losses = strategy.run(train_step, args=(image, y_true))
87 |             return strategy.reduce(
88 |                 tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None
89 |             )
90 | 
91 |     def train_fn():
92 |         if not os.path.exists("weights"):
93 |             os.makedirs("weights")
94 |         pb = tf.keras.utils.Progbar(steps, stateful_metrics=["loss"])
95 |         print(f"[INFO] {len(file_names)} data points")
96 | 
97 |         # Start MLflow run
98 |         with mlflow.start_run():
mlflow.log_param("batch_size", config.batch_size) 100 | mlflow.log_param("num_epochs", config.num_epochs) 101 | 102 | for step, inputs in enumerate(dataset): 103 | if step % steps == 0: 104 | print(f"Epoch {step // steps + 1}/{config.num_epochs}") 105 | pb = tf.keras.utils.Progbar(steps, stateful_metrics=["loss"]) 106 | step += 1 107 | image, y_true_1, y_true_2, y_true_3 = inputs 108 | y_true = (y_true_1, y_true_2, y_true_3) 109 | loss = distributed_train_step(image, y_true) 110 | pb.add(1, [("loss", loss.numpy())]) 111 | 112 | # Log loss to MLflow 113 | mlflow.log_metric("loss", loss.numpy(), step=step) 114 | 115 | if step % steps == 0: 116 | model.save_weights( 117 | os.path.join("weights", f"model_{config.version}.h5") 118 | ) 119 | # Log model checkpoint to MLflow 120 | mlflow.log_artifact( 121 | os.path.join("weights", f"model_{config.version}.h5") 122 | ) 123 | if step // steps == config.num_epochs: 124 | mlflow.tensorflow.log_model(model, "model") 125 | sys.exit("--- Stop Training ---") 126 | 127 | train_fn() 128 | 129 | 130 | # Rest of your script remains unchanged 131 | def test(): 132 | def draw_bbox(image, boxes): 133 | for box in boxes: 134 | coordinate = np.array(box[:4], dtype=np.int32) 135 | c1, c2 = (coordinate[0], coordinate[1]), (coordinate[2], coordinate[3]) 136 | cv2.rectangle(image, c1, c2, (255, 0, 0), 1) 137 | return image 138 | 139 | def test_fn(): 140 | if not os.path.exists("results"): 141 | os.makedirs("results") 142 | image_path = posixpath.join(config.data_dir, config.image_dir, "valid") 143 | label_path = posixpath.join(config.data_dir, config.label_dir, "valid") 144 | 145 | image_files = [ 146 | os.path.splitext(file_name)[0] 147 | for file_name in os.listdir(image_path) 148 | if file_name.lower().endswith(".jpg") 149 | ] 150 | label_files = [ 151 | os.path.splitext(file_name)[0] 152 | for file_name in os.listdir(label_path) 153 | if file_name.lower().endswith(".txt") 154 | ] 155 | 156 | file_names = list(set(image_files) & set(label_files)) 157 | 158 | model = nn.build_model(training=False) 159 | model.load_weights(f"weights/model_{config.version}.h5", True) 160 | 161 | for file_name in tqdm.tqdm(file_names): 162 | image = cv2.imread( 163 | posixpath.join( 164 | config.data_dir, config.image_dir, "valid", file_name + ".jpg" 165 | ) 166 | ) 167 | image_np = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 168 | 169 | image_np, scale, dw, dh = image_utils.resize(image_np) 170 | image_np = image_np.astype(np.float32) / 255.0 171 | 172 | boxes, scores, labels = model.predict(image_np[np.newaxis, ...]) 173 | 174 | boxes, scores, labels = ( 175 | np.squeeze(boxes, 0), 176 | np.squeeze(scores, 0), 177 | np.squeeze(labels, 0), 178 | ) 179 | 180 | boxes[:, [0, 2]] = (boxes[:, [0, 2]] - dw) / scale 181 | boxes[:, [1, 3]] = (boxes[:, [1, 3]] - dh) / scale 182 | image = draw_bbox(image, boxes) 183 | cv2.imwrite(f"results/{file_name}.jpg", image) 184 | 185 | test_fn() 186 | 187 | 188 | def write_tf_record(queue, sentinel): 189 | def byte_feature(value): 190 | if not isinstance(value, bytes): 191 | if not isinstance(value, list): 192 | value = value.encode("utf-8") 193 | else: 194 | value = [val.encode("utf-8") for val in value] 195 | if not isinstance(value, list): 196 | value = [value] 197 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 198 | 199 | while True: 200 | file_name = queue.get() 201 | 202 | if file_name == sentinel: 203 | break 204 | in_image = image_utils.load_image(file_name)[:, :, ::-1] 205 | boxes, label = image_utils.load_label(file_name) 206 
| 207 | in_image, boxes = image_utils.resize(in_image, boxes) 208 | 209 | y_true_1, y_true_2, y_true_3 = image_utils.process_box(boxes, label) 210 | 211 | in_image = in_image.astype("float32") 212 | y_true_1 = y_true_1.astype("float32") 213 | y_true_2 = y_true_2.astype("float32") 214 | y_true_3 = y_true_3.astype("float32") 215 | 216 | in_image = in_image.tobytes() 217 | y_true_1 = y_true_1.tobytes() 218 | y_true_2 = y_true_2.tobytes() 219 | y_true_3 = y_true_3.tobytes() 220 | 221 | features = tf.train.Features( 222 | feature={ 223 | "in_image": byte_feature(in_image), 224 | "y_true_1": byte_feature(y_true_1), 225 | "y_true_2": byte_feature(y_true_2), 226 | "y_true_3": byte_feature(y_true_3), 227 | } 228 | ) 229 | tf_example = tf.train.Example(features=features) 230 | opt = tf.io.TFRecordOptions("GZIP") 231 | with tf.io.TFRecordWriter( 232 | os.path.join(config.data_dir, "TF", file_name + ".tf"), opt 233 | ) as writer: 234 | writer.write(tf_example.SerializeToString()) 235 | 236 | 237 | def generate_tf_record(): 238 | if not os.path.exists(os.path.join(config.data_dir, "TF")): 239 | os.makedirs(os.path.join(config.data_dir, "TF")) 240 | file_names = [] 241 | with open(os.path.join(config.data_dir, "train.txt")) as reader: 242 | for line in reader.readlines(): 243 | file_names.append(line.rstrip().split(" ")[0]) 244 | sentinel = ("", []) 245 | queue = multiprocessing.Manager().Queue() 246 | for file_name in tqdm.tqdm(file_names): 247 | queue.put(file_name) 248 | for _ in range(os.cpu_count()): 249 | queue.put(sentinel) 250 | print("[INFO] generating TF record") 251 | process_pool = [] 252 | for i in range(os.cpu_count()): 253 | process = multiprocessing.Process( 254 | target=write_tf_record, args=(queue, sentinel) 255 | ) 256 | process_pool.append(process) 257 | process.start() 258 | for process in process_pool: 259 | process.join() 260 | 261 | 262 | class AnchorGenerator: 263 | def __init__(self, num_cluster): 264 | self.num_cluster = num_cluster 265 | 266 | def iou(self, boxes, clusters): # 1 box -> k clusters 267 | n = boxes.shape[0] 268 | k = self.num_cluster 269 | 270 | box_area = boxes[:, 0] * boxes[:, 1] 271 | box_area = box_area.repeat(k) 272 | box_area = np.reshape(box_area, (n, k)) 273 | 274 | cluster_area = clusters[:, 0] * clusters[:, 1] 275 | cluster_area = np.tile(cluster_area, [1, n]) 276 | cluster_area = np.reshape(cluster_area, (n, k)) 277 | 278 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k)) 279 | cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k)) 280 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) 281 | 282 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k)) 283 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k)) 284 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) 285 | inter_area = np.multiply(min_w_matrix, min_h_matrix) 286 | 287 | return inter_area / (box_area + cluster_area - inter_area) 288 | 289 | def avg_iou(self, boxes, clusters): 290 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)]) 291 | return accuracy 292 | 293 | def generator(self, boxes, k, dist=np.median): 294 | box_number = boxes.shape[0] 295 | last_nearest = np.zeros((box_number,)) 296 | clusters = boxes[ 297 | np.random.choice(box_number, k, replace=False) 298 | ] # init k clusters 299 | while True: 300 | distances = 1 - self.iou(boxes, clusters) 301 | 302 | current_nearest = np.argmin(distances, axis=1) 303 | if (last_nearest == current_nearest).all(): 304 | break # clusters won't change 305 | for cluster 
in range(k): 306 | clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0) 307 | last_nearest = current_nearest 308 | 309 | return clusters 310 | 311 | def generate_anchor(self): 312 | boxes = self.get_boxes() 313 | result = self.generator(boxes, k=self.num_cluster) 314 | result = result[np.lexsort(result.T[0, None])] 315 | print("\nAnchors: \n{}".format(result)) 316 | print("\nFitness: {:.4f}".format(self.avg_iou(boxes, result))) 317 | 318 | @staticmethod 319 | def get_boxes(): 320 | boxes = [] 321 | file_names = [ 322 | file_name[:-4] 323 | for file_name in os.listdir( 324 | posixpath.join(config.data_dir, config.label_dir) 325 | ) 326 | ] 327 | for file_name in file_names: 328 | for box in image_utils.load_label(file_name)[0]: 329 | boxes.append([box[2] - box[0], box[3] - box[1]]) 330 | return np.array(boxes) 331 | 332 | 333 | if __name__ == "__main__": 334 | parser = argparse.ArgumentParser() 335 | parser.add_argument("--anchor", action="store_true") 336 | parser.add_argument("--record", action="store_true") 337 | parser.add_argument("--train", action="store_true") 338 | parser.add_argument("--test", action="store_true") 339 | 340 | args = parser.parse_args() 341 | if args.anchor: 342 | AnchorGenerator(9).generate_anchor() 343 | if args.record: 344 | generate_tf_record() 345 | if args.train: 346 | train() 347 | if args.test: 348 | test() 349 | -------------------------------------------------------------------------------- /distributed_training/nets/nn.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import tensorflow as tf 4 | from tensorflow.keras import backend 5 | from tensorflow.keras import layers 6 | 7 | from utils import config 8 | 9 | initializer = tf.random_normal_initializer(stddev=0.01) 10 | l2 = tf.keras.regularizers.l2(4e-5) 11 | 12 | 13 | def conv(x, filters, k=1, s=1): 14 | if s == 2: 15 | x = layers.ZeroPadding2D(((1, 0), (1, 0)))(x) 16 | padding = "valid" 17 | else: 18 | padding = "same" 19 | x = layers.Conv2D( 20 | filters, 21 | k, 22 | s, 23 | padding, 24 | use_bias=False, 25 | kernel_initializer=initializer, 26 | kernel_regularizer=l2, 27 | )(x) 28 | x = layers.BatchNormalization(momentum=0.03)(x) 29 | x = layers.Activation(tf.nn.swish)(x) 30 | return x 31 | 32 | 33 | def residual(x, filters, add=True): 34 | inputs = x 35 | if add: 36 | x = conv(x, filters, 1) 37 | x = conv(x, filters, 3) 38 | x = inputs + x 39 | else: 40 | x = conv(x, filters, 1) 41 | x = conv(x, filters, 3) 42 | return x 43 | 44 | 45 | def csp(x, filters, n, add=True): 46 | y = conv(x, filters // 2) 47 | for _ in range(n): 48 | y = residual(y, filters // 2, add) 49 | 50 | x = conv(x, filters // 2) 51 | x = layers.concatenate([x, y]) 52 | 53 | x = conv(x, filters) 54 | return x 55 | 56 | 57 | def build_model(training=True): 58 | depth = config.depth[config.versions.index(config.version)] 59 | width = config.width[config.versions.index(config.version)] 60 | 61 | inputs = layers.Input([config.image_size, config.image_size, 3]) 62 | x = tf.nn.space_to_depth(inputs, 2) 63 | x = conv(x, int(round(width * 64)), 3) 64 | x = conv(x, int(round(width * 128)), 3, 2) 65 | x = csp(x, int(round(width * 128)), int(round(depth * 3))) 66 | 67 | x = conv(x, int(round(width * 256)), 3, 2) 68 | x = csp(x, int(round(width * 256)), int(round(depth * 9))) 69 | x1 = x 70 | 71 | x = conv(x, int(round(width * 512)), 3, 2) 72 | x = csp(x, int(round(width * 512)), int(round(depth * 9))) 73 | x2 = x 74 | 75 | x = conv(x, int(round(width * 1024)), 3, 2) 76 
| x = conv(x, int(round(width * 512)), 1, 1) 77 | x = layers.concatenate( 78 | [ 79 | x, 80 | tf.nn.max_pool(x, 5, 1, "SAME"), 81 | tf.nn.max_pool(x, 9, 1, "SAME"), 82 | tf.nn.max_pool(x, 13, 1, "SAME"), 83 | ] 84 | ) 85 | x = conv(x, int(round(width * 1024)), 1, 1) 86 | x = csp(x, int(round(width * 1024)), int(round(depth * 3)), False) 87 | 88 | x = conv(x, int(round(width * 512)), 1) 89 | x3 = x 90 | x = layers.UpSampling2D()(x) 91 | x = layers.concatenate([x, x2]) 92 | x = csp(x, int(round(width * 512)), int(round(depth * 3)), False) 93 | 94 | x = conv(x, int(round(width * 256)), 1) 95 | x4 = x 96 | x = layers.UpSampling2D()(x) 97 | x = layers.concatenate([x, x1]) 98 | x = csp(x, int(round(width * 256)), int(round(depth * 3)), False) 99 | p3 = layers.Conv2D( 100 | 3 * (len(config.class_dict) + 5), 101 | 1, 102 | name=f"p3_{len(config.class_dict)}", 103 | kernel_initializer=initializer, 104 | kernel_regularizer=l2, 105 | )(x) 106 | 107 | x = conv(x, int(round(width * 256)), 3, 2) 108 | x = layers.concatenate([x, x4]) 109 | x = csp(x, int(round(width * 512)), int(round(depth * 3)), False) 110 | p4 = layers.Conv2D( 111 | 3 * (len(config.class_dict) + 5), 112 | 1, 113 | name=f"p4_{len(config.class_dict)}", 114 | kernel_initializer=initializer, 115 | kernel_regularizer=l2, 116 | )(x) 117 | 118 | x = conv(x, int(round(width * 512)), 3, 2) 119 | x = layers.concatenate([x, x3]) 120 | x = csp(x, int(round(width * 1024)), int(round(depth * 3)), False) 121 | p5 = layers.Conv2D( 122 | 3 * (len(config.class_dict) + 5), 123 | 1, 124 | name=f"p5_{len(config.class_dict)}", 125 | kernel_initializer=initializer, 126 | kernel_regularizer=l2, 127 | )(x) 128 | 129 | if training: 130 | return tf.keras.Model(inputs, [p5, p4, p3]) 131 | else: 132 | return tf.keras.Model(inputs, Predict()([p5, p4, p3])) 133 | 134 | 135 | def process_layer(feature_map, anchors): 136 | grid_size = tf.shape(feature_map)[1:3] 137 | ratio = tf.cast( 138 | tf.constant([config.image_size, config.image_size]) / grid_size, tf.float32 139 | ) 140 | rescaled_anchors = [ 141 | (anchor[0] / ratio[1], anchor[1] / ratio[0]) for anchor in anchors 142 | ] 143 | 144 | feature_map = tf.reshape( 145 | feature_map, [-1, grid_size[0], grid_size[1], 3, 5 + len(config.class_dict)] 146 | ) 147 | 148 | box_centers, box_sizes, conf, prob = tf.split( 149 | feature_map, [2, 2, 1, len(config.class_dict)], axis=-1 150 | ) 151 | box_centers = tf.nn.sigmoid(box_centers) 152 | 153 | grid_x = tf.range(grid_size[1], dtype=tf.int32) 154 | grid_y = tf.range(grid_size[0], dtype=tf.int32) 155 | grid_x, grid_y = tf.meshgrid(grid_x, grid_y) 156 | x_offset = tf.reshape(grid_x, (-1, 1)) 157 | y_offset = tf.reshape(grid_y, (-1, 1)) 158 | x_y_offset = tf.concat([x_offset, y_offset], axis=-1) 159 | x_y_offset = tf.cast( 160 | tf.reshape(x_y_offset, [grid_size[0], grid_size[1], 1, 2]), tf.float32 161 | ) 162 | 163 | box_centers = box_centers + x_y_offset 164 | box_centers = box_centers * ratio[::-1] 165 | 166 | box_sizes = tf.exp(box_sizes) * rescaled_anchors 167 | box_sizes = box_sizes * ratio[::-1] 168 | 169 | boxes = tf.concat([box_centers, box_sizes], axis=-1) 170 | 171 | return x_y_offset, boxes, conf, prob 172 | 173 | 174 | def box_iou(pred_boxes, valid_true_boxes): 175 | pred_box_xy = pred_boxes[..., 0:2] 176 | pred_box_wh = pred_boxes[..., 2:4] 177 | 178 | pred_box_xy = tf.expand_dims(pred_box_xy, -2) 179 | pred_box_wh = tf.expand_dims(pred_box_wh, -2) 180 | 181 | true_box_xy = valid_true_boxes[:, 0:2] 182 | true_box_wh = valid_true_boxes[:, 2:4] 183 | 184 | 
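# Broadcasting note for the IoU computed below: pred_box_xy/pred_box_wh were
# expanded to [..., 1, 2] above while the valid true boxes are [V, 2], so the
# elementwise max/min yield a pairwise [..., V] table of intersections in one shot.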
intersect_min = tf.maximum( 185 | pred_box_xy - pred_box_wh / 2.0, true_box_xy - true_box_wh / 2.0 186 | ) 187 | intersect_max = tf.minimum( 188 | pred_box_xy + pred_box_wh / 2.0, true_box_xy + true_box_wh / 2.0 189 | ) 190 | 191 | intersect_wh = tf.maximum(intersect_max - intersect_min, 0.0) 192 | 193 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] 194 | pred_box_area = pred_box_wh[..., 0] * pred_box_wh[..., 1] 195 | true_box_area = true_box_wh[..., 0] * true_box_wh[..., 1] 196 | true_box_area = tf.expand_dims(true_box_area, axis=0) 197 | 198 | return intersect_area / (pred_box_area + true_box_area - intersect_area + 1e-10) 199 | 200 | 201 | def compute_nms(args): 202 | boxes, classification = args 203 | 204 | def nms_fn(score, label): 205 | score_indices = tf.where(backend.greater(score, config.threshold)) 206 | 207 | filtered_boxes = tf.gather_nd(boxes, score_indices) 208 | filtered_scores = backend.gather(score, score_indices)[:, 0] 209 | 210 | nms_indices = tf.image.non_max_suppression( 211 | filtered_boxes, filtered_scores, config.max_boxes, 0.1 212 | ) 213 | score_indices = backend.gather(score_indices, nms_indices) 214 | 215 | label = tf.gather_nd(label, score_indices) 216 | score_indices = backend.stack([score_indices[:, 0], label], axis=1) 217 | 218 | return score_indices 219 | 220 | all_indices = [] 221 | for c in range(int(classification.shape[1])): 222 | scores = classification[:, c] 223 | labels = c * tf.ones((backend.shape(scores)[0],), dtype="int64") 224 | all_indices.append(nms_fn(scores, labels)) 225 | indices = backend.concatenate(all_indices, axis=0) 226 | 227 | scores = tf.gather_nd(classification, indices) 228 | labels = indices[:, 1] 229 | scores, top_indices = tf.nn.top_k( 230 | scores, k=backend.minimum(config.max_boxes, backend.shape(scores)[0]) 231 | ) 232 | 233 | indices = backend.gather(indices[:, 0], top_indices) 234 | boxes = backend.gather(boxes, indices) 235 | labels = backend.gather(labels, top_indices) 236 | 237 | pad_size = backend.maximum(0, config.max_boxes - backend.shape(scores)[0]) 238 | 239 | boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1) 240 | scores = tf.pad(scores, [[0, pad_size]], constant_values=-1) 241 | labels = tf.pad(labels, [[0, pad_size]], constant_values=-1) 242 | labels = backend.cast(labels, "int32") 243 | 244 | boxes.set_shape([config.max_boxes, 4]) 245 | scores.set_shape([config.max_boxes]) 246 | labels.set_shape([config.max_boxes]) 247 | 248 | return [boxes, scores, labels] 249 | 250 | 251 | class ComputeLoss(object): 252 | def __init__(self): 253 | super().__init__() 254 | 255 | @staticmethod 256 | def compute_loss(y_pred, y_true, anchors): 257 | grid_size = tf.shape(y_pred)[1:3] 258 | ratio = tf.cast( 259 | tf.constant([config.image_size, config.image_size]) / grid_size, tf.float32 260 | ) 261 | batch_size = tf.cast(tf.shape(y_pred)[0], tf.float32) 262 | 263 | x_y_offset, pred_boxes, pred_conf, pred_prob = process_layer(y_pred, anchors) 264 | 265 | object_mask = y_true[..., 4:5] 266 | 267 | def cond(idx, _): 268 | return tf.less(idx, tf.cast(batch_size, tf.int32)) 269 | 270 | def body(idx, mask): 271 | valid_true_boxes = tf.boolean_mask( 272 | y_true[idx, ..., 0:4], tf.cast(object_mask[idx, ..., 0], "bool") 273 | ) 274 | iou = box_iou(pred_boxes[idx], valid_true_boxes) 275 | return idx + 1, mask.write( 276 | idx, tf.cast(tf.reduce_max(iou, axis=-1) < 0.2, tf.float32) 277 | ) 278 | 279 | ignore_mask = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True) 280 | 281 | _, ignore_mask = 
tf.while_loop(cond=cond, body=body, loop_vars=[0, ignore_mask]) 282 | ignore_mask = ignore_mask.stack() 283 | ignore_mask = tf.expand_dims(ignore_mask, -1) 284 | 285 | true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset 286 | pred_xy = pred_boxes[..., 0:2] / ratio[::-1] - x_y_offset 287 | 288 | true_tw_th = y_true[..., 2:4] / anchors 289 | pred_tw_th = pred_boxes[..., 2:4] / anchors 290 | true_tw_th = tf.where( 291 | tf.equal(true_tw_th, 0), tf.ones_like(true_tw_th), true_tw_th 292 | ) 293 | pred_tw_th = tf.where( 294 | tf.equal(pred_tw_th, 0), tf.ones_like(pred_tw_th), pred_tw_th 295 | ) 296 | true_tw_th = tf.math.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9)) 297 | pred_tw_th = tf.math.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9)) 298 | 299 | box_loss_scale = y_true[..., 2:3] * y_true[..., 3:4] 300 | box_loss_scale = 2.0 - box_loss_scale / tf.cast( 301 | config.image_size**2, tf.float32 302 | ) 303 | 304 | xy_loss = tf.reduce_sum( 305 | tf.square(true_xy - pred_xy) * object_mask * box_loss_scale 306 | ) 307 | wh_loss = tf.reduce_sum( 308 | tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale 309 | ) 310 | 311 | conf_pos_mask = object_mask 312 | conf_neg_mask = (1 - object_mask) * ignore_mask 313 | conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits( 314 | labels=object_mask, logits=pred_conf 315 | ) 316 | conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits( 317 | labels=object_mask, logits=pred_conf 318 | ) 319 | 320 | conf_loss = tf.reduce_sum((conf_loss_pos + conf_loss_neg)) 321 | 322 | true_conf = y_true[..., 5:] 323 | 324 | class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits( 325 | true_conf, pred_prob 326 | ) 327 | class_loss = tf.reduce_sum(class_loss) 328 | 329 | return xy_loss + wh_loss + conf_loss + class_loss 330 | 331 | def __call__(self, y_pred, y_true): 332 | loss = 0.0 333 | anchor_group = [config.anchors[6:9], config.anchors[3:6], config.anchors[0:3]] 334 | 335 | for i in range(len(y_pred)): 336 | loss += self.compute_loss(y_pred[i], y_true[i], anchor_group[i]) 337 | return loss 338 | 339 | 340 | class CosineLR(tf.optimizers.schedules.LearningRateSchedule): 341 | def __init__(self, steps): 342 | super().__init__() 343 | self.lr = 0.008 * config.batch_size / 64 344 | self.warmup_init = 0.0008 345 | self.warmup_step = steps 346 | self.decay_steps = tf.cast( 347 | (config.num_epochs - 1) * self.warmup_step, tf.float32 348 | ) 349 | 350 | def __call__(self, step): 351 | linear_warmup = ( 352 | tf.cast(step, dtype=tf.float32) 353 | / self.warmup_step 354 | * (self.lr - self.warmup_init) 355 | ) 356 | cosine_lr = ( 357 | 0.5 358 | * self.lr 359 | * (1 + tf.cos(math.pi * tf.cast(step, tf.float32) / self.decay_steps)) 360 | ) 361 | return tf.where( 362 | step < self.warmup_step, self.warmup_init + linear_warmup, cosine_lr 363 | ) 364 | 365 | def get_config(self): 366 | pass 367 | 368 | 369 | class Predict(layers.Layer): 370 | def __init__(self): 371 | super().__init__() 372 | 373 | def call(self, inputs, **kwargs): 374 | y_pred = [ 375 | (inputs[0], config.anchors[6:9]), 376 | (inputs[1], config.anchors[3:6]), 377 | (inputs[2], config.anchors[0:3]), 378 | ] 379 | 380 | boxes_list, conf_list, prob_list = [], [], [] 381 | for result in [ 382 | process_layer(feature_map, anchors) for (feature_map, anchors) in y_pred 383 | ]: 384 | x_y_offset, box, conf, prob = result 385 | grid_size = tf.shape(x_y_offset)[:2] 386 | box = tf.reshape(box, [-1, grid_size[0] * grid_size[1] * 3, 4]) 387 | conf = tf.reshape(conf, [-1, 
grid_size[0] * grid_size[1] * 3, 1]) 388 | prob = tf.reshape( 389 | prob, [-1, grid_size[0] * grid_size[1] * 3, len(config.class_dict)] 390 | ) 391 | boxes_list.append(box) 392 | conf_list.append(tf.sigmoid(conf)) 393 | prob_list.append(tf.sigmoid(prob)) 394 | 395 | boxes = tf.concat(boxes_list, axis=1) 396 | conf = tf.concat(conf_list, axis=1) 397 | prob = tf.concat(prob_list, axis=1) 398 | 399 | center_x, center_y, w, h = tf.split(boxes, [1, 1, 1, 1], axis=-1) 400 | x_min = center_x - w / 2 401 | y_min = center_y - h / 2 402 | x_max = center_x + w / 2 403 | y_max = center_y + h / 2 404 | 405 | boxes = tf.concat([x_min, y_min, x_max, y_max], axis=-1) 406 | 407 | outputs = tf.map_fn( 408 | fn=compute_nms, 409 | elems=[boxes, conf * prob], 410 | dtype=["float32", "float32", "int32"], 411 | parallel_iterations=100, 412 | ) 413 | 414 | return outputs 415 | 416 | def compute_output_shape(self, input_shape): 417 | return [ 418 | (input_shape[0][0], config.max_boxes, 4), 419 | (input_shape[1][0], config.max_boxes), 420 | (input_shape[1][0], config.max_boxes), 421 | ] 422 | 423 | def compute_mask(self, inputs, mask=None): 424 | return (len(inputs) + 1) * [None] 425 | 426 | def get_config(self): 427 | return super().get_config() 428 | -------------------------------------------------------------------------------- /distributed_training/test/test.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: nginx 5 | spec: 6 | containers: 7 | - name: nginx 8 | image: nginx 9 | volumeMounts: 10 | - mountPath: /train 11 | name: training 12 | volumes: 13 | - name: training 14 | persistentVolumeClaim: 15 | claimName: mwt-volume 16 | -------------------------------------------------------------------------------- /distributed_training/utils/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | import posixpath 4 | 5 | width = [0.50, 0.75, 1.0, 1.25] 6 | depth = [0.33, 0.67, 1.0, 1.33] 7 | 8 | versions = ["s", "m", "l", "x"] 9 | # data_dir = os.path.join('..', 'Dataset') 10 | data_dir = posixpath.join(".", "Dataset") 11 | 12 | threshold = 0.3 13 | max_boxes = 150 14 | image_dir = "images" 15 | label_dir = "labels" 16 | 17 | num_epochs = 2 18 | batch_size = 32 19 | image_size = 640 20 | class_dict = { 21 | "person": 0, 22 | "bicycle": 1, 23 | "car": 2, 24 | "motorcycle": 3, 25 | "airplane": 4, 26 | "bus": 5, 27 | "train": 6, 28 | "truck": 7, 29 | "boat": 8, 30 | "traffic light": 9, 31 | "fire hydrant": 10, 32 | "stop sign": 11, 33 | "parking meter": 12, 34 | "bench": 13, 35 | "bird": 14, 36 | "cat": 15, 37 | "dog": 16, 38 | "horse": 17, 39 | "sheep": 18, 40 | "cow": 19, 41 | "elephant": 20, 42 | "bear": 21, 43 | "zebra": 22, 44 | "giraffe": 23, 45 | "backpack": 24, 46 | "umbrella": 25, 47 | "handbag": 26, 48 | "tie": 27, 49 | "suitcase": 28, 50 | "frisbee": 29, 51 | "skis": 30, 52 | "snowboard": 31, 53 | "sports ball": 32, 54 | "kite": 33, 55 | "baseball bat": 34, 56 | "baseball glove": 35, 57 | "skateboard": 36, 58 | "surfboard": 37, 59 | "tennis racket": 38, 60 | "bottle": 39, 61 | "wine glass": 40, 62 | "cup": 41, 63 | "fork": 42, 64 | "knife": 43, 65 | "spoon": 44, 66 | "bowl": 45, 67 | "banana": 46, 68 | "apple": 47, 69 | "sandwich": 48, 70 | "orange": 49, 71 | "broccoli": 50, 72 | "carrot": 51, 73 | "hot dog": 52, 74 | "pizza": 53, 75 | "donut": 54, 76 | "cake": 55, 77 | "chair": 56, 78 | "couch": 57, 79 | "potted plant": 58, 80 | "bed": 59, 81 | 
"dining table": 60, 82 | "toilet": 61, 83 | "tv": 62, 84 | "laptop": 63, 85 | "mouse": 64, 86 | "remote": 65, 87 | "keyboard": 66, 88 | "cell phone": 67, 89 | "microwave": 68, 90 | "oven": 69, 91 | "toaster": 70, 92 | "sink": 71, 93 | "refrigerator": 72, 94 | "book": 73, 95 | "clock": 74, 96 | "vase": 75, 97 | "scissors": 76, 98 | "teddy bear": 77, 99 | "hair drier": 78, 100 | "toothbrush": 79, 101 | } 102 | 103 | version = "s" 104 | anchors = numpy.array( 105 | [ 106 | [8.0, 9.0], 107 | [16.0, 24.0], 108 | [28.0, 58.0], 109 | [41.0, 25.0], 110 | [58.0, 125.0], 111 | [71.0, 52.0], 112 | [129.0, 97.0], 113 | [163.0, 218.0], 114 | [384.0, 347.0], 115 | ], 116 | numpy.float32, 117 | ) 118 | -------------------------------------------------------------------------------- /distributed_training/utils/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy 3 | import tensorflow as tf 4 | from tensorflow.keras import utils 5 | import posixpath 6 | from distributed_trainning.utils import image_utils 7 | from utils import config 8 | 9 | 10 | class Generator(utils.Sequence): 11 | def __init__(self, file_names): 12 | self.file_names = file_names 13 | 14 | def __len__(self): 15 | return int(numpy.floor(len(self.file_names) / config.batch_size)) 16 | 17 | def __getitem__(self, index): 18 | image = image_utils.load_image(self.file_names[index]) 19 | boxes = image_utils.load_label(self.file_names[index]) 20 | image, boxes = image_utils.resize(image, boxes) 21 | # image, boxes = util.random_flip(image, boxes) 22 | 23 | image = image[:, :, ::-1].astype(numpy.float32) 24 | image = image / 255.0 25 | y_true_1, y_true_2, y_true_3 = image_utils.process_box(boxes) 26 | return image, y_true_1, y_true_2, y_true_3 27 | 28 | def on_epoch_end(self): 29 | numpy.random.shuffle(self.file_names) 30 | 31 | 32 | def input_fn(file_names): 33 | def generator_fn(): 34 | generator = utils.OrderedEnqueuer(Generator(file_names), True) 35 | generator.start(workers=min(os.cpu_count(), config.batch_size)) 36 | while True: 37 | image, y_true_1, y_true_2, y_true_3 = generator.get().__next__() 38 | yield image, y_true_1, y_true_2, y_true_3 39 | 40 | output_types = (tf.float32, tf.float32, tf.float32, tf.float32) 41 | output_shapes = ( 42 | (config.image_size, config.image_size, 3), 43 | ( 44 | config.image_size // 32, 45 | config.image_size // 32, 46 | 3, 47 | len(config.class_dict) + 5, 48 | ), 49 | ( 50 | config.image_size // 16, 51 | config.image_size // 16, 52 | 3, 53 | len(config.class_dict) + 5, 54 | ), 55 | (config.image_size // 8, config.image_size // 8, 3, len(config.class_dict) + 5), 56 | ) 57 | 58 | dataset = tf.data.Dataset.from_generator( 59 | generator=generator_fn, output_types=output_types, output_shapes=output_shapes 60 | ) 61 | 62 | dataset = dataset.repeat(config.num_epochs + 1) 63 | dataset = dataset.batch(config.batch_size) 64 | dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) 65 | return dataset 66 | 67 | 68 | class DataLoader: 69 | def __init__(self): 70 | super().__init__() 71 | self.description = { 72 | "in_image": tf.io.FixedLenFeature([], tf.string), 73 | "y_true_1": tf.io.FixedLenFeature([], tf.string), 74 | "y_true_2": tf.io.FixedLenFeature([], tf.string), 75 | "y_true_3": tf.io.FixedLenFeature([], tf.string), 76 | } 77 | 78 | def parse_data(self, tf_record): 79 | features = tf.io.parse_single_example(tf_record, self.description) 80 | 81 | in_image = tf.io.decode_raw(features["in_image"], tf.float32) 82 | in_image = 
 67 | 
 68 | class DataLoader:
 69 |     def __init__(self):
 70 |         super().__init__()
 71 |         self.description = {
 72 |             "in_image": tf.io.FixedLenFeature([], tf.string),
 73 |             "y_true_1": tf.io.FixedLenFeature([], tf.string),
 74 |             "y_true_2": tf.io.FixedLenFeature([], tf.string),
 75 |             "y_true_3": tf.io.FixedLenFeature([], tf.string),
 76 |         }
 77 | 
 78 |     def parse_data(self, tf_record):
 79 |         features = tf.io.parse_single_example(tf_record, self.description)
 80 | 
 81 |         in_image = tf.io.decode_raw(features["in_image"], tf.float32)
 82 |         in_image = tf.reshape(in_image, (config.image_size, config.image_size, 3))
 83 |         in_image = in_image / 255.0
 84 | 
 85 |         y_true_1 = tf.io.decode_raw(features["y_true_1"], tf.float32)
 86 |         y_true_1 = tf.reshape(
 87 |             y_true_1,
 88 |             (
 89 |                 config.image_size // 32,
 90 |                 config.image_size // 32,
 91 |                 3,
 92 |                 5 + len(config.class_dict),
 93 |             ),
 94 |         )
 95 | 
 96 |         y_true_2 = tf.io.decode_raw(features["y_true_2"], tf.float32)
 97 |         y_true_2 = tf.reshape(
 98 |             y_true_2,
 99 |             (
 100 |                 config.image_size // 16,
 101 |                 config.image_size // 16,
 102 |                 3,
 103 |                 5 + len(config.class_dict),
 104 |             ),
 105 |         )
 106 | 
 107 |         y_true_3 = tf.io.decode_raw(features["y_true_3"], tf.float32)
 108 |         y_true_3 = tf.reshape(
 109 |             y_true_3,
 110 |             (
 111 |                 config.image_size // 8,
 112 |                 config.image_size // 8,
 113 |                 3,
 114 |                 5 + len(config.class_dict),
 115 |             ),
 116 |         )
 117 | 
 118 |         return in_image, y_true_1, y_true_2, y_true_3
 119 | 
 120 |     def input_fn(self, file_names):
 121 |         dataset = tf.data.TFRecordDataset(file_names, "GZIP")
 122 |         dataset = dataset.map(self.parse_data, os.cpu_count())
 123 |         dataset = dataset.repeat(config.num_epochs + 1)
 124 |         dataset = dataset.batch(config.batch_size)
 125 |         dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
 126 |         return dataset
 127 | 
-------------------------------------------------------------------------------- /distributed_training/utils/image_utils.py: --------------------------------------------------------------------------------
 1 | import os
 2 | import xml.etree.ElementTree
 3 | import posixpath
 4 | import cv2
 5 | import numpy
 6 | 
 7 | from utils import config
 8 | 
 9 | 
 10 | def load_image(file_name, split="train"):
 11 |     path = posixpath.join(config.data_dir, config.image_dir, split, file_name + ".jpg")
 12 |     image = cv2.imread(path)
 13 |     return image
 14 | 
 15 | 
 16 | def load_label(file_name, split="train"):
 17 |     # Construct the path for the label file
 18 |     path = posixpath.join(config.data_dir, config.label_dir, split, file_name + ".txt")
 19 | 
 20 |     boxes = []
 21 | 
 22 |     # Read the text file line by line
 23 |     with open(path, "r") as f:
 24 |         for line in f:
 25 |             # Split the line into coordinates
 26 |             _, x_min, y_min, x_max, y_max = line.strip().split()
 27 |             x_min = float(x_min)
 28 |             y_min = float(y_min)
 29 |             x_max = float(x_max)
 30 |             y_max = float(y_max)
 31 | 
 32 |             boxes.append([x_min, y_min, x_max, y_max])
 33 | 
 34 |     boxes = numpy.asarray(boxes, numpy.float32)
 35 |     return boxes
 36 | 
 37 | 
 38 | def resize(image, boxes=None):
 39 |     shape = image.shape[:2]
 40 |     scale = min(config.image_size / shape[1], config.image_size / shape[0])
 41 |     image = cv2.resize(image, (int(scale * shape[1]), int(scale * shape[0])))
 42 | 
 43 |     image_padded = numpy.zeros([config.image_size, config.image_size, 3], numpy.uint8)
 44 | 
 45 |     dw = (config.image_size - int(scale * shape[1])) // 2
 46 |     dh = (config.image_size - int(scale * shape[0])) // 2
 47 | 
 48 |     image_padded[
 49 |         dh : int(scale * shape[0]) + dh, dw : int(scale * shape[1]) + dw, :
 50 |     ] = image.copy()
 51 | 
 52 |     if boxes is None:
 53 |         return image_padded, scale, dw, dh
 54 | 
 55 |     else:
 56 |         boxes[:, [0, 2]] = boxes[:, [0, 2]] * scale + dw
 57 |         boxes[:, [1, 3]] = boxes[:, [1, 3]] * scale + dh
 58 | 
 59 |         return image_padded, boxes
 60 | 
 61 | 
 62 | def random_flip(image, boxes):
 63 |     if numpy.random.uniform() < 0.5:
 64 |         image = cv2.flip(image, 1)
 65 |         # fancy indexing swaps x_min/x_max in one step (sequential assignment would overwrite x_min first)
 66 |         boxes[:, [0, 2]] = image.shape[1] - boxes[:, [2, 0]]
 67 |     return image, boxes
 68 | 
 69 | 
 70 | def process_box(boxes):
 71 |     anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
 72 |     anchors = 
config.anchors 73 | box_centers = (boxes[:, 0:2] + boxes[:, 2:4]) / 2 74 | box_size = boxes[:, 2:4] - boxes[:, 0:2] 75 | 76 | y_true_1 = numpy.zeros( 77 | ( 78 | config.image_size // 32, 79 | config.image_size // 32, 80 | 3, 81 | 5 + len(config.class_dict), 82 | ), 83 | numpy.float32, 84 | ) 85 | y_true_2 = numpy.zeros( 86 | ( 87 | config.image_size // 16, 88 | config.image_size // 16, 89 | 3, 90 | 5 + len(config.class_dict), 91 | ), 92 | numpy.float32, 93 | ) 94 | y_true_3 = numpy.zeros( 95 | (config.image_size // 8, config.image_size // 8, 3, 5 + len(config.class_dict)), 96 | numpy.float32, 97 | ) 98 | 99 | y_true = [y_true_1, y_true_2, y_true_3] 100 | 101 | box_size = numpy.expand_dims(box_size, 1) 102 | 103 | min_np = numpy.maximum(-box_size / 2, -anchors / 2) 104 | max_np = numpy.minimum(box_size / 2, anchors / 2) 105 | 106 | whs = max_np - min_np 107 | 108 | overlap = whs[:, :, 0] * whs[:, :, 1] 109 | union = ( 110 | box_size[:, :, 0] * box_size[:, :, 1] 111 | + anchors[:, 0] * anchors[:, 1] 112 | - whs[:, :, 0] * whs[:, :, 1] 113 | + 1e-10 114 | ) 115 | 116 | iou = overlap / union 117 | best_match_idx = numpy.argmax(iou, axis=1) 118 | 119 | ratio_dict = {1.0: 8.0, 2.0: 16.0, 3.0: 32.0} 120 | for i, idx in enumerate(best_match_idx): 121 | feature_map_group = 2 - idx // 3 122 | ratio = ratio_dict[numpy.ceil((idx + 1) / 3.0)] 123 | x = int(numpy.floor(box_centers[i, 0] / ratio)) 124 | y = int(numpy.floor(box_centers[i, 1] / ratio)) 125 | k = anchors_mask[feature_map_group].index(idx) 126 | # c = labels[i] 127 | 128 | y_true[feature_map_group][y, x, k, :2] = box_centers[i] 129 | y_true[feature_map_group][y, x, k, 2:4] = box_size[i] 130 | y_true[feature_map_group][y, x, k, 4] = 1.0 131 | # y_true[feature_map_group][y, x, k, 5 + c] = 1. 132 | 133 | return y_true_1, y_true_2, y_true_3 134 | -------------------------------------------------------------------------------- /distributed_training/weights/model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/distributed_training/weights/model.h5 -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | mlflow: 5 | container_name: mlflow 6 | build: 7 | context: ./mlflow 8 | dockerfile: Dockerfile 9 | ports: 10 | - "5000:5000" 11 | volumes: 12 | - mlflow-artifacts:/mlflow 13 | environment: 14 | - MLFLOW_TRACKING_URI=http://0.0.0.0:5000 15 | - MLFLOW_ARTIFACT_ROOT=/mlflow 16 | 17 | train-service: 18 | container_name: train 19 | build: 20 | context: ./train 21 | dockerfile: Dockerfile 22 | volumes: 23 | - ./train:/app 24 | depends_on: 25 | - mlflow 26 | environment: 27 | - MLFLOW_TRACKING_URI=http://mlflow:5000 28 | 29 | 30 | jenkins: 31 | image: fullstackdatascience/jenkins:lts 32 | container_name: jenkins 33 | restart: unless-stopped 34 | privileged: true 35 | user: root 36 | ports: 37 | - 8081:8080 38 | - 50000:50000 39 | volumes: 40 | - jenkins_home:/var/jenkins_home 41 | - /var/run/docker.sock:/var/run/docker.sock 42 | 43 | 44 | volumes: 45 | mlflow-artifacts: 46 | jenkins_home: 47 | -------------------------------------------------------------------------------- /images/PipelineAllcode.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/PipelineAllcode.png -------------------------------------------------------------------------------- /images/add_credential.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/add_credential.png -------------------------------------------------------------------------------- /images/add_credential_dockerhub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/add_credential_dockerhub.png -------------------------------------------------------------------------------- /images/add_token_dockerhub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/add_token_dockerhub.png -------------------------------------------------------------------------------- /images/architecutre_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/architecutre_overview.png -------------------------------------------------------------------------------- /images/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/bus.jpg -------------------------------------------------------------------------------- /images/check_request_github_jenkins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/check_request_github_jenkins.png -------------------------------------------------------------------------------- /images/connector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/connector.png -------------------------------------------------------------------------------- /images/data_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/data_pipeline.png -------------------------------------------------------------------------------- /images/diagram_pipe.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/diagram_pipe.gif -------------------------------------------------------------------------------- /images/error_log_pod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/error_log_pod.png 
-------------------------------------------------------------------------------- /images/false_modelmesh_deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/false_modelmesh_deploy.png -------------------------------------------------------------------------------- /images/generate_token_docker_hub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/generate_token_docker_hub.png -------------------------------------------------------------------------------- /images/get_token_github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/get_token_github.png -------------------------------------------------------------------------------- /images/github_tokens.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/github_tokens.png -------------------------------------------------------------------------------- /images/instal_docker_jenkins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/instal_docker_jenkins.png -------------------------------------------------------------------------------- /images/install_docker_success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/install_docker_success.png -------------------------------------------------------------------------------- /images/isvc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/isvc.png -------------------------------------------------------------------------------- /images/jenkins_container.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/jenkins_container.png -------------------------------------------------------------------------------- /images/jenkins_portal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/jenkins_portal.png -------------------------------------------------------------------------------- /images/jenkins_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/jenkins_ui.png -------------------------------------------------------------------------------- /images/messenger.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/messenger.png -------------------------------------------------------------------------------- /images/minio-credentials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/minio-credentials.png -------------------------------------------------------------------------------- /images/mlflow _modelregistry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/mlflow _modelregistry.png -------------------------------------------------------------------------------- /images/modelmesh-serving-installation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/modelmesh-serving-installation.png -------------------------------------------------------------------------------- /images/ngrok.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/ngrok.png -------------------------------------------------------------------------------- /images/ngrok_forwarding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/ngrok_forwarding.png -------------------------------------------------------------------------------- /images/password_jenkins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/password_jenkins.png -------------------------------------------------------------------------------- /images/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/result.png -------------------------------------------------------------------------------- /images/result_connect_jenkins_github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/result_connect_jenkins_github.png -------------------------------------------------------------------------------- /images/result_push_dockerhub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/result_push_dockerhub.png -------------------------------------------------------------------------------- /images/result_train_pod.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/result_train_pod.png -------------------------------------------------------------------------------- /images/strategy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/strategy.png -------------------------------------------------------------------------------- /images/strategy_scope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/strategy_scope.png -------------------------------------------------------------------------------- /images/structure_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/structure_data.png -------------------------------------------------------------------------------- /images/structure_training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/structure_training.png -------------------------------------------------------------------------------- /images/topic_tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/topic_tab.png -------------------------------------------------------------------------------- /images/train_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/train_process.png -------------------------------------------------------------------------------- /images/ui_build_jenkins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/ui_build_jenkins.png -------------------------------------------------------------------------------- /images/validate_connect_repo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/validate_connect_repo.png -------------------------------------------------------------------------------- /images/webhook_github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/images/webhook_github.png -------------------------------------------------------------------------------- /mlflow/Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile.mlflow 2 | FROM python:3.9-slim 3 | 4 | RUN pip install mlflow 5 | 6 | CMD ["mlflow", "server", "--host", "0.0.0.0", "--port", "5000"] 
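# Note: `mlflow` is installed unpinned above; for reproducible images consider
# pinning a version (e.g. mlflow==2.9.2 -- version shown is illustrative).
# With this command, run metadata lives in ./mlruns inside the container; a
# --backend-store-uri flag can be added to persist it outside the container.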
-------------------------------------------------------------------------------- /model_repo/yolov8n_car/1/model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/model_repo/yolov8n_car/1/model.onnx -------------------------------------------------------------------------------- /model_repo/yolov8n_car/config.pbtxt: -------------------------------------------------------------------------------- 1 | # Model configuration file (optional) 2 | # https://github.com/triton-inference-server/tutorials/blob/main/Conceptual_Guide/Part_1-model_deployment/README.md#model-configuration 3 | name: "yolov8n_car" 4 | backend: "onnxruntime" # Select the backend to run the model https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton 5 | max_batch_size : 2 # Max batch size the model can support 6 | # In most cases, Triton can help to extract `input` and `output` 7 | # but we should declare it explicitly 8 | input [ 9 | { 10 | name: "images" 11 | data_type: TYPE_FP32 12 | dims: [ 3, 640, 640 ] # If no batch, pls use [ 1, 640, 640 ] 13 | } 14 | ] 15 | output [ 16 | { 17 | name: "output0" 18 | data_type: TYPE_FP32 19 | dims: [ -1, -1 ] # If no batch, pls use [ 84, 8400 ] 20 | } 21 | ] 22 | 23 | instance_group [ { kind: KIND_CPU } ] -------------------------------------------------------------------------------- /notebooks/debug.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import xml.etree.ElementTree\n", 11 | "# import cv2\n", 12 | "import numpy\n", 13 | "from utils import config\n", 14 | "import posixpath" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "def load_image(file_name):\n", 24 | " path = posixpath.join(config.data_dir, config.image_dir, file_name + '.jpg')\n", 25 | " image = cv2.imread(path)\n", 26 | " return image\n", 27 | "\n", 28 | "\n", 29 | "def load_label(file_name, split='train'):\n", 30 | " # Construct the new path for the label file\n", 31 | " path = posixpath.join(config.data_dir, config.label_dir, split, file_name + '.txt')\n", 32 | " \n", 33 | " boxes = []\n", 34 | " \n", 35 | " # Read the text file line by line\n", 36 | " with open(path, 'r') as f:\n", 37 | " for line in f:\n", 38 | " # Split the line into coordinates\n", 39 | " _, x_min, y_min, x_max, y_max = line.strip().split()\n", 40 | " x_min = float(x_min)\n", 41 | " y_min = float(y_min)\n", 42 | " x_max = float(x_max)\n", 43 | " y_max = float(y_max)\n", 44 | "\n", 45 | " boxes.append([x_min, y_min, x_max, y_max])\n", 46 | " \n", 47 | " boxes = numpy.asarray(boxes, numpy.float32) \n", 48 | " return boxes" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "0.562590448668639 0.6324806949999999 0.38422575976331363 0.2265122263157895\n", 61 | "0.9337916063609467 0.564913127368421 0.13241678727810652 0.17631917631578944\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "path = \"Dataset/labels/valid/vid_4_700.txt\"\n", 67 | "with open(path, 'r') as f:\n", 68 | " for line in f:\n", 69 | " # 
print(line)\n", 70 | " _, x_min, y_min, x_max, y_max = line.strip().split()\n", 71 | " print(x_min, y_min, x_max, y_max)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# file_names = []\n", 81 | "# with open(os.path.join(config.data_dir, 'train.txt')) as f:\n", 82 | "# for file_name in f.readlines():\n", 83 | "# image_path = os.path.join(config.data_dir, config.image_dir, file_name.rstrip() + '.jpg')\n", 84 | "# label_path = os.path.join(config.data_dir, config.label_dir, file_name.rstrip() + '.xml')\n", 85 | "# if os.path.exists(image_path) and os.path.exists(label_path):\n", 86 | "# if os.path.exists(os.path.join(config.data_dir, 'TF')):\n", 87 | "# file_names.append(os.path.join(config.data_dir, 'TF', file_name.rstrip() + '.tf'))\n", 88 | "# else:\n", 89 | "# file_names.append(file_name.rstrip())\n", 90 | " \n", 91 | "# print(file_names)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "(284, 338, 284)" 103 | ] 104 | }, 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "\n", 112 | "image_path = posixpath.join(config.data_dir, config.image_dir, 'train')\n", 113 | "label_path = posixpath.join(config.data_dir, config.label_dir, 'train')\n", 114 | "\n", 115 | "image_files = [os.path.splitext(file_name)[0] for file_name in os.listdir(image_path) if file_name.lower().endswith('.jpg')]\n", 116 | "label_files = [os.path.splitext(file_name)[0] for file_name in os.listdir(label_path) if file_name.lower().endswith('.txt')]\n", 117 | "\n", 118 | "file_names = list(set(image_files) & set(label_files))\n", 119 | "len(file_names), len(image_files), len(label_files)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 6, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "" 131 | ] 132 | }, 133 | "execution_count": 6, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "from utils import config, util\n", 140 | "from tensorflow.keras import utils\n", 141 | "import tensorflow as tf\n", 142 | "\n", 143 | "\n", 144 | "class Generator(utils.Sequence):\n", 145 | " def __init__(self, file_names):\n", 146 | " self.file_names = file_names\n", 147 | "\n", 148 | " def __len__(self):\n", 149 | " return int(numpy.floor(len(self.file_names) / config.batch_size))\n", 150 | "\n", 151 | " def __getitem__(self, index):\n", 152 | " image = util.load_image(self.file_names[index])\n", 153 | " boxes = util.load_label(self.file_names[index])\n", 154 | " image, boxes = util.resize(image, boxes)\n", 155 | " # image, boxes = util.random_flip(image, boxes)\n", 156 | "\n", 157 | " image = image[:, :, ::-1].astype(numpy.float32)\n", 158 | " image = image / 255.0\n", 159 | " y_true_1, y_true_2, y_true_3 = util.process_box(boxes)\n", 160 | " return image, y_true_1, y_true_2, y_true_3\n", 161 | "\n", 162 | " def on_epoch_end(self):\n", 163 | " numpy.random.shuffle(self.file_names)\n", 164 | "\n", 165 | "\n", 166 | "def input_fn(file_names):\n", 167 | " # def generator_fn():\n", 168 | " generator = utils.OrderedEnqueuer(Generator(file_names), True)\n", 169 | " # generator.start(workers=min(os.cpu_count(), config.batch_size))\n", 170 | " while True:\n", 171 | " image, y_true_1, y_true_2, y_true_3 = generator.get().__next__()\n", 172 | " 
print(image, y_true_1, y_true_2, y_true_3)\n",
 173 |     "        yield image, y_true_1, y_true_2, y_true_3\n",
 174 |     "    \n",
 175 |     "    \n",
 176 |     "input_fn(file_names)"
 177 |    ]
 178 |   },
 179 |   {
 180 |    "cell_type": "code",
 181 |    "execution_count": 7,
 182 |    "metadata": {},
 183 |    "outputs": [
 184 |     {
 185 |      "name": "stdout",
 186 |      "output_type": "stream",
 187 |      "text": [
 188 |       "vid_4_6380\n",
 189 |       "[[[0. 0. 0.]\n",
       "  ... [output truncated: several hundred rows of all-zero image and y_true arrays omitted] ...\n"
 724 |      ]
 725 |     }
 726 |    ],
 727 |    "source": [
 728 |     "index = 0\n",
 729 |     "print(file_names[index])\n",
 730 |     "image = util.load_image(file_names[index])\n",
 731 |     "boxes = util.load_label(file_names[index])\n",
 732 |     "image, boxes = util.resize(image, boxes)\n",
 733 |     "# image, boxes = util.random_flip(image, boxes)\n",
 734 |     "\n",
 735 |     "image = image[:, :, ::-1].astype(numpy.float32)\n",
 736 |     "image = image / 255.0\n",
 737 |     "y_true_1, y_true_2, y_true_3 = util.process_box(boxes)\n",
 738 |     "print(image, y_true_1, y_true_2, y_true_3)\n"
 739 |    ]
 740 |   },
 741 |   {
 742 |    "cell_type": "code",
 743 |    "execution_count": 8,
 744 |    "metadata": {},
 745 |    "outputs": [],
 746 |    "source": [
 747 |     "# # Create an iterator for the dataset\n",
 748 |     "# iterator = iter(dataset)\n",
 749 |     "\n",
 750 |     "# # Get one sample from the dataset\n",
 751 |     "# sample = next(iterator)\n",
 752 |     "\n",
 753 |     "# # Unpack the sample\n",
 754 |     "# image, y_true_1, y_true_2, y_true_3 = sample"
 755 |    ]
 756 |   }
 757 |  ],
 758 |  "metadata": {
 759 |   "kernelspec": {
 760 |    "display_name": "Python 3",
 761 |    "language": "python",
 762 |    "name": "python3"
 763 |   },
 764 |   "language_info": {
 765 |    "codemirror_mode": {
 766 |     "name": "ipython",
 767 |     "version": 3
 768 |    },
 769 |    "file_extension": ".py",
 770 |    "mimetype": "text/x-python",
 771 |    "name": "python",
 772 |    "nbconvert_exporter": "python",
 773 |    "pygments_lexer": "ipython3",
 774 |    "version": "3.10.13"
 775 |   }
 776 |  },
 777 |  "nbformat": 4,
 778 |  "nbformat_minor": 2
 779 | }
 780 | 
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
 1 | minio==7.1.17
 2 | ultralytics==8.0.202
 3 | onnx==1.15.0
 4 | tritonclient[http]==2.39.0
 5 | onnxsim==0.4.35
 6 | onnxruntime-gpu==1.16.1
 7 | tensorrt==8.6.1
 8 | cuda-python==12.3.0
 9 | 
-------------------------------------------------------------------------------- /streaming/Dockerfile: --------------------------------------------------------------------------------
 1 | FROM python:3.8-slim
 2 | 
 3 | # Install dependencies
 4 | RUN pip3 install kafka-python==2.0.2
 5 | RUN pip3 install avro==1.11.1
 6 | RUN pip3 install pandas==1.5.1
 7 | RUN pip3 install pyarrow==10.0.1
 8 | RUN pip3 install python-schema-registry-client==2.4.1
 9 | RUN pip3 install 
10 |
11 | # Copy app handler code
12 | COPY produce.py produce.py
13 | # COPY kafka_producer/generate_schemas.py generate_schemas.py
14 | # COPY run.sh .
15 | # # Uncomment this to generate a random schema
16 | # RUN chmod +x /run.sh && ./run.sh generate_schemas
17 |
18 | CMD [ "python", "-u", "produce.py", "--mode", "setup", "--bootstrap_servers", "broker:29092"]
-------------------------------------------------------------------------------- /streaming/README.md: --------------------------------------------------------------------------------
1 | # Data Pipeline Guide
2 | ## Table of Contents
3 | 1. [Introduction](#introduction)
4 | 2. [Dataset Setup](#dataset-setup)
5 |    - [Downloading the Dataset](#downloading-the-dataset)
6 |    - [Folder Structure](#folder-structure)
7 | 3. [Kafka and Flink Setup](#kafka-and-flink-setup)
8 |    - [Starting Docker Compose](#starting-docker-compose)
9 |    - [Accessing Kafka Control Center](#accessing-kafka-control-center)
10 | 4. [Viewing Kafka Topics](#viewing-kafka-topics)
11 |    - [Accessing the Topics Tab](#accessing-the-topics-tab)
12 |    - [Viewing Topic Messages](#viewing-topic-messages)
13 | 5. [Adding Kafka Connector](#adding-kafka-connector)
14 | 6. [Verifying Data in PostgreSQL](#verifying-data-in-postgresql)
15 | 7. [Note](#note)
16 |
17 | ---
18 | ![DataPipeline](images/data-pipeline.png)
19 | ## Introduction
20 | In this step, we focus on the real-time data processing component of our pipeline. The goal is to simulate and process streaming data to enhance the robustness of our data pipeline.
21 |
22 | 👉 We achieve this by leveraging **Kafka** for stream ingestion and **Apache Flink** for stream processing.
23 |
24 | **Note:** In this project, Kafka acts as the backbone for stream data ingestion, handling both real and simulated (fake) stream data. **Apache Flink** processes this data in real time, ensuring that the processed data is available in our **Redis** online store. Redis, in turn, is synced with our **PostgreSQL** offline store, providing a unified data storage solution.
25 |
26 | Key steps in this process include:
27 |
28 | 1. **Kafka Producer Setup**: The Kafka producer service is responsible for continuously sending data streams. You can customize the message format, bind data to messages, and specify the Kafka topic for message distribution.
29 |
30 | 2. **Stream Processing with Apache Flink**: Flink processes the incoming data streams, transforming them into a format suitable for storage in Redis. This ensures that data is available for immediate use in both real-time and batch processing scenarios.
31 |
32 | 3. **Data Syncing**: Redis, serving as the online store, is synced with PostgreSQL to maintain consistency between real-time and offline data.
33 |
34 | For more details on setting up the Kafka producer and configuring Flink for stream processing, refer to the [Confluent PostgreSQL Sink Guide](https://docs.confluent.io/cloud/current/connectors/cc-postgresql-sink.html#step-6-check-the-results-in-postgresql).
35 |
36 |
37 | # Dataset Setup
38 |
39 | ## Downloading the Dataset
40 | To begin, download the dataset required for streaming from the following link: [Dataset Link](https://drive.google.com/drive/folders/12ncEAoWT_kwuPT8YRdFysqgS54XJwre7?usp=drive_link)
41 |
42 | ## Folder Structure
43 | The structure of the folder will be like this:
44 |
45 |
46 | ![Structure Data Folder](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/structure_data.png?raw=true)
47 |
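48 | In plain terms, the producer expects the images under `Dataset/images/train` — this is the path mounted into the producer container in `docker-compose.yml`. A sketch of the expected layout (file names are illustrative):
49 |
50 | ```
51 | streaming/
52 | └── Dataset/
53 |     └── images/
54 |         └── train/
55 |             ├── 0001.jpg
56 |             ├── 0002.jpg
57 |             └── ...
58 | ```
59 |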
60 | # Kafka and Flink Setup
61 |
62 | ## Starting Docker Compose
63 | If you haven't already done so in previous steps, start the Docker Compose setup to launch the necessary services.
64 |
65 | ## Accessing Kafka Control Center
66 | Once Docker Compose is running, you can access the Kafka Control Center by navigating to `http://localhost:9021`. This interface allows you to manage and monitor your Kafka cluster.
67 |
68 | # Viewing Kafka Topics
69 |
70 | ## Accessing the Topics Tab
71 | To view the available Kafka topics, click on the `Topics` tab within the Kafka Control Center. You can follow the steps outlined in the image below:
72 |
73 | ![Topic Tab](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/topic_tab.png?raw=true)
74 |
75 | ## Viewing Topic Messages
76 | Select a specific topic (e.g., `image_0`) to view the messages being transmitted:
77 |
78 | ![Message](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/messenger.png?raw=true)
79 |
80 | # Adding Kafka Connector
81 | To ensure that messages are forwarded to PostgreSQL, you need to add a Kafka connector. An example configuration file, `connect-timescaledb-sink.json`, is provided in this repository for your reference (see the example commands at the end of this guide):
82 |
83 | ![Connector](https://github.com/HungNguyenDev1511/Car-detection-serving-model/blob/refactor/images/connector.png?raw=true)
84 |
85 | # Verifying Data in PostgreSQL
86 | Finally, after setting up the Kafka connector, verify that the data has been transferred to PostgreSQL by querying the database with SQL and confirming that the records are stored correctly and ready for training.
87 |
88 | # Note
89 | Before proceeding to data verification, ensure that the Kafka connector is properly configured and operational. This step is crucial for the successful transmission of data from Kafka to PostgreSQL.
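90 |
91 | As a reference, here is a minimal sketch of the registration and verification steps. It assumes the services defined in `docker-compose.yml` are up, and that the JDBC sink's `auto.create` option creates a table named after the configured topic (`sink_images_0`); adjust names if your setup differs:
92 |
93 | ```bash
94 | # Register the TimescaleDB sink connector (the Kafka Connect REST API is published on localhost:8083)
95 | ./run.sh register_connector kafka_connector/connect-timescaledb-sink.json
96 |
97 | # Check that the connector is RUNNING ("images-sink" is the name set in the config file)
98 | curl -s http://localhost:8083/connectors/images-sink/status
99 |
100 | # Query the offline store (the k6 user/database/password come from docker-compose.yml)
101 | docker exec -it flink-timescaledb psql -U k6 -d k6 \
102 |   -c 'SELECT image_id, octet_length(image_data) AS image_bytes FROM "sink_images_0" LIMIT 5;'
103 | ```
104 | One caveat worth noting: `produce.py` publishes to the `image_0` topic while the provided sink config subscribes to `sink_images_0`, so align the connector's `topics` setting with the topic you actually produce to if no rows appear.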
105 |
-------------------------------------------------------------------------------- /streaming/docker-compose.yml: --------------------------------------------------------------------------------
1 | services:
2 |   zookeeper:
3 |     image: confluentinc/cp-zookeeper:7.5.0
4 |     # hostname: zookeeper
5 |     container_name: flink-zookeeper
6 |     ports:
7 |       - "2181:2181"
8 |     healthcheck:
9 |       test: echo srvr | nc zookeeper 2181 || exit 1
10 |       start_period: 10s
11 |       retries: 20
12 |       interval: 10s
13 |     environment:
14 |       ZOOKEEPER_CLIENT_PORT: 2181
15 |       ZOOKEEPER_TICK_TIME: 2000
16 |
17 |   # Kafka broker
18 |   broker:
19 |     image: confluentinc/cp-server:7.5.0
20 |     # hostname: broker
21 |     container_name: flink-broker
22 |     depends_on:
23 |       - zookeeper
24 |     ports:
25 |       - "9092:9092"
26 |       - "9101:9101"
27 |     healthcheck:
28 |       test: nc -z localhost 9092 || exit 1
29 |       start_period: 15s
30 |       interval: 5s
31 |       timeout: 10s
32 |       retries: 10
33 |     environment:
34 |       KAFKA_BROKER_ID: 1
35 |       KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
36 |       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
37 |       KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
38 |       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
39 |
40 |   # For managing Avro schemas
41 |   schema-registry:
42 |     image: confluentinc/cp-schema-registry:7.5.0
43 |     # hostname: schema-registry
44 |     container_name: flink-schema-registry
45 |     depends_on:
46 |       - broker
47 |     ports:
48 |       - "8081:8081"
49 |     healthcheck:
50 |       start_period: 10s
51 |       interval: 10s
52 |       retries: 20
53 |       test: curl --user superUser:superUser --fail --silent --insecure http://localhost:8081/subjects --output /dev/null || exit 1
54 |     environment:
55 |       SCHEMA_REGISTRY_HOST_NAME: schema-registry
56 |       SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092'
57 |       SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
58 |
59 |   # For connecting to offline store
60 |   connect:
61 |     image: confluentinc/cp-kafka-connect:7.5.0
62 |     # hostname: connect
63 |     container_name: flink-connect
64 |     depends_on:
65 |       broker:
66 |         condition: service_healthy
67 |       schema-registry:
68 |         condition: service_healthy
69 |       zookeeper:
70 |         condition: service_healthy
71 |     ports:
72 |       - "8083:8083"
73 |     environment:
74 |       CONNECT_BOOTSTRAP_SERVERS: 'broker:29092'
75 |       CONNECT_REST_ADVERTISED_HOST_NAME: connect
76 |       CONNECT_REST_PORT: 8083
77 |       CONNECT_GROUP_ID: compose-connect-group
78 |       CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs
79 |       CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1
80 |       CONNECT_OFFSET_FLUSH_INTERVAL_MS: 10000
81 |       CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets
82 |       CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1
83 |       CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status
84 |       CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1
85 |       CONNECT_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter
86 |       CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
87 |       CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: true
88 |       CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: true
89 |       CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
90 |       CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: http://schema-registry:8081
91 |       CONNECT_PLUGIN_PATH: '/usr/share/java,/etc/kafka-connect/jars'
92 |
93 |     volumes:
94 |       # Mount the JDBC connector jars into the plugin path declared above
95 |       - ./kafka_connector/jars:/etc/kafka-connect/jars
96 |
97 |   # Confluent control center to manage Kafka
98 |   control-center:
99 |     image: confluentinc/cp-enterprise-control-center:7.5.0
100 |     # hostname: control-center
101 |     container_name: flink-control-center
102 |     depends_on:
103 |       - broker
104 |       - schema-registry
105 |       - connect
106 |     ports:
107 |       - "9021:9021"
108 |     healthcheck:
109 |       test: ["CMD", "curl", "-f", "http://localhost:9021/healthcheck"] # Adjust the URL and options as needed
110 |       interval: 30s
111 |       timeout: 10s
112 |       retries: 3
113 |     environment:
114 |       CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092'
115 |       CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'connect:8083'
116 |       # CONTROL_CENTER_KSQL_KSQLDB1_URL: "http://ksqldb-server:8088"
117 |       # CONTROL_CENTER_KSQL_KSQLDB1_ADVERTISED_URL: "http://localhost:8088"
118 |       CONTROL_CENTER_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
119 |       CONTROL_CENTER_REPLICATION_FACTOR: 1
120 |       CONTROL_CENTER_INTERNAL_TOPICS_PARTITIONS: 1
121 |       # CONTROL_CENTER_MONITORING_INTERCEPTOR_TOPIC_PARTITIONS: 1
122 |       CONTROL_CENTER_CONNECT_HEALTHCHECK_ENDPOINT: '/connectors'
123 |       CONFLUENT_METRICS_TOPIC_REPLICATION: 1
124 |       # PORT: 9021
125 |
126 |   # Offline store
127 |   timescaledb:
128 |     image: timescale/timescaledb:latest-pg13
129 |     command: postgres -c shared_preload_libraries=timescaledb
130 |     container_name: flink-timescaledb
131 |     ports:
132 |       - "5432:5432"
133 |     healthcheck:
134 |       test: ['CMD', 'psql', '-U', 'k6', '-c', 'SELECT 1']
135 |       interval: 10s
136 |       timeout: 5s
137 |       retries: 5
138 |     environment:
139 |       - PGDATA=/var/lib/postgresql/data/timescaledb
140 |       - POSTGRES_DB=k6
141 |       - POSTGRES_USER=k6
142 |       - POSTGRES_PASSWORD=k6
143 |
144 |   # Simulation of sending messages to Kafka topics
145 |   kafka_producer:
146 |     build:
147 |       context: .
148 |       # The Dockerfile sits next to this compose file, so a path relative to the build context is used
149 |       dockerfile: Dockerfile
150 |     depends_on:
151 |       broker:
152 |         condition: service_healthy
153 |       timescaledb:
154 |         condition: service_healthy
155 |     container_name: flink-kafka-producer
156 |     volumes:
157 |       # Mount the training images so produce.py can read them from /images
158 |       - ./Dataset/images/train:/images
-------------------------------------------------------------------------------- /streaming/images/data-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HungNguyenDev1511/Car-detection-serving-model/493b25a3784ec8357420ca43d2e5029376c21dae/streaming/images/data-pipeline.png -------------------------------------------------------------------------------- /streaming/kafka_connector/connect-timescaledb-sink.json: --------------------------------------------------------------------------------
1 | {
2 |   "name": "images-sink",
3 |   "config": {
4 |     "connector.class": "io.confluent.connect.jdbc.JdbcSinkConnector",
5 |     "tasks.max": "1",
6 |     "topics": "sink_images_0",
7 |     "connection.url": "jdbc:postgresql://host.docker.internal:5432/k6",
8 |     "connection.user": "k6",
9 |     "connection.password": "k6",
10 |     "auto.create": true
11 |   }
12 | }
-------------------------------------------------------------------------------- /streaming/produce.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 | from time import sleep
5 |
6 | from bson import json_util
7 | from kafka import KafkaAdminClient, KafkaProducer
8 | from kafka.admin import NewTopic
9 | from kafka.errors import TopicAlreadyExistsError
10 |
11 | parser = argparse.ArgumentParser()
12 | parser.add_argument(
13 |     "-m",
14 |     "--mode",
15 |     default="setup",
16 |     choices=["setup", "teardown"],
17 |     help="Whether to set up or tear down the Kafka topics carrying image events. Setup tears down any existing topics before it starts emitting events.",
18 | )
19 | parser.add_argument(
20 |     "-b",
21 |     "--bootstrap_servers",
22 |     default="localhost:9092",
23 |     help="Where the bootstrap server is",
24 | )
25 | parser.add_argument(
26 |     "-c",
27 |     "--schemas_path",
28 |     default="./avro_schemas",
29 |     help="Folder containing all generated avro schemas",
30 | )
31 | parser.add_argument(
32 |     "-i",
33 |     "--image_dir",
34 |     default="./images",
35 |     help="Directory containing the images to send",
36 | )
37 |
38 | args = parser.parse_args()
39 |
40 | # Only one image topic (image_0) is produced below; raise this to shard across more topics
41 | NUM_IMAGE = 1
42 | image_id_counter = 1
43 |
44 |
45 | def create_topic(admin, topic_name):
46 |     # Create the topic if it does not exist yet
47 |     try:
48 |         topic = NewTopic(name=topic_name, num_partitions=1, replication_factor=1)
49 |         admin.create_topics([topic])
50 |         print(f"A new topic {topic_name} has been created!")
51 |     except TopicAlreadyExistsError:
52 |         print(f"Topic {topic_name} already exists. Skipping creation!")
53 |
54 |
55 | def create_streams(servers, schemas_path, image_dir):
56 |     # The counter is defined at module level, so declare it before mutating it below
57 |     global image_id_counter
58 |
59 |     producer = None
60 |     admin = None
61 |     for _ in range(10):
62 |         try:
63 |             producer = KafkaProducer(bootstrap_servers=servers)
64 |             admin = KafkaAdminClient(bootstrap_servers=servers)
65 |             print("SUCCESS: instantiated Kafka admin and producer")
66 |             break
67 |         except Exception as e:
68 |             print(
69 |                 f"Trying to instantiate admin and producer with bootstrap servers {servers} with error {e}"
70 |             )
71 |             sleep(10)
72 |
73 |     image_files = [
74 |         os.path.join(image_dir, f)
75 |         for f in os.listdir(image_dir)
76 |         if os.path.isfile(os.path.join(image_dir, f))
77 |     ]
78 |     image_index = 0
79 |
80 |     while True:
81 |         image_file = image_files[image_index]
82 |         image_index = (image_index + 1) % len(image_files)
83 |
84 |         with open(image_file, "rb") as img_file:
85 |             image_data = img_file.read()
86 |
87 |         record = {
88 |             "schema": {
89 |                 "type": "struct",
90 |                 "fields": [
91 |                     {"type": "int64", "optional": False, "field": "image_id"},
92 |                     {"type": "bytes", "optional": False, "field": "image_data"},
93 |                 ],
94 |             }
95 |         }
96 |         record["payload"] = {}
97 |         record["payload"]["image_id"] = image_id_counter
98 |         image_id_counter += 1  # increment the image id
99 |         record["payload"]["image_data"] = image_data
100 |
101 |         # Get topic name for this image
102 |         topic_name = "image_0"
103 |
104 |         # Create a new topic for this image if not exists
105 |         create_topic(admin, topic_name=topic_name)
106 |
107 |         # Send messages to this topic
108 |         producer.send(
109 |             topic_name, json.dumps(record, default=json_util.default).encode("utf-8")
110 |         )
111 |         print(record)
112 |         sleep(2)
113 |
114 |
115 | def teardown_stream(topic_name, servers=["localhost:9092"]):
116 |     try:
117 |         admin = KafkaAdminClient(bootstrap_servers=servers)
118 |         print(admin.delete_topics([topic_name]))
119 |         print(f"Topic {topic_name} deleted")
120 |     except Exception as e:
121 |         print(str(e))
122 |
123 |
124 | if __name__ == "__main__":
125 |     parsed_args = vars(args)
126 |     mode = parsed_args["mode"]
127 |     servers = parsed_args["bootstrap_servers"]
128 |     image_dir = parsed_args["image_dir"]
129 |
130 |     # Tear down all previous streams
131 |     print("Tearing down all existing topics!")
132 |     for image_id in range(NUM_IMAGE):
133 |         try:
134 |             teardown_stream(f"image_{image_id}", [servers])
135 |         except Exception:
136 |             print(f"Topic image_{image_id} does not exist. Skipping...!")
Skipping...!") 140 | 141 | if mode == "setup": 142 | schemas_path = parsed_args["schemas_path"] 143 | create_streams([servers], schemas_path, image_dir) 144 | -------------------------------------------------------------------------------- /streaming/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cmd=$1 3 | 4 | usage() { 5 | echo "run.sh " 6 | echo "Available commands:" 7 | echo " register_connector register a new Kafka connector" 8 | echo " start_streaming start streaming to Kafka" 9 | echo " stop_streaming stop streaming to Kafka" 10 | echo "Available arguments:" 11 | echo " [connector config path] path to connector config, for command register_connector only" 12 | } 13 | 14 | if [[ -z "$cmd" ]]; then 15 | echo "Missing command" 16 | usage 17 | exit 1 18 | fi 19 | 20 | case $cmd in 21 | register_connector) 22 | if [[ -z "$2" ]]; then 23 | echo "Missing connector config path" 24 | usage 25 | exit 1 26 | else 27 | echo "Registering a new connector from $2" 28 | # Assign a connector config path such as: kafka_connect_jdbc/configs/connect-timescaledb-sink.json 29 | curl -s -X POST -H 'Content-Type: application/json' --data @$2 http://localhost:8083/connectors 30 | fi 31 | ;; 32 | generate_schemas) 33 | # Generate data for 1 device with number of features in the range from 2 to 10 34 | python generate_schemas.py --min_features 2 --max_features 10 --num_schemas 1 35 | ;; 36 | *) 37 | echo -n "Unknown command: $cmd" 38 | usage 39 | exit 1 40 | ;; 41 | esac --------------------------------------------------------------------------------