├── .env-sample ├── .gitignore ├── LICENSE ├── README.md ├── assets └── AI in Logistics Container Number Recognition Header.jpg ├── cloudbuild.yaml ├── container_prefix.txt ├── main.py └── requirements.txt /.env-sample: -------------------------------------------------------------------------------- 1 | VISION_ENDPOINT="" 2 | VISION_KEY="" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | models/*.pth 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # PyBuilder 62 | .pybuilder/ 63 | target/ 64 | 65 | # Jupyter Notebook 66 | .ipynb_checkpoints 67 | 68 | # IPython 69 | profile_default/ 70 | ipython_config.py 71 | 72 | # pyenv 73 | # For a library or package, you might want to ignore these files since the code is 74 | # intended to run in multiple environments; otherwise, check them in: 75 | # .python-version 76 | 77 | # pipenv 78 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 79 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 80 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 81 | # install all needed dependencies. 82 | #Pipfile.lock 83 | 84 | # poetry 85 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 86 | # This is especially recommended for binary packages to ensure reproducibility, and is more 87 | # commonly ignored for libraries. 88 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 89 | #poetry.lock 90 | 91 | # pdm 92 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 93 | #pdm.lock 94 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 95 | # in version control. 96 | # https://pdm.fming.dev/#use-with-ide 97 | .pdm.toml 98 | 99 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Cython debug symbols 119 | cython_debug/ 120 | 121 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jonathan Law 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI in Logistics: Container Number Recognition 2 | 3 | [[`Project Writeup`](https://medium.com/@jonathanlawhh) [`My Website`](https://jonathanlawhh.com/)] 4 | 5 | ## Project Overview 6 | ![AI in Logistics: Container Number Recognition header image](/assets/AI%20in%20Logistics%20Container%20Number%20Recognition%20Header.jpg) 7 | Traditional container tracking often relies on manual scans and tedious paperwork, creating inefficiencies and bottlenecks. 8 | This project leverages Optical Character Recognition (OCR) technology to automatically read container numbers directly from images, offering innovation in logistics management. 9 | 10 | Companies using this AI solution can now enjoy real-time visibility into container movement within their premises. 11 | 12 | ## References 13 | - [Azure AI Vision](https://azure.microsoft.com/en-us/products/ai-services/ai-vision) by Microsoft Azure 14 | - [OpenCV](https://opencv.org/) 15 | 16 | ## Setup and Usage 17 | 18 | ### Software Requirements 19 | - Python >= 3.10 20 | - [Microsoft Azure Vision API](https://azure.microsoft.com/en-us/products/ai-services/ai-vision) API keys 21 | 22 | ### Installation 23 | 24 | 1. Clone this repository: 25 | ```bash 26 | git clone https://github.com/jonathanlawhh/container-number-recognition-ai.git 27 | ``` 28 | 2. Install required libraries: 29 | ```bash 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ### Usage 34 | 35 | 1. Place your container images in the .\data\ folder. 36 | 2. Rename `.env-sample` to `.env` 37 | 3. Fill up both values in .env `VISION_ENDPOINT` and `VISION_KEY` from your Microsoft Azure Vision API project. 38 | 4. Run the script. 
39 | ```bash 40 | python main.py 41 | ``` 42 | 43 | ## Closing thoughts 44 | 45 | - Using a ready built service such as Azure Vision AI offloads most of the image processing task 46 | - Azure Vision API is more reliable than building using Tesseract OCR if the environment is dynamic, performance is more consistent compared to running on a local hardware 47 | - Can be integrated with in-house Transport Management Systems -------------------------------------------------------------------------------- /assets/AI in Logistics Container Number Recognition Header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonathanlawhh/container-number-recognition-ai/334ba97b714b937a0416811fb8b9b994807b2175/assets/AI in Logistics Container Number Recognition Header.jpg -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/gcloud' 3 | args: 4 | - functions 5 | - deploy 6 | - container_number_recognition_ai # Replace with your function name 7 | - --runtime=python39 8 | - --trigger-http 9 | - --allow-unauthenticated 10 | - --entry-point=http_request # Replace with your entry point function 11 | - --region=us-central1 # Replace with your desired region 12 | - --max-instances=2 13 | - --set-secrets=VISION_ENDPOINT=CNR-VISION_ENDPOINT:latest,VISION_KEY=CNR-VISION_KEY:latest 14 | options: 15 | logging: CLOUD_LOGGING_ONLY -------------------------------------------------------------------------------- /container_prefix.txt: -------------------------------------------------------------------------------- 1 | 
AAAU,ACCU,ACLU,ACXU,AEXU,AKLU,ALLU,ALMU,ALNU,ALRU,AMCU,AMFU,AMZU,ANNU,ANYU,APHU,APLU,APMU,APRU,APZU,ARCU,ARDU,ARKU,ARMU,ARTU,ASIU,ATBU,ATVU,AWSU,AXIU,BAFU,BAXU,BBXU,BCGU,BCHU,BCLU,BEAU,BENU,BGFU,BHCU,BISU,BLJU,BLKU,BLTU,BMLU,BMOU,BORU,BOXU,BRFU,BRKU,BSIU,BUTU,BVIU,BXRU,CAAU,CADU,CAEU,CAIU,CAJU,CARU,CASU,CATU,CAXU,CAZU,CBCU,CBHU,CCGU,CCLU,CCRU,CCSU,CDDU,CDKU,CEOU,CESU,CGIU,CGMU,CHIU,CHSU,CINU,CIPU,CKLU,CLCU,CLHU,CLOU,CLTU,CLXU,CMAU,CMCU,CMHU,CMNU,CMTU,CMUU,CNCU,CNEU,CNIU,CNSU,COCU,CORU,COZU,CPIU,CPLU,CPSU,CPWU,CRLU,CRSU,CRTU,CRXU,CSFU,CSLU,CSNU,CSOU,CSQU,CSVU,CTMU,CTWU,CTXU,CUBU,CUCU,CULU,CXCU,CXDU,CXIU,CXRU,CXSU,CXTU,CZLU,CZZU,DACU,DAMU,DAYU,DBOU,DCSU,DDCU,DFIU,DFOU,DFSU,DJLU,DLKU,DNAU,DNCU,DRYU,DTPU,DVRU,DYLU,EASU,ECMU,ECNU,ECSU,EGHU,EGSU,EIMU,EISU,EITU,EKLU,EMAU,EMCU,ENAU,EQRU,ERFU,ESDU,ESLU,ESPU,ESSU,ETNU,EUCU,EURU,EUXU,EVAU,EXFU,EXXU,FAAU,FAMU,FANU,FBIU,FBLU,FBXU,FCBU,FCCU,FCGU,FCIU,FCLU,FCXU,FESU,FFAU,FJKU,FLTU,FNBU,FNGU,FNTU,FPTU,FRLU,FSCU,FTAU,FVIU,FWUU,FXLU,GAEU,GAFU,GAOU,GASU,GATU,GAZU,GCEU,GCNU,GCUU,GELU,GESU,GETU,GGOU,GIPU,GLDU,GMOU,GNSU,GRCU,GRDU,GRIU,GSLU,GSPU,GSSU,GSTU,GTIU,GUTU,GVCU,GVDU,GVTU,HAKU,HALU,HAMU,HASU,HBSU,HBTU,HCIU,HCSU,HDMU,HGBU,HGFU,HGHU,HGTU,HIBU,HJMU,HLBU,HLCU,HLSU,HLXU,HMCU,HMKU,HMMU,HMPU,HNPU,HNSU,HOTU,HOYU,HRZU,HSIU,HSTU,HTCU,IAAU,IBLU,ICDU,ICLU,ICOU,ICSU,ICTU,ICUU,IEAU,IHOU,IJCU,IKKU,IKSU,IMTU,INBU,INGU,INKU,INNU,INTU,IPXU,IRNU,ITAU,ITLU,ITTU,IVLU,JAYU,JSSU,JTMU,JXJU,JXLU,KHJU,KHLU,KKFU,KKLU,KKTU,KLCU,KLFU,KLTU,KMTU,KNLU,KOSU,KTNU,KXTU,LCRU,LGEU,LLTU,LMCU,LNXU,LOGU,LOTU,LSEU,LTIU,LVNU,LYGU,MAEU,MAGU,MALU,MANU,MARU,MATU,MAXU,MBBU,MBDU,MBGU,MBIU,MBJU,MBTU,MCAU,MCHU,MCLU,MCPU,MCRU,MCTU,MEDU,MERU,MEXU,MFRU,MFTU,MGLU,MGNU,MHHU,MIEU,MLCU,MMAU,MMCU,MNBU,MOAU,MOCU,MOEU,MOFU,MOGU,MOLU,MOMU,MORU,MOSU,MOTU,MRKU,MRSU,MSAU,MSCU,MSDU,MSFU,MSHU,MSKU,MSMU,MSNU,MSOU,MSPU,MSTU,MSUU,MSWU,MSYU,MSZU,MTBU,MTRU,MTSU,MTYU,MVIU,MWCU,MWMU,NDSU,NEVU,NIDU,NIRU,NLLU,NOLU,NOSU,NSAU,NSCU,NSRU,NSSU,NSTU,NYKU,NZDU,NZKU,OCGU,OCLU,OCVU,OFFU,ONEU,OOCU,OOLU,OPDU,OTAU,
OTEU,OTPU,OWLU,OWNU,PBIU,PCIU,PCLU,PCVU,PDLU,PGTU,PGXU,PHHU,PHLU,PILU,POCU,PONU,PQIU,PRGU,PRKU,PRSU,PSCU,PSOU,PSSU,PVDU,PXCU,QBXU,QIBU,QNNU,RALU,RAVU,RCDU,RCLU,REGU,RFCU,RFLU,RFSU,RJCU,RLTU,RLXU,RMCU,RMTU,RSLU,RSSU,RSTU,RTHU,RWLU,RWTU,SACU,SANU,SAXU,SBIU,SBOU,SCEU,SCMU,SCNU,SCPU,SCSU,SCXU,SCZU,SDDU,SDOU,SEAU,SECU,SEFU,SEGU,SEKU,SELU,SEMU,SESU,SEXU,SGCU,SGRU,SIIU,SIKU,SITU,SJKU,SKHU,SKIU,SKLU,SKRU,SKYU,SLEU,SLSU,SLZU,SMCU,SMLU,SMUU,SNBU,SNHU,SNIU,SNTU,SOCU,SOFU,SPKU,SPLU,SPWU,STBU,STJU,STMU,STRU,STXU,SUDU,SVDU,SVWU,SWLU,SWTU,SZLU,TABU,TAIU,TARU,TASU,TCBU,TCIU,TCKU,TCLU,TCNU,TCUU,TDRU,TDTU,TEMU,TENU,TEXU,TGBU,TGCU,TGHU,TGSU,TIFU,TIIU,TISU,TITU,TLCU,TLEU,TLLU,TLNU,TLXU,TMIU,TMLU,TMMU,TMYU,TOLU,TOPU,TORU,TPCU,TPHU,TPMU,TPTU,TQMU,TRDU,TRHU,TRIU,TRKU,TRLU,TRTU,TRVU,TSGU,TSSU,TSTU,TTNU,TXGU,UACU,UAEU,UASU,UESU,UETU,UFCU,UGMU,UNDU,UNIU,UNOU,UNRU,UNSU,UNXU,USPU,UTCU,UXXU,VDMU,VMLU,VSBU,VSTU,WABU,WBPU,WCIU,WCXU,WECU,WEDU,WFHU,WHLU,WHSU,WLNU,WSCU,WSLU,WTLU,WWLU,XINU,XTRU,YMLU,YMMU,YOIU,ZCLU,ZCSU,ZIMU,ZMOU -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | from typing import Tuple 4 | import azure.ai.vision as sdk 5 | import numpy as np 6 | from dotenv import load_dotenv 7 | import cv2 8 | import json 9 | import re 10 | 11 | load_dotenv() 12 | assert os.environ["VISION_ENDPOINT"] != "", "Please ensure VISION ENDPOINT is setup in .env file." 13 | assert os.environ["VISION_KEY"] != "", "Please ensure VISION KEY is setup in .env file." 
14 | 15 | service_options = sdk.VisionServiceOptions(os.environ["VISION_ENDPOINT"], 16 | os.environ["VISION_KEY"]) 17 | 18 | 19 | class CNRAI: 20 | def __init__(self): 21 | self.container_number: str = "" 22 | self.container_type: str = "" 23 | self.bounding_box: list = [0, 0, 0, 0] 24 | self.container_color: list = [0, 0, 0] 25 | self.error: str | None = None 26 | 27 | 28 | def downscale(ori_img: np.ndarray) -> np.ndarray: 29 | """ 30 | If the image is >= 2000 pixels wide, downscale it proportionally 31 | :param ori_img: Input original image 32 | :return: Output resized image 33 | """ 34 | h, w, c = ori_img.shape 35 | if w >= 1000: 36 | # If the value is 2000, take the down_factor as 2. If the value is 3000, take the factor as 3 37 | down_factor: int = (w // 1000 % 10) * 2 38 | return cv2.resize(ori_img, (w // down_factor, h // down_factor)) 39 | 40 | return ori_img 41 | 42 | 43 | def check_orientation_horizontal(bounding_box: list) -> bool: 44 | """ 45 | Helper function to check if text is moving horizontally or vertically 46 | Simply, the difference between the x-axis compared with y-axis determines the orientation 47 | :param bounding_box: [x1, y1, x2, y2,...x6, y6] 48 | :return: True if it is horizontal, else False if it vertical 49 | """ 50 | return bounding_box[2] - bounding_box[0] > bounding_box[6] - bounding_box[0] 51 | 52 | 53 | def get_label_angle(bounding_box: list) -> int: 54 | """ 55 | Compares the coordinates from 2 ends of a bounding box and find the difference 56 | :param bounding_box: [x1, y1, x2, y2,...x6, y6] 57 | :return: Pixel differences between angles 58 | """ 59 | is_horizontal: bool = check_orientation_horizontal(bounding_box) 60 | if is_horizontal: 61 | return abs((bounding_box[3] - bounding_box[1]) + (bounding_box[5] - bounding_box[7])) // 2 62 | 63 | return abs((bounding_box[6] - bounding_box[0]) + (bounding_box[4] - bounding_box[2])) // 2 64 | 65 | 66 | def within_buffer(co1_check_buff: int, co2: int, buffer: int = 50) -> bool: 67 | 
""" 68 | Helper function to check if co2 is in between co1 with buffer 69 | :param co1_check_buff: 70 | :param co2: 71 | :param buffer: 72 | :return: True if co2 is within buffer, otherwise it is False 73 | """ 74 | return co1_check_buff - buffer < co2 < co1_check_buff + buffer 75 | 76 | 77 | def get_carrier_prefixes() -> Tuple: 78 | """ 79 | Read the pre-defined container prefix and return in a Tuple 80 | :return: ("APHU", "EGHU"...) 81 | """ 82 | with open("./container_prefix.txt", "r") as f: 83 | cp = tuple([line.split(",") for line in f if len(line) > 0][0]) 84 | 85 | return cp if cp else tuple([]) 86 | 87 | 88 | def get_ctnr_color(ctnr_img: np.ndarray) -> list: 89 | """ 90 | Get the most dominant color from the image given 91 | :param ctnr_img: Input image 92 | :return: [B, G, R] 93 | """ 94 | colors, count = np.unique(ctnr_img.reshape(-1, ctnr_img.shape[-1]), axis=0, return_counts=True) 95 | colors_max: np.ndarray = colors[count.argmax()] 96 | return colors_max.tolist() 97 | 98 | 99 | def get_ctnr_color_from_byte(input_byte_img: bytes, crop_zone: list[int]) -> list: 100 | """ 101 | Convert byte image to a ndarray format and pass to get container color 102 | :param input_byte_img: 103 | :param crop_zone: bounding box to extract color from 104 | :return: [B, G, R] 105 | """ 106 | nparr = np.frombuffer(input_byte_img, np.uint8) 107 | img_np = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 108 | 109 | cropped_img: np.ndarray = img_np[ 110 | max(0, crop_zone[1] - 100): min(crop_zone[3] + 100, 111 | img_np.shape[1]), 112 | max(0, crop_zone[0] - 100): min(crop_zone[2] + 100, 113 | img_np.shape[0])] 114 | 115 | byte_res: list = get_ctnr_color(cropped_img) 116 | return byte_res 117 | 118 | 119 | def extract_ctnr_location(ocr_output: sdk.DetectedText) -> CNRAI: 120 | """ 121 | Extract container information using bounding box location logic 122 | :param ocr_output: Output from Vision API 123 | :return: CNRAI 124 | """ 125 | # Define standard prefix, Tuple because it may run 
faster 126 | # carrier_prefix: list[str] = ["EGSU", "EMCU", "GAOU", "GLDU", "MRKU", "MRSU", "MSDU", "SEGU", "TGHU", "WEDU"] 127 | carrier_prefix: Tuple = get_carrier_prefixes() 128 | container_t_prefix: Tuple = ("G1", "R1", "U1", "P1", "T1") 129 | 130 | # The bounding block of detected container number 131 | # [x, y, x3, y3] 132 | bound_block = [0, 0, 0, 0] 133 | 134 | tmp_cnrai = CNRAI() 135 | 136 | orientation_horizontal: bool = True 137 | last_xy1_cood: list[int, int] = [] 138 | allowable_buffer: int = 50 139 | 140 | for detected_text_line in ocr_output.lines: 141 | for word in detected_text_line.words: 142 | # As per shipping guidelines, container numbers will have 11 characters 143 | # https://www.evergreen-line.com/container/jsp/CNTR_ContainerMarkings.jsp 144 | if len(tmp_cnrai.container_number) >= 11 and tmp_cnrai.container_type != "": 145 | # Early exit if container number and container type is detected 146 | break 147 | 148 | clean_text: str = str(word.content).strip().replace(" ", "").upper() 149 | 150 | x1, y1, x2, y2, x3, y3, x4, y4 = word.bounding_polygon 151 | 152 | # Detect container prefix 153 | if tmp_cnrai.container_number == "" and any(prefix in clean_text for prefix in carrier_prefix): 154 | tmp_cnrai.container_number = clean_text 155 | orientation_horizontal = check_orientation_horizontal(word.bounding_polygon) 156 | last_xy1_cood = word.bounding_polygon[:2] 157 | 158 | bound_block[0] = x1 159 | bound_block[1] = y1 160 | bound_block[2] = x3 161 | bound_block[3] = y3 162 | 163 | # Detect container serial 164 | # Ensure the container prefix is populated first, 165 | # and the total character is >= 11 as per ISO standard 166 | if 11 > len(tmp_cnrai.container_number) >= 4: 167 | crit_met: bool = False 168 | 169 | # If horizontal container number 170 | if orientation_horizontal: 171 | crit_met = x1 > last_xy1_cood[0] and within_buffer(last_xy1_cood[1], y1, allowable_buffer) 172 | 173 | # If vertical container number 174 | if not 
orientation_horizontal: 175 | crit_met = y1 > last_xy1_cood[1] and within_buffer(last_xy1_cood[0], x1, allowable_buffer) 176 | 177 | if crit_met: 178 | tmp_cnrai.container_number += clean_text 179 | last_xy1_cood = word.bounding_polygon[:2] 180 | 181 | bound_block[2] = x3 if x3 > bound_block[2] else bound_block[2] 182 | bound_block[3] = y3 if y3 > bound_block[3] else bound_block[3] 183 | 184 | # Detect container type 185 | if tmp_cnrai.container_type == "": 186 | if any(t in clean_text for t in container_t_prefix): 187 | tmp_cnrai.container_type = clean_text 188 | 189 | allowable_buffer = get_label_angle(word.bounding_polygon) * 3 190 | 191 | tmp_cnrai.bounding_box = [int(bb) for bb in bound_block] 192 | return tmp_cnrai 193 | 194 | 195 | def extract_ctnr_regex(ocr_output: sdk.DetectedText) -> CNRAI: 196 | """ 197 | Using regex to determine the container number, the simplest method 198 | :param ocr_output: Output from Vision API 199 | :return: CNRAI 200 | """ 201 | 202 | tmp_cnrai = CNRAI() 203 | 204 | # Extract container number based on carrier prefix + 7 digits behind 205 | carrier_prefix: Tuple = get_carrier_prefixes() 206 | detected_combined_text: str = "".join( 207 | [str(word.content).strip().replace(" ", "").upper() for detected_text_line in ocr_output.lines for word in 208 | detected_text_line.words]) 209 | 210 | regex_ctnr_pattern: str = "".join(["(", "|".join(carrier_prefix), ")", "(\d{7})"]) 211 | match_text: list = re.findall(regex_ctnr_pattern, detected_combined_text) 212 | 213 | # If successful, extract the bounding box 214 | if len(match_text) > 0 and len(match_text[0]) > 1: 215 | tmp_cnrai.container_number = "".join(match_text[0]) 216 | dispose_ctnr: str = tmp_cnrai.container_number 217 | # Get bounding box 218 | bb: list = [0, 0, 0, 0] 219 | for detected_text_line in ocr_output.lines: 220 | for word in detected_text_line.words: 221 | if word.content in tmp_cnrai.container_number: 222 | x1, y1, x2, y2, x3, y3, x4, y4 = word.bounding_polygon 223 | if 
bb[0] == 0: 224 | bb[0] = x1 225 | bb[1] = y1 226 | dispose_ctnr = dispose_ctnr.replace(word.content, "") 227 | continue 228 | if bb[0] > 0: 229 | bb[2] = x3 230 | bb[3] = y3 231 | dispose_ctnr = dispose_ctnr.replace(word.content, "") 232 | 233 | if len(dispose_ctnr) < 1: 234 | break 235 | 236 | tmp_cnrai.bounding_box = [int(intbb) for intbb in bb] 237 | 238 | # Then, extract the container type. 2 digits then container_t_prefix 239 | container_t_prefix: Tuple = ("G1", "R1", "U1", "P1", "T1") 240 | regex_ctnr_type_pattern: str = "".join(["(\d{2})", "(", "|".join(container_t_prefix), ")"]) 241 | match_ctnr_type_text: list = re.findall(regex_ctnr_type_pattern, detected_combined_text) 242 | 243 | if len(match_ctnr_type_text) > 0 and len(match_ctnr_type_text[0]) > 1: 244 | tmp_cnrai.container_type = "".join(match_ctnr_type_text[0]) 245 | 246 | return tmp_cnrai 247 | 248 | 249 | def detect_container_details(input_image_byte: bytes) -> json: 250 | """ 251 | Takes in an image in byte array format, and run OCR on it 252 | :param input_image_byte: Image byte array []byte 253 | :return: json format of {"container_number": "ABCD1234567", "container_type": "45G1, "bounding_box": [x, y, x3, y3], "error": error_details.message} 254 | """ 255 | 256 | # https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/how-to/call-analyze-image-40?pivots=programming-language-python 257 | # First convert the byte to an image source buffer 258 | image_source_buffer: sdk.ImageSourceBuffer = sdk.ImageSourceBuffer() 259 | image_source_buffer.image_writer.write(input_image_byte) 260 | vision_source = sdk.VisionSource(image_source_buffer=image_source_buffer) 261 | 262 | # Initialize the analysis options 263 | analysis_options: sdk.ImageAnalysisOptions = sdk.ImageAnalysisOptions() 264 | 265 | analysis_options.features = ( 266 | sdk.ImageAnalysisFeature.TEXT 267 | ) 268 | analysis_options.language = "en" 269 | image_analyzer = sdk.ImageAnalyzer(service_options, vision_source, 
analysis_options) 270 | 271 | # Analyze and get results 272 | result: sdk.ImageAnalysisResult = image_analyzer.analyze() 273 | 274 | # Early exist if error 275 | cnrai_detection: CNRAI = CNRAI() 276 | if result.reason != sdk.ImageAnalysisResultReason.ANALYZED: 277 | error_details = sdk.ImageAnalysisErrorDetails.from_result(result) 278 | cnrai_detection.error = error_details.message 279 | return cnrai_detection.__dict__ 280 | 281 | if result.text is None: 282 | return cnrai_detection.__dict__ 283 | 284 | cnrai_detection = extract_ctnr_regex(result.text) 285 | 286 | # If regex logic does not return any data, detect by bounding box location 287 | if cnrai_detection.container_number == "" or cnrai_detection.container_type == "": 288 | cnrai_detection = extract_ctnr_location(result.text) 289 | 290 | cnrai_detection.container_color = get_ctnr_color_from_byte(input_image_byte, cnrai_detection.bounding_box) 291 | 292 | return cnrai_detection.__dict__ 293 | 294 | 295 | def http_request(request): 296 | from flask import jsonify 297 | """HTTP Cloud Function. 298 | Args: 299 | request (flask.Request): The request object. 300 | 301 | Returns: 302 | The response text, or any set of values that can be turned into a 303 | Response object using `make_response` 304 | . 
305 | """ 306 | 307 | # Set CORS headers for the preflight request 308 | if request.method == "OPTIONS": 309 | # Allows GET requests from any origin with the Content-Type 310 | # header and caches preflight response for an 3600s 311 | headers = { 312 | "Access-Control-Allow-Origin": "*", 313 | "Access-Control-Allow-Methods": "GET", 314 | "Access-Control-Allow-Headers": "Content-Type", 315 | "Access-Control-Max-Age": "3600", 316 | } 317 | 318 | return "", 204, headers 319 | headers = {"Access-Control-Allow-Origin": "*"} 320 | 321 | if 'image' not in request.files or request.files["image"].filename == '': 322 | return jsonify({"message": "No files found"}), 400, headers 323 | 324 | file = request.files['image'] 325 | if file and file.filename.rsplit('.', 1)[1].lower() not in ["jpg", "jpeg", "bmp", "png"]: 326 | return jsonify({"message": "Wrong file type"}), 415, headers 327 | 328 | im = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_COLOR) 329 | encoded_im = cv2.imencode('.JPG', im)[1].tobytes() 330 | 331 | return jsonify(detect_container_details(encoded_im)), 200, headers 332 | 333 | 334 | if __name__ == '__main__': 335 | image_dir = "./data" 336 | 337 | for filename in os.listdir(image_dir): 338 | f = os.path.join(image_dir, filename) 339 | 340 | # checking if it is a file 341 | if os.path.isfile(f) and f.endswith((".bmp", ".jpg", ".jpeg", ".png")): 342 | input_img = cv2.imread(f) 343 | a = cv2.imencode('.JPG', input_img)[1].tobytes() 344 | 345 | res: json = detect_container_details(a) 346 | assert res["error"] is None, res["error"] 347 | 348 | # Crop the detected bounding area from original image 349 | cropped_img: np.ndarray = input_img[ 350 | max(0, res["bounding_box"][1] - 100): min(res["bounding_box"][3] + 100, 351 | input_img.shape[1]), 352 | max(0, res["bounding_box"][0] - 100): min(res["bounding_box"][2] + 100, 353 | input_img.shape[0])] 354 | 355 | # Put the detected details top left of the cropped image 356 | text = 
f"{res['container_number']} - {res['container_type']}" 357 | cv2.putText(cropped_img, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 358 | color=(0, 255, 100), fontScale=1, thickness=2, lineType=cv2.LINE_AA) 359 | 360 | # Display 361 | print(res) 362 | 363 | cropped_img = downscale(cropped_img) 364 | cv2.imshow("output", cropped_img) 365 | cv2.waitKey() 366 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv 2 | azure-ai-vision 3 | opencv-python 4 | numpy 5 | flask 6 | functions-framework==3.5.0 --------------------------------------------------------------------------------