├── .env-sample ├── .gitignore ├── LICENSE ├── README.md ├── assets └── AI in Logistics Container Number Recognition Header.jpg ├── cloudbuild.yaml ├── container_prefix.txt ├── main.py └── requirements.txt /.env-sample: -------------------------------------------------------------------------------- 1 | VISION_ENDPOINT="" 2 | VISION_KEY="" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | models/*.pth 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # PyBuilder 62 | .pybuilder/ 63 | target/ 64 | 65 | # Jupyter Notebook 66 | .ipynb_checkpoints 67 | 68 | # IPython 69 | profile_default/ 70 | ipython_config.py 71 | 72 | # pyenv 73 | # For a library or package, you might want to ignore these files since the code is 74 | # intended to run in multiple environments; otherwise, check them in: 75 | # .python-version 76 | 77 | # pipenv 78 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 79 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 80 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 81 | # install all needed dependencies. 82 | #Pipfile.lock 83 | 84 | # poetry 85 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 86 | # This is especially recommended for binary packages to ensure reproducibility, and is more 87 | # commonly ignored for libraries. 88 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 89 | #poetry.lock 90 | 91 | # pdm 92 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 93 | #pdm.lock 94 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 95 | # in version control. 96 | # https://pdm.fming.dev/#use-with-ide 97 | .pdm.toml 98 | 99 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 100 | __pypackages__/ 101 | 102 | # Celery stuff 103 | celerybeat-schedule 104 | celerybeat.pid 105 | 106 | # SageMath parsed files 107 | *.sage.py 108 | 109 | # Environments 110 | .env 111 | .venv 112 | env/ 113 | venv/ 114 | ENV/ 115 | env.bak/ 116 | venv.bak/ 117 | 118 | # Cython debug symbols 119 | cython_debug/ 120 | 121 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jonathan Law 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI in Logistics: Container Number Recognition 2 | 3 | [[`Project Writeup`](https://medium.com/@jonathanlawhh) [`My Website`](https://jonathanlawhh.com/)] 4 | 5 | ## Project Overview 6 | ![AI in Logistics: Container Number Recognition header image](/assets/AI%20in%20Logistics%20Container%20Number%20Recognition%20Header.jpg) 7 | Traditional container tracking often relies on manual scans and tedious paperwork, creating inefficiencies and bottlenecks. 8 | This project leverages Optical Character Recognition (OCR) technology to automatically read container numbers directly from images, offering innovation in logistics management. 9 | 10 | Companies using this AI solution can now enjoy real-time visibility into container movement within their premises. 11 | 12 | ## References 13 | - [Azure AI Vision](https://azure.microsoft.com/en-us/products/ai-services/ai-vision) by Microsoft Azure 14 | - [OpenCV](https://opencv.org/) 15 | 16 | ## Setup and Usage 17 | 18 | ### Software Requirements 19 | - Python >= 3.10 20 | - [Microsoft Azure Vision API](https://azure.microsoft.com/en-us/products/ai-services/ai-vision) API keys 21 | 22 | ### Installation 23 | 24 | 1. Clone this repository: 25 | ```bash 26 | git clone https://github.com/jonathanlawhh/container-number-recognition-ai.git 27 | ``` 28 | 2. Install required libraries: 29 | ```bash 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ### Usage 34 | 35 | 1. Place your container images in the .\data\ folder. 36 | 2. Rename `.env-sample` to `.env` 37 | 3. Fill up both values in .env `VISION_ENDPOINT` and `VISION_KEY` from your Microsoft Azure Vision API project. 38 | 4. Run the script. 
39 | ```bash 40 | python main.py 41 | ``` 42 | 43 | ## Closing thoughts 44 | 45 | - Using a ready built service such as Azure Vision AI offloads most of the image processing task 46 | - Azure Vision API is more reliable than building using Tesseract OCR if the environment is dynamic, performance is more consistent compared to running on a local hardware 47 | - Can be integrated with in-house Transport Management Systems -------------------------------------------------------------------------------- /assets/AI in Logistics Container Number Recognition Header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonathanlawhh/container-number-recognition-ai/334ba97b714b937a0416811fb8b9b994807b2175/assets/AI in Logistics Container Number Recognition Header.jpg -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/gcloud' 3 | args: 4 | - functions 5 | - deploy 6 | - container_number_recognition_ai # Replace with your function name 7 | - --runtime=python39 8 | - --trigger-http 9 | - --allow-unauthenticated 10 | - --entry-point=http_request # Replace with your entry point function 11 | - --region=us-central1 # Replace with your desired region 12 | - --max-instances=2 13 | - --set-secrets=VISION_ENDPOINT=CNR-VISION_ENDPOINT:latest,VISION_KEY=CNR-VISION_KEY:latest 14 | options: 15 | logging: CLOUD_LOGGING_ONLY -------------------------------------------------------------------------------- /container_prefix.txt: -------------------------------------------------------------------------------- 1 | 
AAAU,ACCU,ACLU,ACXU,AEXU,AKLU,ALLU,ALMU,ALNU,ALRU,AMCU,AMFU,AMZU,ANNU,ANYU,APHU,APLU,APMU,APRU,APZU,ARCU,ARDU,ARKU,ARMU,ARTU,ASIU,ATBU,ATVU,AWSU,AXIU,BAFU,BAXU,BBXU,BCGU,BCHU,BCLU,BEAU,BENU,BGFU,BHCU,BISU,BLJU,BLKU,BLTU,BMLU,BMOU,BORU,BOXU,BRFU,BRKU,BSIU,BUTU,BVIU,BXRU,CAAU,CADU,CAEU,CAIU,CAJU,CARU,CASU,CATU,CAXU,CAZU,CBCU,CBHU,CCGU,CCLU,CCRU,CCSU,CDDU,CDKU,CEOU,CESU,CGIU,CGMU,CHIU,CHSU,CINU,CIPU,CKLU,CLCU,CLHU,CLOU,CLTU,CLXU,CMAU,CMCU,CMHU,CMNU,CMTU,CMUU,CNCU,CNEU,CNIU,CNSU,COCU,CORU,COZU,CPIU,CPLU,CPSU,CPWU,CRLU,CRSU,CRTU,CRXU,CSFU,CSLU,CSNU,CSOU,CSQU,CSVU,CTMU,CTWU,CTXU,CUBU,CUCU,CULU,CXCU,CXDU,CXIU,CXRU,CXSU,CXTU,CZLU,CZZU,DACU,DAMU,DAYU,DBOU,DCSU,DDCU,DFIU,DFOU,DFSU,DJLU,DLKU,DNAU,DNCU,DRYU,DTPU,DVRU,DYLU,EASU,ECMU,ECNU,ECSU,EGHU,EGSU,EIMU,EISU,EITU,EKLU,EMAU,EMCU,ENAU,EQRU,ERFU,ESDU,ESLU,ESPU,ESSU,ETNU,EUCU,EURU,EUXU,EVAU,EXFU,EXXU,FAAU,FAMU,FANU,FBIU,FBLU,FBXU,FCBU,FCCU,FCGU,FCIU,FCLU,FCXU,FESU,FFAU,FJKU,FLTU,FNBU,FNGU,FNTU,FPTU,FRLU,FSCU,FTAU,FVIU,FWUU,FXLU,GAEU,GAFU,GAOU,GASU,GATU,GAZU,GCEU,GCNU,GCUU,GELU,GESU,GETU,GGOU,GIPU,GLDU,GMOU,GNSU,GRCU,GRDU,GRIU,GSLU,GSPU,GSSU,GSTU,GTIU,GUTU,GVCU,GVDU,GVTU,HAKU,HALU,HAMU,HASU,HBSU,HBTU,HCIU,HCSU,HDMU,HGBU,HGFU,HGHU,HGTU,HIBU,HJMU,HLBU,HLCU,HLSU,HLXU,HMCU,HMKU,HMMU,HMPU,HNPU,HNSU,HOTU,HOYU,HRZU,HSIU,HSTU,HTCU,IAAU,IBLU,ICDU,ICLU,ICOU,ICSU,ICTU,ICUU,IEAU,IHOU,IJCU,IKKU,IKSU,IMTU,INBU,INGU,INKU,INNU,INTU,IPXU,IRNU,ITAU,ITLU,ITTU,IVLU,JAYU,JSSU,JTMU,JXJU,JXLU,KHJU,KHLU,KKFU,KKLU,KKTU,KLCU,KLFU,KLTU,KMTU,KNLU,KOSU,KTNU,KXTU,LCRU,LGEU,LLTU,LMCU,LNXU,LOGU,LOTU,LSEU,LTIU,LVNU,LYGU,MAEU,MAGU,MALU,MANU,MARU,MATU,MAXU,MBBU,MBDU,MBGU,MBIU,MBJU,MBTU,MCAU,MCHU,MCLU,MCPU,MCRU,MCTU,MEDU,MERU,MEXU,MFRU,MFTU,MGLU,MGNU,MHHU,MIEU,MLCU,MMAU,MMCU,MNBU,MOAU,MOCU,MOEU,MOFU,MOGU,MOLU,MOMU,MORU,MOSU,MOTU,MRKU,MRSU,MSAU,MSCU,MSDU,MSFU,MSHU,MSKU,MSMU,MSNU,MSOU,MSPU,MSTU,MSUU,MSWU,MSYU,MSZU,MTBU,MTRU,MTSU,MTYU,MVIU,MWCU,MWMU,NDSU,NEVU,NIDU,NIRU,NLLU,NOLU,NOSU,NSAU,NSCU,NSRU,NSSU,NSTU,NYKU,NZDU,NZKU,OCGU,OCLU,OCVU,OFFU,ONEU,OOCU,OOLU,OPDU,OTAU,
OTEU,OTPU,OWLU,OWNU,PBIU,PCIU,PCLU,PCVU,PDLU,PGTU,PGXU,PHHU,PHLU,PILU,POCU,PONU,PQIU,PRGU,PRKU,PRSU,PSCU,PSOU,PSSU,PVDU,PXCU,QBXU,QIBU,QNNU,RALU,RAVU,RCDU,RCLU,REGU,RFCU,RFLU,RFSU,RJCU,RLTU,RLXU,RMCU,RMTU,RSLU,RSSU,RSTU,RTHU,RWLU,RWTU,SACU,SANU,SAXU,SBIU,SBOU,SCEU,SCMU,SCNU,SCPU,SCSU,SCXU,SCZU,SDDU,SDOU,SEAU,SECU,SEFU,SEGU,SEKU,SELU,SEMU,SESU,SEXU,SGCU,SGRU,SIIU,SIKU,SITU,SJKU,SKHU,SKIU,SKLU,SKRU,SKYU,SLEU,SLSU,SLZU,SMCU,SMLU,SMUU,SNBU,SNHU,SNIU,SNTU,SOCU,SOFU,SPKU,SPLU,SPWU,STBU,STJU,STMU,STRU,STXU,SUDU,SVDU,SVWU,SWLU,SWTU,SZLU,TABU,TAIU,TARU,TASU,TCBU,TCIU,TCKU,TCLU,TCNU,TCUU,TDRU,TDTU,TEMU,TENU,TEXU,TGBU,TGCU,TGHU,TGSU,TIFU,TIIU,TISU,TITU,TLCU,TLEU,TLLU,TLNU,TLXU,TMIU,TMLU,TMMU,TMYU,TOLU,TOPU,TORU,TPCU,TPHU,TPMU,TPTU,TQMU,TRDU,TRHU,TRIU,TRKU,TRLU,TRTU,TRVU,TSGU,TSSU,TSTU,TTNU,TXGU,UACU,UAEU,UASU,UESU,UETU,UFCU,UGMU,UNDU,UNIU,UNOU,UNRU,UNSU,UNXU,USPU,UTCU,UXXU,VDMU,VMLU,VSBU,VSTU,WABU,WBPU,WCIU,WCXU,WECU,WEDU,WFHU,WHLU,WHSU,WLNU,WSCU,WSLU,WTLU,WWLU,XINU,XTRU,YMLU,YMMU,YOIU,ZCLU,ZCSU,ZIMU,ZMOU -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | from typing import Tuple 4 | import azure.ai.vision as sdk 5 | import numpy as np 6 | from dotenv import load_dotenv 7 | import cv2 8 | import json 9 | import re 10 | 11 | load_dotenv() 12 | assert os.environ["VISION_ENDPOINT"] != "", "Please ensure VISION ENDPOINT is setup in .env file." 13 | assert os.environ["VISION_KEY"] != "", "Please ensure VISION KEY is setup in .env file." 
14 | 15 | service_options = sdk.VisionServiceOptions(os.environ["VISION_ENDPOINT"], 16 | os.environ["VISION_KEY"]) 17 | 18 | 19 | class CNRAI: 20 | def __init__(self): 21 | self.container_number: str = "" 22 | self.container_type: str = "" 23 | self.bounding_box: list = [0, 0, 0, 0] 24 | self.container_color: list = [0, 0, 0] 25 | self.error: str | None = None 26 | 27 | 28 | def downscale(ori_img: np.ndarray) -> np.ndarray: 29 | """ 30 | If the image is >= 2000 pixels wide, downscale it proportionally 31 | :param ori_img: Input original image 32 | :return: Output resized image 33 | """ 34 | h, w, c = ori_img.shape 35 | if w >= 1000: 36 | # If the value is 2000, take the down_factor as 2. If the value is 3000, take the factor as 3 37 | down_factor: int = (w // 1000 % 10) * 2 38 | return cv2.resize(ori_img, (w // down_factor, h // down_factor)) 39 | 40 | return ori_img 41 | 42 | 43 | def check_orientation_horizontal(bounding_box: list) -> bool: 44 | """ 45 | Helper function to check if text is moving horizontally or vertically 46 | Simply, the difference between the x-axis compared with y-axis determines the orientation 47 | :param bounding_box: [x1, y1, x2, y2,...x6, y6] 48 | :return: True if it is horizontal, else False if it vertical 49 | """ 50 | return bounding_box[2] - bounding_box[0] > bounding_box[6] - bounding_box[0] 51 | 52 | 53 | def get_label_angle(bounding_box: list) -> int: 54 | """ 55 | Compares the coordinates from 2 ends of a bounding box and find the difference 56 | :param bounding_box: [x1, y1, x2, y2,...x6, y6] 57 | :return: Pixel differences between angles 58 | """ 59 | is_horizontal: bool = check_orientation_horizontal(bounding_box) 60 | if is_horizontal: 61 | return abs((bounding_box[3] - bounding_box[1]) + (bounding_box[5] - bounding_box[7])) // 2 62 | 63 | return abs((bounding_box[6] - bounding_box[0]) + (bounding_box[4] - bounding_box[2])) // 2 64 | 65 | 66 | def within_buffer(co1_check_buff: int, co2: int, buffer: int = 50) -> bool: 67 | 
""" 68 | Helper function to check if co2 is in between co1 with buffer 69 | :param co1_check_buff: 70 | :param co2: 71 | :param buffer: 72 | :return: True if co2 is within buffer, otherwise it is False 73 | """ 74 | return co1_check_buff - buffer < co2 < co1_check_buff + buffer 75 | 76 | 77 | def get_carrier_prefixes() -> Tuple: 78 | """ 79 | Read the pre-defined container prefix and return in a Tuple 80 | :return: ("APHU", "EGHU"...) 81 | """ 82 | with open("./container_prefix.txt", "r") as f: 83 | cp = tuple([line.split(",") for line in f if len(line) > 0][0]) 84 | 85 | return cp if cp else tuple([]) 86 | 87 | 88 | def get_ctnr_color(ctnr_img: np.ndarray) -> list: 89 | """ 90 | Get the most dominant color from the image given 91 | :param ctnr_img: Input image 92 | :return: [B, G, R] 93 | """ 94 | colors, count = np.unique(ctnr_img.reshape(-1, ctnr_img.shape[-1]), axis=0, return_counts=True) 95 | colors_max: np.ndarray = colors[count.argmax()] 96 | return colors_max.tolist() 97 | 98 | 99 | def get_ctnr_color_from_byte(input_byte_img: bytes, crop_zone: list[int]) -> list: 100 | """ 101 | Convert byte image to a ndarray format and pass to get container color 102 | :param input_byte_img: 103 | :param crop_zone: bounding box to extract color from 104 | :return: [B, G, R] 105 | """ 106 | nparr = np.frombuffer(input_byte_img, np.uint8) 107 | img_np = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 108 | 109 | cropped_img: np.ndarray = img_np[ 110 | max(0, crop_zone[1] - 100): min(crop_zone[3] + 100, 111 | img_np.shape[1]), 112 | max(0, crop_zone[0] - 100): min(crop_zone[2] + 100, 113 | img_np.shape[0])] 114 | 115 | byte_res: list = get_ctnr_color(cropped_img) 116 | return byte_res 117 | 118 | 119 | def extract_ctnr_location(ocr_output: sdk.DetectedText) -> CNRAI: 120 | """ 121 | Extract container information using bounding box location logic 122 | :param ocr_output: Output from Vision API 123 | :return: CNRAI 124 | """ 125 | # Define standard prefix, Tuple because it may run 
faster 126 | # carrier_prefix: list[str] = ["EGSU", "EMCU", "GAOU", "GLDU", "MRKU", "MRSU", "MSDU", "SEGU", "TGHU", "WEDU"] 127 | carrier_prefix: Tuple = get_carrier_prefixes() 128 | container_t_prefix: Tuple = ("G1", "R1", "U1", "P1", "T1") 129 | 130 | # The bounding block of detected container number 131 | # [x, y, x3, y3] 132 | bound_block = [0, 0, 0, 0] 133 | 134 | tmp_cnrai = CNRAI() 135 | 136 | orientation_horizontal: bool = True 137 | last_xy1_cood: list[int, int] = [] 138 | allowable_buffer: int = 50 139 | 140 | for detected_text_line in ocr_output.lines: 141 | for word in detected_text_line.words: 142 | # As per shipping guidelines, container numbers will have 11 characters 143 | # https://www.evergreen-line.com/container/jsp/CNTR_ContainerMarkings.jsp 144 | if len(tmp_cnrai.container_number) >= 11 and tmp_cnrai.container_type != "": 145 | # Early exit if container number and container type is detected 146 | break 147 | 148 | clean_text: str = str(word.content).strip().replace(" ", "").upper() 149 | 150 | x1, y1, x2, y2, x3, y3, x4, y4 = word.bounding_polygon 151 | 152 | # Detect container prefix 153 | if tmp_cnrai.container_number == "" and any(prefix in clean_text for prefix in carrier_prefix): 154 | tmp_cnrai.container_number = clean_text 155 | orientation_horizontal = check_orientation_horizontal(word.bounding_polygon) 156 | last_xy1_cood = word.bounding_polygon[:2] 157 | 158 | bound_block[0] = x1 159 | bound_block[1] = y1 160 | bound_block[2] = x3 161 | bound_block[3] = y3 162 | 163 | # Detect container serial 164 | # Ensure the container prefix is populated first, 165 | # and the total character is >= 11 as per ISO standard 166 | if 11 > len(tmp_cnrai.container_number) >= 4: 167 | crit_met: bool = False 168 | 169 | # If horizontal container number 170 | if orientation_horizontal: 171 | crit_met = x1 > last_xy1_cood[0] and within_buffer(last_xy1_cood[1], y1, allowable_buffer) 172 | 173 | # If vertical container number 174 | if not 
orientation_horizontal: 175 | crit_met = y1 > last_xy1_cood[1] and within_buffer(last_xy1_cood[0], x1, allowable_buffer) 176 | 177 | if crit_met: 178 | tmp_cnrai.container_number += clean_text 179 | last_xy1_cood = word.bounding_polygon[:2] 180 | 181 | bound_block[2] = x3 if x3 > bound_block[2] else bound_block[2] 182 | bound_block[3] = y3 if y3 > bound_block[3] else bound_block[3] 183 | 184 | # Detect container type 185 | if tmp_cnrai.container_type == "": 186 | if any(t in clean_text for t in container_t_prefix): 187 | tmp_cnrai.container_type = clean_text 188 | 189 | allowable_buffer = get_label_angle(word.bounding_polygon) * 3 190 | 191 | tmp_cnrai.bounding_box = [int(bb) for bb in bound_block] 192 | return tmp_cnrai 193 | 194 | 195 | def extract_ctnr_regex(ocr_output: sdk.DetectedText) -> CNRAI: 196 | """ 197 | Using regex to determine the container number, the simplest method 198 | :param ocr_output: Output from Vision API 199 | :return: CNRAI 200 | """ 201 | 202 | tmp_cnrai = CNRAI() 203 | 204 | # Extract container number based on carrier prefix + 7 digits behind 205 | carrier_prefix: Tuple = get_carrier_prefixes() 206 | detected_combined_text: str = "".join( 207 | [str(word.content).strip().replace(" ", "").upper() for detected_text_line in ocr_output.lines for word in 208 | detected_text_line.words]) 209 | 210 | regex_ctnr_pattern: str = "".join(["(", "|".join(carrier_prefix), ")", "(\d{7})"]) 211 | match_text: list = re.findall(regex_ctnr_pattern, detected_combined_text) 212 | 213 | # If successful, extract the bounding box 214 | if len(match_text) > 0 and len(match_text[0]) > 1: 215 | tmp_cnrai.container_number = "".join(match_text[0]) 216 | dispose_ctnr: str = tmp_cnrai.container_number 217 | # Get bounding box 218 | bb: list = [0, 0, 0, 0] 219 | for detected_text_line in ocr_output.lines: 220 | for word in detected_text_line.words: 221 | if word.content in tmp_cnrai.container_number: 222 | x1, y1, x2, y2, x3, y3, x4, y4 = word.bounding_polygon 223 | if 
bb[0] == 0: 224 | bb[0] = x1 225 | bb[1] = y1 226 | dispose_ctnr = dispose_ctnr.replace(word.content, "") 227 | continue 228 | if bb[0] > 0: 229 | bb[2] = x3 230 | bb[3] = y3 231 | dispose_ctnr = dispose_ctnr.replace(word.content, "") 232 | 233 | if len(dispose_ctnr) < 1: 234 | break 235 | 236 | tmp_cnrai.bounding_box = [int(intbb) for intbb in bb] 237 | 238 | # Then, extract the container type. 2 digits then container_t_prefix 239 | container_t_prefix: Tuple = ("G1", "R1", "U1", "P1", "T1") 240 | regex_ctnr_type_pattern: str = "".join(["(\d{2})", "(", "|".join(container_t_prefix), ")"]) 241 | match_ctnr_type_text: list = re.findall(regex_ctnr_type_pattern, detected_combined_text) 242 | 243 | if len(match_ctnr_type_text) > 0 and len(match_ctnr_type_text[0]) > 1: 244 | tmp_cnrai.container_type = "".join(match_ctnr_type_text[0]) 245 | 246 | return tmp_cnrai 247 | 248 | 249 | def detect_container_details(input_image_byte: bytes) -> json: 250 | """ 251 | Takes in an image in byte array format, and run OCR on it 252 | :param input_image_byte: Image byte array []byte 253 | :return: json format of {"container_number": "ABCD1234567", "container_type": "45G1, "bounding_box": [x, y, x3, y3], "error": error_details.message} 254 | """ 255 | 256 | # https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/how-to/call-analyze-image-40?pivots=programming-language-python 257 | # First convert the byte to an image source buffer 258 | image_source_buffer: sdk.ImageSourceBuffer = sdk.ImageSourceBuffer() 259 | image_source_buffer.image_writer.write(input_image_byte) 260 | vision_source = sdk.VisionSource(image_source_buffer=image_source_buffer) 261 | 262 | # Initialize the analysis options 263 | analysis_options: sdk.ImageAnalysisOptions = sdk.ImageAnalysisOptions() 264 | 265 | analysis_options.features = ( 266 | sdk.ImageAnalysisFeature.TEXT 267 | ) 268 | analysis_options.language = "en" 269 | image_analyzer = sdk.ImageAnalyzer(service_options, vision_source, 
analysis_options) 270 | 271 | # Analyze and get results 272 | result: sdk.ImageAnalysisResult = image_analyzer.analyze() 273 | 274 | # Early exist if error 275 | cnrai_detection: CNRAI = CNRAI() 276 | if result.reason != sdk.ImageAnalysisResultReason.ANALYZED: 277 | error_details = sdk.ImageAnalysisErrorDetails.from_result(result) 278 | cnrai_detection.error = error_details.message 279 | return cnrai_detection.__dict__ 280 | 281 | if result.text is None: 282 | return cnrai_detection.__dict__ 283 | 284 | cnrai_detection = extract_ctnr_regex(result.text) 285 | 286 | # If regex logic does not return any data, detect by bounding box location 287 | if cnrai_detection.container_number == "" or cnrai_detection.container_type == "": 288 | cnrai_detection = extract_ctnr_location(result.text) 289 | 290 | cnrai_detection.container_color = get_ctnr_color_from_byte(input_image_byte, cnrai_detection.bounding_box) 291 | 292 | return cnrai_detection.__dict__ 293 | 294 | 295 | def http_request(request): 296 | from flask import jsonify 297 | """HTTP Cloud Function. 298 | Args: 299 | request (flask.Request): The request object. 300 | 301 | Returns: 302 | The response text, or any set of values that can be turned into a 303 | Response object using `make_response` 304 | . 
305 | """ 306 | 307 | # Set CORS headers for the preflight request 308 | if request.method == "OPTIONS": 309 | # Allows GET requests from any origin with the Content-Type 310 | # header and caches preflight response for an 3600s 311 | headers = { 312 | "Access-Control-Allow-Origin": "*", 313 | "Access-Control-Allow-Methods": "GET", 314 | "Access-Control-Allow-Headers": "Content-Type", 315 | "Access-Control-Max-Age": "3600", 316 | } 317 | 318 | return "", 204, headers 319 | headers = {"Access-Control-Allow-Origin": "*"} 320 | 321 | if 'image' not in request.files or request.files["image"].filename == '': 322 | return jsonify({"message": "No files found"}), 400, headers 323 | 324 | file = request.files['image'] 325 | if file and file.filename.rsplit('.', 1)[1].lower() not in ["jpg", "jpeg", "bmp", "png"]: 326 | return jsonify({"message": "Wrong file type"}), 415, headers 327 | 328 | im = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_COLOR) 329 | encoded_im = cv2.imencode('.JPG', im)[1].tobytes() 330 | 331 | return jsonify(detect_container_details(encoded_im)), 200, headers 332 | 333 | 334 | if __name__ == '__main__': 335 | image_dir = "./data" 336 | 337 | for filename in os.listdir(image_dir): 338 | f = os.path.join(image_dir, filename) 339 | 340 | # checking if it is a file 341 | if os.path.isfile(f) and f.endswith((".bmp", ".jpg", ".jpeg", ".png")): 342 | input_img = cv2.imread(f) 343 | a = cv2.imencode('.JPG', input_img)[1].tobytes() 344 | 345 | res: json = detect_container_details(a) 346 | assert res["error"] is None, res["error"] 347 | 348 | # Crop the detected bounding area from original image 349 | cropped_img: np.ndarray = input_img[ 350 | max(0, res["bounding_box"][1] - 100): min(res["bounding_box"][3] + 100, 351 | input_img.shape[1]), 352 | max(0, res["bounding_box"][0] - 100): min(res["bounding_box"][2] + 100, 353 | input_img.shape[0])] 354 | 355 | # Put the detected details top left of the cropped image 356 | text = 
f"{res['container_number']} - {res['container_type']}" 357 | cv2.putText(cropped_img, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 358 | color=(0, 255, 100), fontScale=1, thickness=2, lineType=cv2.LINE_AA) 359 | 360 | # Display 361 | print(res) 362 | 363 | cropped_img = downscale(cropped_img) 364 | cv2.imshow("output", cropped_img) 365 | cv2.waitKey() 366 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv 2 | azure-ai-vision 3 | opencv-python 4 | numpy 5 | flask 6 | functions-framework==3.5.0 --------------------------------------------------------------------------------