├── 1.1.png ├── 1.jpg ├── 2.jpg ├── 3.jpg ├── ADIL_dataset_Annotated.json ├── AIDL_Final_Notebook.ipynb ├── AIDL_NB_for_Demo.ipynb ├── README.md ├── models.zip ├── models ├── AIDL_NER_DO-0.30_EP-20_100_PERC_DATA │ ├── meta.json │ ├── ner │ │ ├── cfg │ │ ├── model │ │ └── moves │ ├── tokenizer │ └── vocab │ │ ├── key2row │ │ ├── lexemes.bin │ │ ├── strings.json │ │ └── vectors ├── AIDL_NER_DO-0.30_EP-20_80_PERC_DATA │ ├── meta.json │ ├── ner │ │ ├── cfg │ │ ├── model │ │ └── moves │ ├── tokenizer │ └── vocab │ │ ├── key2row │ │ ├── lexemes.bin │ │ ├── strings.json │ │ └── vectors └── AIDL_NER_DO-0.30_EP-20_90_PERC_DATA │ ├── meta.json │ ├── ner │ ├── cfg │ ├── model │ └── moves │ ├── tokenizer │ └── vocab │ ├── key2row │ ├── lexemes.bin │ ├── strings.json │ └── vectors ├── requirements.txt ├── run.py ├── sample.txt └── sea-lands.jpeg /1.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/1.1.png -------------------------------------------------------------------------------- /1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/1.jpg -------------------------------------------------------------------------------- /2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/2.jpg -------------------------------------------------------------------------------- /3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/3.jpg -------------------------------------------------------------------------------- /AIDL_Final_Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "AIDL Final Notebook.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "execution_count": 0, 19 | "metadata": { 20 | "colab": {}, 21 | "colab_type": "code", 22 | "id": "MsnPa2cFwr2Z" 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "# Uncomment below lines on first run\n", 27 | "# !pip install spacy google-cloud-vision" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 6, 33 | "metadata": { 34 | "colab": { 35 | "base_uri": "https://localhost:8080/", 36 | "height": 663 37 | }, 38 | "colab_type": "code", 39 | "id": "Cke7hzChzMAz", 40 | "outputId": "dc63f999-8cf4-416e-83b6-5b25c334fc8a" 41 | }, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "Archive: models.zip\n", 48 | " creating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/\n", 49 | " inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/meta.json \n", 50 | " creating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/\n", 51 | " inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/cfg \n", 52 | " inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/model \n", 53 | " inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/moves \n", 54 | " inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/tokenizer \n", 55 | " creating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/\n", 56 | " extracting: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/key2row \n", 57 | " inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/lexemes.bin \n", 58 | " inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/strings.json \n", 59 | " inflating: models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/vectors \n", 60 | " creating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/\n", 61 | " inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/meta.json \n", 62 | " creating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/\n", 63 | " inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/cfg \n", 64 | " inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/model \n", 65 | " inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/moves \n", 66 | " inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/tokenizer \n", 67 | " creating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/\n", 68 | " extracting: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/key2row \n", 69 | " inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/lexemes.bin \n", 70 | " inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/strings.json \n", 71 | " inflating: models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/vectors \n", 72 | " creating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/\n", 73 | " inflating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/meta.json \n", 74 | " creating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/\n", 75 | " inflating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/cfg \n", 76 | " inflating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/model \n", 77 | " inflating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/moves \n", 78 | " inflating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/tokenizer \n", 79 | " creating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/\n", 80 | " extracting: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/key2row \n", 81 | " inflating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/lexemes.bin \n", 82 | " inflating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/strings.json \n", 83 | " inflating: models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/vectors \n", 84 | " inflating: models/run.py \n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "# Unzipping model files\n", 90 | "! unzip models.zip" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 0, 96 | "metadata": { 97 | "colab": {}, 98 | "colab_type": "code", 99 | "id": "k0KSuevWv4dd" 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "# Importing Libraries\n", 104 | "import spacy\n", 105 | "import os \n", 106 | "import json\n", 107 | "from google.cloud import vision\n", 108 | "import io\n", 109 | "\n", 110 | "# Setting Environment Variable for Vision API\n", 111 | "os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"]=\"/content/fyp-bot-fkvpth-63ef51dcf510.json\"" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 0, 117 | "metadata": { 118 | "colab": {}, 119 | "colab_type": "code", 120 | "id": "LQlvXdE0wW6Y" 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "# Setting variables\n", 125 | "modelDir = \"models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA\"\n", 126 | "fileType = \"img\"\n", 127 | "filename = \"sample.jpg\"" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 0, 133 | "metadata": { 134 | "colab": {}, 135 | "colab_type": "code", 136 | "id": "Uxy2g9YTw-Mq" 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "# Initializing vision API\n", 141 | "client = vision.ImageAnnotatorClient()\n", 142 | "\n", 143 | "# Loading the saved Spacy model\n", 144 | "nlp = spacy.load(modelDir)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 0, 150 | "metadata": { 151 | "colab": {}, 152 | "colab_type": "code", 153 | "id": "aE1LGQBjxEtW" 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "def getOutput(type, data):\n", 158 | " \"\"\"\n", 159 | " Parameters: type: type of data, either img or txt\n", 160 | " Output: Prints the dictionary\n", 161 | " \"\"\"\n", 162 | " textToPredict = \"\"\n", 163 | " # Checking if file type is img or not\n", 164 | " if (type == \"img\"):\n", 165 | " with io.open(data, 'rb') as image_file:\n", 166 | " # Reading file contente\n", 167 | " content = image_file.read()\n", 168 | " # Creating image format to match Vision API format\n", 169 | " image = vision.types.Image(content=content)\n", 170 | " # Getting results from Vision API\n", 171 | " text_response = client.text_detection(image=image)\n", 172 | " # Getting the text from the response\n", 173 | " texts = [text.description for text in text_response.text_annotations]\n", 174 | " # Storing data in variable\n", 175 | " textToPredict = texts[0]\n", 176 | " else:\n", 177 | " # Opening txt file\n", 178 | " f = open(data, \"r\")\n", 179 | " # Storing data in variable\n", 180 | " textToPredict = f.read()\n", 181 | " # Sending textual data to Spacy model for NER\n", 182 | " doc = nlp(textToPredict)\n", 183 | " max_amt = 0\n", 184 | " i = 1\n", 185 | " data = {}\n", 186 | " items_list = []\n", 187 | " # Iterating over every entitiy to create a dictionary\n", 188 | " for ent in doc.ents:\n", 189 | " # Saving only one instance of Total Bill Amount\n", 190 | " if (ent.label_ == \"Total bill amount\"):\n", 191 | " try:\n", 192 | " amt = float(ent.text)\n", 193 | " if amt > max_amt:\n", 194 | " data[\"Total bill amount\"] = amt\n", 195 | " except Exception as e:\n", 196 | " pass\n", 197 | " # Creating a list of Items\n", 198 | " elif (ent.label_ == \"Items\"):\n", 199 | " try:\n", 200 | " items_list.append(ent.text)\n", 201 | " except Exception as e:\n", 202 | " print(e)\n", 203 | " # Checking if the detected key is already present in the key,\n", 204 | " # If yes then we create a new key to store that value instead of overwriting the previous one\n", 205 | " else:\n", 206 | " if ent.label_ in data.keys():\n", 207 | " data[ent.label_+\"-\"+str(i)] = ent.text\n", 208 | " i +=1\n", 209 | " else:\n", 210 | " data[ent.label_] = ent.text\n", 211 | " # Staring the list of items using the Items key in the dictionary\n", 212 | " data[\"Items\"]=items_list\n", 213 | " # Sorting all the elements of the dictionary\n", 214 | " data = dict(sorted(data.items()))\n", 215 | " # Printing final result\n", 216 | " print(json.dumps(data, indent=2))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 14, 222 | "metadata": { 223 | "colab": { 224 | "base_uri": "https://localhost:8080/", 225 | "height": 289 226 | }, 227 | "colab_type": "code", 228 | "id": "CKx5IwLNxHyf", 229 | "outputId": "f595d45f-3aa8-4e74-cacd-e3eec92dce48" 230 | }, 231 | "outputs": [ 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n", 237 | "Wall time: 7.15 µs\n", 238 | "{\n", 239 | " \"Date\": \"29-11-2019\",\n", 240 | " \"Invoice number\": \"201911291623\",\n", 241 | " \"Items\": [\n", 242 | " \"YOU ARE THE GREATE\\nST MUG-FATHER\",\n", 243 | " \"PRINTED PAPER MATT\\nER\\nCRAFT PAPER BAG- H\"\n", 244 | " ],\n", 245 | " \"Store address\": \"PHOENIX MARKETCITY\\n\",\n", 246 | " \"Store address-2\": \"S-23,IIND FLOOR, 142, VELACHERY MAIN ROAD,\\nCHENNAI-600042\",\n", 247 | " \"Store name\": \"ARCHIES\",\n", 248 | " \"Store name-1\": \"ARCHIES\",\n", 249 | " \"Time\": \"16:22\",\n", 250 | " \"Total bill amount\": 434.0\n", 251 | "}\n" 252 | ] 253 | } 254 | ], 255 | "source": [ 256 | "%time\n", 257 | "# Calling the function to get the output\n", 258 | "getOutput(fileType, filename)" 259 | ] 260 | } 261 | ] 262 | } -------------------------------------------------------------------------------- /AIDL_NB_for_Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "AIDL NB for Demo.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "MsnPa2cFwr2Z", 20 | "colab_type": "code", 21 | "colab": {} 22 | }, 23 | "source": [ 24 | "# Uncomment below lines on first run\n", 25 | "# !pip install spacy google-cloud-vision" 26 | ], 27 | "execution_count": 0, 28 | "outputs": [] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "metadata": { 33 | "id": "Cke7hzChzMAz", 34 | "colab_type": "code", 35 | "colab": {} 36 | }, 37 | "source": [ 38 | "# Unzipping model files\n", 39 | "! unzip models.zip" 40 | ], 41 | "execution_count": 0, 42 | "outputs": [] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "metadata": { 47 | "id": "k0KSuevWv4dd", 48 | "colab_type": "code", 49 | "colab": {} 50 | }, 51 | "source": [ 52 | "# Importing Libraries\n", 53 | "import spacy\n", 54 | "import os \n", 55 | "import json\n", 56 | "from google.cloud import vision\n", 57 | "import io\n", 58 | "\n", 59 | "# Setting Environment Variable for Vision API\n", 60 | "os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"]=\"/content/fyp-bot-fkvpth-63ef51dcf510.json\"" 61 | ], 62 | "execution_count": 0, 63 | "outputs": [] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "metadata": { 68 | "id": "LQlvXdE0wW6Y", 69 | "colab_type": "code", 70 | "colab": {} 71 | }, 72 | "source": [ 73 | "# Setting variables\n", 74 | "modelDir = \"models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA\"" 75 | ], 76 | "execution_count": 0, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "metadata": { 82 | "id": "Uxy2g9YTw-Mq", 83 | "colab_type": "code", 84 | "colab": {} 85 | }, 86 | "source": [ 87 | "# Initializing vision API\n", 88 | "client = vision.ImageAnnotatorClient()\n", 89 | "\n", 90 | "# Loading the saved Spacy model\n", 91 | "nlp = spacy.load(modelDir)" 92 | ], 93 | "execution_count": 0, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "aE1LGQBjxEtW", 100 | "colab_type": "code", 101 | "colab": {} 102 | }, 103 | "source": [ 104 | "def getOutput(type, data):\n", 105 | " \"\"\"\n", 106 | " Parameters: type: type of data, either img or txt\n", 107 | " Output: Prints the dictionary\n", 108 | " \"\"\"\n", 109 | " textToPredict = \"\"\n", 110 | " # Checking if file type is img or not\n", 111 | " if (type == \"img\"):\n", 112 | " with io.open(data, 'rb') as image_file:\n", 113 | " # Reading file contente\n", 114 | " content = image_file.read()\n", 115 | " # Creating image format to match Vision API format\n", 116 | " image = vision.types.Image(content=content)\n", 117 | " # Getting results from Vision API\n", 118 | " text_response = client.text_detection(image=image)\n", 119 | " # Getting the text from the response\n", 120 | " texts = [text.description for text in text_response.text_annotations]\n", 121 | " # Storing data in variable\n", 122 | " textToPredict = texts[0]\n", 123 | " else:\n", 124 | " # Opening txt file\n", 125 | " f = open(data, \"r\")\n", 126 | " # Storing data in variable\n", 127 | " textToPredict = f.read()\n", 128 | " # Sending textual data to Spacy model for NER\n", 129 | " doc = nlp(textToPredict)\n", 130 | " max_amt = 0\n", 131 | " i = 1\n", 132 | " data = {}\n", 133 | " items_list = []\n", 134 | " # Iterating over every entitiy to create a dictionary\n", 135 | " for ent in doc.ents:\n", 136 | " # Saving only one instance of Total Bill Amount\n", 137 | " if (ent.label_ == \"Total bill amount\"):\n", 138 | " try:\n", 139 | " amt = float(ent.text)\n", 140 | " if amt > max_amt:\n", 141 | " data[\"Total bill amount\"] = amt\n", 142 | " except Exception as e:\n", 143 | " pass\n", 144 | " # Creating a list of Items\n", 145 | " elif (ent.label_ == \"Items\"):\n", 146 | " try:\n", 147 | " items_list.append(ent.text)\n", 148 | " except Exception as e:\n", 149 | " print(e)\n", 150 | " # Checking if the detected key is already present in the key,\n", 151 | " # If yes then we create a new key to store that value instead of overwriting the previous one\n", 152 | " else:\n", 153 | " if ent.label_ in data.keys():\n", 154 | " data[ent.label_+\"-\"+str(i)] = ent.text\n", 155 | " i +=1\n", 156 | " else:\n", 157 | " data[ent.label_] = ent.text\n", 158 | " # Staring the list of items using the Items key in the dictionary\n", 159 | " data[\"Items\"]=items_list\n", 160 | " # Sorting all the elements of the dictionary\n", 161 | " data = dict(sorted(data.items()))\n", 162 | " # Printing final result\n", 163 | " print(json.dumps(data, indent=2))" 164 | ], 165 | "execution_count": 0, 166 | "outputs": [] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "metadata": { 171 | "id": "CKx5IwLNxHyf", 172 | "colab_type": "code", 173 | "outputId": "9987f7eb-1963-4c3b-d564-cd28cb5953c2", 174 | "colab": { 175 | "base_uri": "https://localhost:8080/", 176 | "height": 272 177 | } 178 | }, 179 | "source": [ 180 | "%time\n", 181 | "# Calling the function to get the output\n", 182 | "getOutput(\"img\", \"1.1.png\")" 183 | ], 184 | "execution_count": 50, 185 | "outputs": [ 186 | { 187 | "output_type": "stream", 188 | "text": [ 189 | "CPU times: user 2 µs, sys: 2 µs, total: 4 µs\n", 190 | "Wall time: 7.87 µs\n", 191 | "{\n", 192 | " \"Date\": \"Date:2019-10-28\",\n", 193 | " \"GSTIN\": \"33AAPFP2374MIZR\",\n", 194 | " \"Invoice number\": \"219\",\n", 195 | " \"Items\": [\n", 196 | " \"Tandoori\\nPizzaiolo\",\n", 197 | " \"Kebab Cobb\\nSalad\"\n", 198 | " ],\n", 199 | " \"Store address\": \"Palladium FC 04, No 142,\\nVelachery Main Road\\nChennai-600042\",\n", 200 | " \"Store name\": \"lyfe by soul Garden Bistro\",\n", 201 | " \"Time\": \"21:21:55\",\n", 202 | " \"Total bill amount\": 890.0\n", 203 | "}\n" 204 | ], 205 | "name": "stdout" 206 | } 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "metadata": { 212 | "id": "ls1CnqmMVZsb", 213 | "colab_type": "code", 214 | "colab": { 215 | "base_uri": "https://localhost:8080/", 216 | "height": 476 217 | }, 218 | "outputId": "a2523225-76ed-469f-c8b3-527bbc3be9dc" 219 | }, 220 | "source": [ 221 | "%time\n", 222 | "# Calling the function to get the output\n", 223 | "getOutput(\"img\", \"1.jpg\")" 224 | ], 225 | "execution_count": 51, 226 | "outputs": [ 227 | { 228 | "output_type": "stream", 229 | "text": [ 230 | "CPU times: user 2 µs, sys: 1 µs, total: 3 µs\n", 231 | "Wall time: 4.77 µs\n", 232 | "{\n", 233 | " \"Date\": \"28-Nov-19\",\n", 234 | " \"GSTIN\": \"33AATCG73851125\",\n", 235 | " \"Invoice number\": \"LTN02B1920003774\",\n", 236 | " \"Items\": [\n", 237 | " \"VEG RICE BOWL MEA\",\n", 238 | " \"CLASSIC LEMONADE\",\n", 239 | " \"MILD BASTING\\nGAL\",\n", 240 | " \"VEG RICE BOWL MEA\",\n", 241 | " \"CLASSIC LEMONADE\",\n", 242 | " \"MILD BASTING\\n\",\n", 243 | " \"VEG RICE BOWL MEA\\nGAL\",\n", 244 | " \"CLASSIC LEMONADE\",\n", 245 | " \"MILD BASTING GAL\",\n", 246 | " \"QUARTER CHICKEN M\",\n", 247 | " \"CORN ON THE COB\",\n", 248 | " \"CLASSIC LEMONADE\",\n", 249 | " \"MILD BASTING GAL\"\n", 250 | " ],\n", 251 | " \"Store address\": \"Unit No: UG-41,PMC,0ld Door.No. 66, New\\nDoor No. 142, Velaohery, Channai\",\n", 252 | " \"Store name\": \"Calito's\",\n", 253 | " \"Store name-1\": \"Galito's\",\n", 254 | " \"Time\": \"16:47\",\n", 255 | " \"Total bill amount\": 762.0\n", 256 | "}\n" 257 | ], 258 | "name": "stdout" 259 | } 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "metadata": { 265 | "id": "MBHe7reoEItk", 266 | "colab_type": "code", 267 | "colab": { 268 | "base_uri": "https://localhost:8080/", 269 | "height": 221 270 | }, 271 | "outputId": "9f77b84d-c491-4e1c-fe0a-0957bd0d9978" 272 | }, 273 | "source": [ 274 | "%time\n", 275 | "# Calling the function to get the output\n", 276 | "getOutput(\"img\", \"2.jpg\")" 277 | ], 278 | "execution_count": 52, 279 | "outputs": [ 280 | { 281 | "output_type": "stream", 282 | "text": [ 283 | "CPU times: user 2 µs, sys: 2 µs, total: 4 µs\n", 284 | "Wall time: 7.15 µs\n", 285 | "{\n", 286 | " \"Date\": \"Date:2019-11-28\",\n", 287 | " \"Invoice number\": \"1925\",\n", 288 | " \"Items\": [\n", 289 | " \"\\u0421\\u041d\\u041e\\u0421OLAT\\u0415 \\u041e\\nVERLOAD\"\n", 290 | " ],\n", 291 | " \"Store name\": \"Belgian Waffle\",\n", 292 | " \"Time\": \"18:41:46\",\n", 293 | " \"Total bill amount\": 140.0\n", 294 | "}\n" 295 | ], 296 | "name": "stdout" 297 | } 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "metadata": { 303 | "id": "pYcOXy-pEIyQ", 304 | "colab_type": "code", 305 | "colab": { 306 | "base_uri": "https://localhost:8080/", 307 | "height": 255 308 | }, 309 | "outputId": "3cd0a5a2-7c71-4040-bbe5-54743b95ec15" 310 | }, 311 | "source": [ 312 | "%time\n", 313 | "# Calling the function to get the output\n", 314 | "getOutput(\"img\", \"3.jpg\")" 315 | ], 316 | "execution_count": 53, 317 | "outputs": [ 318 | { 319 | "output_type": "stream", 320 | "text": [ 321 | "CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n", 322 | "Wall time: 6.91 µs\n", 323 | "{\n", 324 | " \"GSTIN\": \"33AAECT2235P 1Z6\",\n", 325 | " \"Invoice number\": \"PM7332\",\n", 326 | " \"Items\": [\n", 327 | " \"H/S TOP XXL\",\n", 328 | " \"TOP H/S B XXL\",\n", 329 | " \"PAPER BAG M\"\n", 330 | " ],\n", 331 | " \"Store address\": \"142, VELACHERY ,MAIN ROAD,\\nPHOENIX MALL, SHOP NO.S-17 VELACHERY\\nVELACHERY\\nCHENNAI\",\n", 332 | " \"Time\": \"07:51 PM\",\n", 333 | " \"Total bill amount\": 269.0\n", 334 | "}\n" 335 | ], 336 | "name": "stdout" 337 | } 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "metadata": { 343 | "id": "UbJDFZUlERys", 344 | "colab_type": "code", 345 | "colab": { 346 | "base_uri": "https://localhost:8080/", 347 | "height": 306 348 | }, 349 | "outputId": "a55c9f31-56dd-4fb3-8fef-8eab337a644c" 350 | }, 351 | "source": [ 352 | "%time\n", 353 | "# Calling the function to get the output\n", 354 | "getOutput(\"txt\", \"sample.txt\")" 355 | ], 356 | "execution_count": 54, 357 | "outputs": [ 358 | { 359 | "output_type": "stream", 360 | "text": [ 361 | "CPU times: user 2 µs, sys: 1 µs, total: 3 µs\n", 362 | "Wall time: 5.48 µs\n", 363 | "{\n", 364 | " \"Date\": \"29-11-2019\",\n", 365 | " \"GSTIN\": \"33AAECA0726C1ZG\",\n", 366 | " \"Invoice number\": \"201911291623\",\n", 367 | " \"Items\": [\n", 368 | " \"YOU ARE THE GREATE 1\",\n", 369 | " \"ST MUG-FATHER\\nPRINTED PAPER MATT 1 35.00\",\n", 370 | " \"CRAFT PAPER BAG- H 1 12.00\"\n", 371 | " ],\n", 372 | " \"Store address\": \"PHOENIX MARKETCITY\\nS-23,IIND FLOOR, 142, VELACHERY MAIN ROAD,\\nCHENNAI-600042\",\n", 373 | " \"Store name\": \"ARCHIES\",\n", 374 | " \"Store name-1\": \"ARCHIES\",\n", 375 | " \"Time\": \"16:22\",\n", 376 | " \"Total bill amount\": 434.0\n", 377 | "}\n" 378 | ], 379 | "name": "stdout" 380 | } 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "metadata": { 386 | "id": "ewE8T-KDEdsI", 387 | "colab_type": "code", 388 | "colab": { 389 | "base_uri": "https://localhost:8080/", 390 | "height": 238 391 | }, 392 | "outputId": "43c851e8-ff03-40e5-b865-b2b9986662f5" 393 | }, 394 | "source": [ 395 | "%time\n", 396 | "# Calling the function to get the output\n", 397 | "getOutput(\"img\", \"sea-lands.jpeg\")" 398 | ], 399 | "execution_count": 56, 400 | "outputs": [ 401 | { 402 | "output_type": "stream", 403 | "text": [ 404 | "CPU times: user 3 µs, sys: 0 ns, total: 3 µs\n", 405 | "Wall time: 6.2 µs\n", 406 | "{\n", 407 | " \"GSTIN\": \"27AAIPS4809H1ZE\",\n", 408 | " \"Items\": [\n", 409 | " \"VEG TRIPLE SEZ FRIED\",\n", 410 | " \"RICE\\nCOLD DRINK(500ML)\",\n", 411 | " \"COLD DRINK\"\n", 412 | " ],\n", 413 | " \"Store name\": \"SEA LAND\",\n", 414 | " \"Time\": \"03:30 PM\",\n", 415 | " \"Total bill amount\": 262.5\n", 416 | "}\n" 417 | ], 418 | "name": "stdout" 419 | } 420 | ] 421 | } 422 | ] 423 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Invoice-Text-Extraction 2 | File and Folder Information 3 | 4 | # run.py 5 | Python script that give back all the recognized information from an image or invoice. 6 | 7 | Syntax: python run.py --modelDir "models/FOLDER" --fileType "img (for image file) or txt(for text file)" --file "path of img or text file" 8 | example: python run.py --modelDir models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA --fileType img --file sample.jpg 9 | python run.py --modelDir models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA --fileType txt --file sample.txt 10 | 11 | # model 12 | Folder that contains all the Spacy Models 13 | 14 | AIDL_NER_DO-0.30_EP-20_80_PERC_DATA : Dropout-0.3, Epochs-20, trained on 80% of the Data 15 | AIDL_NER_DO-0.30_EP-20_90_PERC_DATA : Dropout-0.3, Epochs-20, trained on 90% of the Data 16 | AIDL_NER_DO-0.30_EP-20_100_PERC_DATA : Dropout-0.3, Epochs-20, trained on 100% of the Data 17 | 18 | # requirements.txt 19 | File that has all the dependencies needed in order to run this program 20 | 21 | # Installation 22 | virtualenv --python "path to Python3 64bit" aidl2020-team-ace 23 | cd aidl2020-team-ace 24 | Scripts\activate 25 | pip install -r requirements.txt 26 | python run.py --modelDir models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA --fileType img --file sample.jpg 27 | 28 | #### Output #### 29 | { 30 | "Date": "29-11-2019", 31 | "Invoice number": "201911291623", 32 | "Items": [ 33 | "YOU ARE THE GREATE\nST MUG-FATHER", 34 | "PRINTED PAPER MATT\nER\nCRAFT PAPER BAG- H" 35 | ], 36 | "Store address": "PHOENIX MARKETCITY\n", 37 | "Store address-2": "S-23,IIND FLOOR, 142, VELACHERY MAIN ROAD,\nCHENNAI-600042", 38 | "Store name": "ARCHIES", 39 | "Store name-1": "ARCHIES", 40 | "Time": "16:22", 41 | "Total bill amount": 434.0 42 | } 43 | -------------------------------------------------------------------------------- /models.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models.zip -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/meta.json: -------------------------------------------------------------------------------- 1 | {"lang":"en","name":"model","version":"0.0.0","spacy_version":">=2.1.9","description":"","author":"","email":"","url":"","license":"","vectors":{"width":0,"vectors":0,"keys":0,"name":"spacy_pretrained_vectors"},"pipeline":["ner"]} -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/cfg: -------------------------------------------------------------------------------- 1 | { 2 | "beam_width":1, 3 | "beam_density":0.0, 4 | "beam_update_prob":1.0, 5 | "cnn_maxout_pieces":3, 6 | "nr_class":33, 7 | "hidden_depth":1, 8 | "token_vector_width":96, 9 | "hidden_width":64, 10 | "maxout_pieces":2, 11 | "pretrained_vectors":null, 12 | "bilstm_depth":0 13 | } -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/model -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/moves: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/ner/moves -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/tokenizer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/tokenizer -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/key2row: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/key2row -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/lexemes.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/lexemes.bin -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/vectors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_100_PERC_DATA/vocab/vectors -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/meta.json: -------------------------------------------------------------------------------- 1 | {"lang":"en","name":"model","version":"0.0.0","spacy_version":">=2.1.9","description":"","author":"","email":"","url":"","license":"","vectors":{"width":0,"vectors":0,"keys":0,"name":"spacy_pretrained_vectors"},"pipeline":["ner"]} -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/cfg: -------------------------------------------------------------------------------- 1 | { 2 | "beam_width":1, 3 | "beam_density":0.0, 4 | "beam_update_prob":1.0, 5 | "cnn_maxout_pieces":3, 6 | "nr_class":33, 7 | "hidden_depth":1, 8 | "token_vector_width":96, 9 | "hidden_width":64, 10 | "maxout_pieces":2, 11 | "pretrained_vectors":null, 12 | "bilstm_depth":0 13 | } -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/model -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/moves: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/ner/moves -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/tokenizer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/tokenizer -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/key2row: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/key2row -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/lexemes.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/lexemes.bin -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/vectors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_80_PERC_DATA/vocab/vectors -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/meta.json: -------------------------------------------------------------------------------- 1 | {"lang":"en","name":"model","version":"0.0.0","spacy_version":">=2.1.9","description":"","author":"","email":"","url":"","license":"","vectors":{"width":0,"vectors":0,"keys":0,"name":"spacy_pretrained_vectors"},"pipeline":["ner"]} -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/cfg: -------------------------------------------------------------------------------- 1 | { 2 | "beam_width":1, 3 | "beam_density":0.0, 4 | "beam_update_prob":1.0, 5 | "cnn_maxout_pieces":3, 6 | "nr_class":33, 7 | "hidden_depth":1, 8 | "token_vector_width":96, 9 | "hidden_width":64, 10 | "maxout_pieces":2, 11 | "pretrained_vectors":null, 12 | "bilstm_depth":0 13 | } -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/model -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/moves: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/ner/moves -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/tokenizer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/tokenizer -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/key2row: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/key2row -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/lexemes.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/lexemes.bin -------------------------------------------------------------------------------- /models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/vectors: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA/vocab/vectors -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | blis==0.4.1 2 | cachetools==4.0.0 3 | catalogue==1.0.0 4 | certifi==2019.11.28 5 | chardet==3.0.4 6 | cymem==2.0.3 7 | google-api-core==1.16.0 8 | google-auth==1.11.2 9 | google-cloud-vision==1.0.0 10 | googleapis-common-protos==1.51.0 11 | grpcio==1.27.2 12 | idna==2.9 13 | murmurhash==1.0.2 14 | numpy==1.18.1 15 | plac==1.1.3 16 | preshed==3.0.2 17 | protobuf==3.11.3 18 | pyasn1==0.4.8 19 | pyasn1-modules==0.2.8 20 | pytz==2019.3 21 | requests==2.23.0 22 | rsa==4.0 23 | six==1.14.0 24 | spacy==2.2.3 25 | srsly==1.0.2 26 | thinc==7.3.1 27 | tqdm==4.43.0 28 | urllib3==1.25.8 29 | wasabi==0.6.0 30 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import spacy 2 | import os 3 | import json 4 | from google.cloud import vision 5 | import io 6 | import argparse 7 | 8 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="fyp-bot-fkvpth-63ef51dcf510.json" 9 | 10 | # Adding an argument parser 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--modelDir", help="Path to model files", default="models/AIDL_NER_DO-0.30_EP-20_90_PERC_DATA") 13 | parser.add_argument("--fileType", help="type of file, img or txt", default="img") 14 | parser.add_argument("--file", help="path of img or text file", default="sample.jpg") 15 | args = parser.parse_args() 16 | 17 | # Initializing vision API 18 | client = vision.ImageAnnotatorClient() 19 | 20 | # Loading the saved Spacy model 21 | nlp = spacy.load(args.modelDir) 22 | 23 | def getOutput(type, data): 24 | """ 25 | Parameters: type: type of data, either img or txt 26 | Output: Prints the dictionary 27 | """ 28 | textToPredict = "" 29 | # Checking if file type is img or not 30 | if (type == "img"): 31 | with io.open(data, 'rb') as image_file: 32 | # Reading file contente 33 | content = image_file.read() 34 | # Creating image format to match Vision API format 35 | image = vision.types.Image(content=content) 36 | # Getting results from Vision API 37 | text_response = client.text_detection(image=image) 38 | # Getting the text from the response 39 | texts = [text.description for text in text_response.text_annotations] 40 | # Storing data in variable 41 | textToPredict = texts[0] 42 | else: 43 | # Opening txt file 44 | f = open(data, "r") 45 | # Storing data in variable 46 | textToPredict = f.read() 47 | # Sending textual data to Spacy model for NER 48 | doc = nlp(textToPredict) 49 | max_amt = 0 50 | i = 1 51 | data = {} 52 | items_list = [] 53 | # Iterating over every entitiy to create a dictionary 54 | for ent in doc.ents: 55 | # Saving only one instance of Total Bill Amount 56 | if (ent.label_ == "Total bill amount"): 57 | try: 58 | amt = float(ent.text) 59 | if amt > max_amt: 60 | data["Total bill amount"] = amt 61 | except Exception as e: 62 | pass 63 | # Creating a list of Items 64 | elif (ent.label_ == "Items"): 65 | try: 66 | items_list.append(ent.text) 67 | except Exception as e: 68 | print(e) 69 | # Checking if the detected key is already present in the key, 70 | # If yes then we create a new key to store that value instead of overwriting the previous one 71 | else: 72 | if ent.label_ in data.keys(): 73 | data[ent.label_+"-"+str(i)] = ent.text 74 | i +=1 75 | else: 76 | data[ent.label_] = ent.text 77 | # Staring the list of items using the Items key in the dictionary 78 | data["Items"]=items_list 79 | # Sorting all the elements of the dictionary 80 | data = dict(sorted(data.items())) 81 | # Printing final result 82 | print(json.dumps(data, indent=2)) 83 | 84 | # Giving a call to the main function in order to extract information 85 | getOutput(args.fileType, args.file) -------------------------------------------------------------------------------- /sample.txt: -------------------------------------------------------------------------------- 1 | 2 | ARCHIES LIMITED 3 | ARCHIES LTD PHOENIX MARKETCITY 4 | S-23,IIND FLOOR, 142, VELACHERY MAIN ROAD, 5 | CHENNAI-600042 6 | PH:72990 36438 7 | GSTIN NO: 33AAECA0726C1ZG 8 | REGD.OFFICE:PLOTNO 191-F,SECTOR-4,IMT 9 | MANESAR, GURUGRAM, HARYANA. 10 | H.E.F.23-08-2010 11 | CIN NO: L36999HR 1990PLCO41175 12 | EMAIL ID: archiesđarchiesonl ine.com 13 | WEBSITE: wwW.archiesonl ine.com 14 | TAX INVOICE 15 | "*** * 16 | *-*- 17 | No.PMC19010926 18 | Date :29-11-2019 19 | Time :16:22 20 | HSN Description ty Rate Amount 21 | CODE 22 | RsP 23 | Rs_P 24 | 399.00 25 | 35.00 26 | 12.00 27 | YOU ARE THE GREATE 1 399.00 28 | ST MUG-FATHER 29 | PRINTED PAPER MATT 1 35.00 30 | R 31 | CRAFT PAPER BAG- H 1 12.00 32 | ORIZONTAL 33 | Total 34 | 446.00 35 | DISCOUNT 36 | ROUNDING OFF 37 | 11.99 38 | -0.01 39 | Net Value 40 | 434.00 41 | TAXABLE AMT RATE 42 | CGST 43 | SGST 44 | HSN 45 | 31.24 12 46 | 4802 47 | 4819 48 | 6912 49 | .88 50 | 0.00 51 | 21.38 52 | 88 53 | 0.00 54 | 21.38 55 | 1 56 | 0.01 12% 57 | 356.24 12% 58 | 23.26 59 | TOTAL 60 | 23.26 61 | 387.49 62 | Rupees Four Hundred Thirty Four Only 63 | Cash-Rs.434.00 64 | SUBJECT T0 CHENNAI JURISDICTION 65 | TIN NO: 33280461027 66 | CST NO: 637381 DT 11-10-20000 67 | B111 ID: 201911291623 68 | Cash Received: 504.00 69 | Bi11 70 | Balance Retd: 70.00 71 | Amount: 434.00 72 | . . . -------------------------------------------------------------------------------- /sea-lands.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sameer-m-dev/Invoice-Text-Extraction/e5270e1b70ec52c98fe5537eeea2a550a6f65112/sea-lands.jpeg --------------------------------------------------------------------------------