├── layoutlmv3 ├── class_list.txt ├── 06628fa4-invoice_1.jpg ├── 42ab3e95-invoice_2.jpg ├── 77beb7a5-invoice_10.jpg ├── 77beb7a5-invoice_12.jpg ├── 77beb7a5-invoice_3.jpg ├── 77beb7a5-invoice_6.jpg ├── 77beb7a5-invoice_8.jpg ├── c37c4871-invoice_11.jpg ├── c37c4871-invoice_4.jpg ├── c37c4871-invoice_5.jpg ├── c37c4871-invoice_7.jpg ├── c37c4871-invoice_9.jpg ├── test.txt └── train.txt ├── README.md ├── requirements.txt ├── LICENSE └── layoutlmv3.py /layoutlmv3/class_list.txt: -------------------------------------------------------------------------------- 1 | invoice_no, date, amount -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LayoutLMV3 2 | This repo consists of the code as discussed in the Medium blog. 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | label-studio==1.6.0 2 | pytesseract==0.3.10 3 | jupyterlab 4 | shapely 5 | scikit-learn -------------------------------------------------------------------------------- /layoutlmv3/06628fa4-invoice_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/06628fa4-invoice_1.jpg -------------------------------------------------------------------------------- /layoutlmv3/42ab3e95-invoice_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/42ab3e95-invoice_2.jpg -------------------------------------------------------------------------------- /layoutlmv3/77beb7a5-invoice_10.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/77beb7a5-invoice_10.jpg -------------------------------------------------------------------------------- /layoutlmv3/77beb7a5-invoice_12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/77beb7a5-invoice_12.jpg -------------------------------------------------------------------------------- /layoutlmv3/77beb7a5-invoice_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/77beb7a5-invoice_3.jpg -------------------------------------------------------------------------------- /layoutlmv3/77beb7a5-invoice_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/77beb7a5-invoice_6.jpg -------------------------------------------------------------------------------- /layoutlmv3/77beb7a5-invoice_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/77beb7a5-invoice_8.jpg -------------------------------------------------------------------------------- /layoutlmv3/c37c4871-invoice_11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/c37c4871-invoice_11.jpg -------------------------------------------------------------------------------- /layoutlmv3/c37c4871-invoice_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/c37c4871-invoice_4.jpg -------------------------------------------------------------------------------- /layoutlmv3/c37c4871-invoice_5.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/c37c4871-invoice_5.jpg -------------------------------------------------------------------------------- /layoutlmv3/c37c4871-invoice_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/c37c4871-invoice_7.jpg -------------------------------------------------------------------------------- /layoutlmv3/c37c4871-invoice_9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shivarama23/LayoutLMV3/HEAD/layoutlmv3/c37c4871-invoice_9.jpg -------------------------------------------------------------------------------- /layoutlmv3/test.txt: -------------------------------------------------------------------------------- 1 | {'id': 3, 'file_name': '06628fa4-invoice_1.jpg', 'tokens': ['3022-9461]', '4/13/95]', '4/20/99]', '4/17/95', '4/17/95]', 'tia,__047503.'], 'bboxes': [[16, 276, 92, 286], [112, 275, 171, 288], [190, 274, 249, 288], [258, 275, 316, 288], [335, 264, 401, 291], [612, 64, 697, 79]], 'ner_tags': [0, 1, 1, 1, 1, 0]} 2 | {'id': 2, 'file_name': '42ab3e95-invoice_2.jpg', 'tokens': ['D-5000', 'KS1n', '90', '23', 'Sep.91', 'TOTAL:'], 'bboxes': [[81, 211, 131, 221], [142, 211, 175, 221], [186, 211, 201, 221], [584, 270, 597, 280], [608, 270, 657, 283], [495, 515, 544, 524]], 'ner_tags': [0, 0, 0, 1, 1, 2]} 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 shivarama23 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, 
including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /layoutlmv3/train.txt: -------------------------------------------------------------------------------- 1 | {'id': 0, 'file_name': 'c37c4871-invoice_4.jpg', 'tokens': ['socio', 'INVOICE', 'NO”', '19639', 'e7/ie/ay'], 'bboxes': [[507, 480, 558, 489], [466, 198, 523, 207], [533, 198, 549, 207], [576, 197, 617, 207], [549, 211, 617, 222]], 'ner_tags': [2, 0, 0, 0, 1]} 2 | {'id': 1, 'file_name': '77beb7a5-invoice_3.jpg', 'tokens': ['November', '6,', '1981', '#3124', '$2,700.00'], 'bboxes': [[119, 223, 181, 234], [189, 223, 202, 236], [213, 223, 242, 234], [284, 407, 322, 419], [541, 526, 613, 542]], 'ner_tags': [1, 1, 1, 0, 2]} 3 | {'id': 0, 'file_name': 'c37c4871-invoice_5.jpg', 'tokens': ['socio', 'INVOICE', 'NO”', '19639', 'e7/ie/ay'], 'bboxes': [[507, 480, 558, 489], [466, 198, 523, 207], [533, 198, 549, 207], [576, 197, 617, 207], [549, 211, 617, 222]], 'ner_tags': [2, 0, 0, 0, 1]} 4 | {'id': 1, 'file_name': '77beb7a5-invoice_6.jpg', 'tokens': ['November', '6,', '1981', '#3124', '$2,700.00'], 
'bboxes': [[119, 223, 181, 234], [189, 223, 202, 236], [213, 223, 242, 234], [284, 407, 322, 419], [541, 526, 613, 542]], 'ner_tags': [1, 1, 1, 0, 2]} 5 | {'id': 0, 'file_name': 'c37c4871-invoice_7.jpg', 'tokens': ['socio', 'INVOICE', 'NO”', '19639', 'e7/ie/ay'], 'bboxes': [[507, 480, 558, 489], [466, 198, 523, 207], [533, 198, 549, 207], [576, 197, 617, 207], [549, 211, 617, 222]], 'ner_tags': [2, 0, 0, 0, 1]} 6 | {'id': 1, 'file_name': '77beb7a5-invoice_8.jpg', 'tokens': ['November', '6,', '1981', '#3124', '$2,700.00'], 'bboxes': [[119, 223, 181, 234], [189, 223, 202, 236], [213, 223, 242, 234], [284, 407, 322, 419], [541, 526, 613, 542]], 'ner_tags': [1, 1, 1, 0, 2]} 7 | {'id': 0, 'file_name': 'c37c4871-invoice_9.jpg', 'tokens': ['socio', 'INVOICE', 'NO”', '19639', 'e7/ie/ay'], 'bboxes': [[507, 480, 558, 489], [466, 198, 523, 207], [533, 198, 549, 207], [576, 197, 617, 207], [549, 211, 617, 222]], 'ner_tags': [2, 0, 0, 0, 1]} 8 | {'id': 1, 'file_name': '77beb7a5-invoice_10.jpg', 'tokens': ['November', '6,', '1981', '#3124', '$2,700.00'], 'bboxes': [[119, 223, 181, 234], [189, 223, 202, 236], [213, 223, 242, 234], [284, 407, 322, 419], [541, 526, 613, 542]], 'ner_tags': [1, 1, 1, 0, 2]} 9 | {'id': 0, 'file_name': 'c37c4871-invoice_11.jpg', 'tokens': ['socio', 'INVOICE', 'NO”', '19639', 'e7/ie/ay'], 'bboxes': [[507, 480, 558, 489], [466, 198, 523, 207], [533, 198, 549, 207], [576, 197, 617, 207], [549, 211, 617, 222]], 'ner_tags': [2, 0, 0, 0, 1]} 10 | {'id': 1, 'file_name': '77beb7a5-invoice_12.jpg', 'tokens': ['November', '6,', '1981', '#3124', '$2,700.00'], 'bboxes': [[119, 223, 181, 234], [189, 223, 202, 236], [213, 223, 242, 234], [284, 407, 322, 419], [541, 526, 613, 542]], 'ner_tags': [1, 1, 1, 0, 2]} 11 | -------------------------------------------------------------------------------- /layoutlmv3.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import ast 4 | from pathlib import Path 5 | 
import datasets
from PIL import Image
import pandas as pd

logger = datasets.logging.get_logger(__name__)

_CITATION = """\
@article{,
  title={},
  author={},
  journal={},
  year={},
  volume={}
}
"""

_DESCRIPTION = """\
This is a sample dataset for training layoutlmv3 model on custom annotated data.
"""

# Entity classes, in ClassLabel index order (the ``ner_tags`` integers in
# train.txt / test.txt index into this list). Mirrors layoutlmv3/class_list.txt.
_LABELS = ["invoice_no", "date", "amount"]


def load_image(image_path):
    """Open an image, convert it to RGB and return it with its size.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``(image, (width, height))`` tuple where ``image`` is an RGB
        ``PIL.Image.Image``.
    """
    # convert() produces an independent, fully loaded copy, so the file
    # handle opened by Image.open can be released right away.
    with Image.open(image_path) as raw:
        image = raw.convert("RGB")
    w, h = image.size
    return image, (w, h)


def normalize_bbox(bbox, size):
    """Rescale a pixel bounding box to the 0-1000 integer grid.

    Args:
        bbox: Four numbers; items 0 and 2 are divided by the image width,
            items 1 and 3 by the image height.
        size: ``(width, height)`` of the image the box belongs to.

    Returns:
        A list of four ints (truncated, not rounded).
    """
    width, height = size[0], size[1]
    return [
        int(1000 * bbox[0] / width),
        int(1000 * bbox[1] / height),
        int(1000 * bbox[2] / width),
        int(1000 * bbox[3] / height),
    ]


_URLS = []
# Directory that contains the ``layoutlmv3`` data folder; relative to the
# current working directory of the process that loads the dataset.
data_path = r'./'


class DatasetConfig(datasets.BuilderConfig):
    """BuilderConfig for InvoiceExtraction Dataset"""

    def __init__(self, **kwargs):
        """BuilderConfig for InvoiceExtraction Dataset.

        Args:
            **kwargs: keyword arguments forwarded to super.
        """
        super().__init__(**kwargs)


class InvoiceExtraction(datasets.GeneratorBasedBuilder):
    """Dataset builder that serves locally annotated invoice images."""

    BUILDER_CONFIGS = [
        DatasetConfig(
            name="InvoiceExtraction",
            version=datasets.Version("1.0.0"),
            description="InvoiceExtraction dataset",
        ),
    ]

    def _info(self):
        """Describe the features yielded by ``_generate_examples``."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    "id": datasets.Value("string"),
                    "tokens": datasets.Sequence(datasets.Value("string")),
                    "bboxes": datasets.Sequence(
                        datasets.Sequence(datasets.Value("int64"))
                    ),
                    "ner_tags": datasets.Sequence(
                        datasets.features.ClassLabel(names=_LABELS)
                    ),
                    "image_path": datasets.Value("string"),
                    "image": datasets.features.Image(),
                }
            ),
            supervised_keys=None,
            citation=_CITATION,
            homepage="",
        )

    def _split_generators(self, dl_manager):
        """Return the train/test SplitGenerators.

        Nothing is downloaded: both splits are read from the local
        ``layoutlmv3`` folder under ``data_path`` (``train.txt`` and
        ``test.txt``); ``dl_manager`` is unused.
        """
        dest = os.path.join(data_path, 'layoutlmv3')

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={"filepath": os.path.join(dest, "train.txt"), "dest": dest},
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"filepath": os.path.join(dest, "test.txt"), "dest": dest},
            ),
        ]

    def _generate_examples(self, filepath, dest):
        """Yield ``(key, example)`` pairs parsed from one annotation file.

        Each non-blank line of ``filepath`` must be a Python dict literal
        with the keys ``file_name``, ``tokens``, ``bboxes`` and ``ner_tags``.

        Args:
            filepath: Path of the annotation file (train.txt / test.txt).
            dest: Directory holding the images named by ``file_name``.

        Yields:
            ``(guid, example)`` where ``guid`` is the 0-based index of the
            record among the non-blank lines and ``example`` matches the
            features declared in ``_info``.
        """
        logger.info("⏳ Generating examples from = %s", filepath)

        # Read everything up front so the file is closed before yielding.
        # Blank lines (e.g. a trailing newline) are skipped because
        # ast.literal_eval('') raises SyntaxError.
        with open(filepath, 'r', encoding='utf-8') as f:
            records = [line.strip() for line in f if line.strip()]

        for guid, record in enumerate(records):
            # literal_eval only accepts Python literals, so it does not
            # execute code found in the annotation file.
            data = ast.literal_eval(record)
            image_path = os.path.join(dest, data['file_name'])
            image, size = load_image(image_path)

            text = data['tokens']
            label = data['ner_tags']
            boxes = [normalize_bbox(box, size) for box in data['bboxes']]

            # Normalized coordinates are expected to stay within 0-1000; a
            # value outside means the annotation does not fit its image.
            if any(not 0 <= coord <= 1000 for box in boxes for coord in box):
                logger.warning(
                    "Bounding box outside the 0-1000 range after normalization: %s",
                    image_path,
                )

            yield guid, {
                "id": str(guid),
                "tokens": text,
                "bboxes": boxes,
                "ner_tags": label,
                "image_path": image_path,
                "image": image,
            }