├── .gitattributes ├── .gitignore ├── README.md ├── SimSong.ttc ├── Using_Table_Transformer_for_table_detection_and_table_structure_recognition.ipynb ├── data ├── download_data.sh ├── merge_all.ipynb ├── process_icdar2013.py ├── voc2coco_detection.ipynb ├── voc2coco_structure.ipynb ├── voc2coco_structure_fintabnet.ipynb └── voc2coco_structure_icdar2013.ipynb ├── postprocess.py ├── table_structure_recognition.ipynb ├── yolov5 ├── .DS_Store ├── data │ ├── .DS_Store │ ├── custom-detection.yaml │ └── custom-structure.yaml ├── runs │ ├── .DS_Store │ ├── train │ │ ├── .DS_Store │ │ ├── yolov5s-custom-detection-800 │ │ │ ├── F1_curve.png │ │ │ ├── PR_curve.png │ │ │ ├── P_curve.png │ │ │ ├── R_curve.png │ │ │ ├── confusion_matrix.png │ │ │ ├── events.out.tfevents.1708692027.ip-172-16-65-49.ec2.internal.21611.0 │ │ │ ├── hyp.yaml │ │ │ ├── labels.jpg │ │ │ ├── labels_correlogram.jpg │ │ │ ├── opt.yaml │ │ │ ├── results.csv │ │ │ ├── results.png │ │ │ ├── train_batch0.jpg │ │ │ ├── train_batch1.jpg │ │ │ ├── train_batch2.jpg │ │ │ ├── val_batch0_labels.jpg │ │ │ ├── val_batch0_pred.jpg │ │ │ ├── val_batch1_labels.jpg │ │ │ ├── val_batch1_pred.jpg │ │ │ ├── val_batch2_labels.jpg │ │ │ ├── val_batch2_pred.jpg │ │ │ └── weights │ │ │ │ ├── best.pt │ │ │ │ └── last.pt │ │ ├── yolov5s-custom-detection │ │ │ ├── .DS_Store │ │ │ ├── F1_curve.png │ │ │ ├── PR_curve.png │ │ │ ├── P_curve.png │ │ │ ├── R_curve.png │ │ │ ├── confusion_matrix.png │ │ │ ├── events.out.tfevents.1663140451.ip-172-16-93-185.ec2.internal.12050.0 │ │ │ ├── hyp.yaml │ │ │ ├── labels.jpg │ │ │ ├── labels_correlogram.jpg │ │ │ ├── opt.yaml │ │ │ ├── results.csv │ │ │ ├── results.png │ │ │ ├── train_batch0.jpg │ │ │ ├── train_batch1.jpg │ │ │ ├── train_batch2.jpg │ │ │ ├── val_batch0_labels.jpg │ │ │ ├── val_batch0_pred.jpg │ │ │ ├── val_batch1_labels.jpg │ │ │ ├── val_batch1_pred.jpg │ │ │ ├── val_batch2_labels.jpg │ │ │ ├── val_batch2_pred.jpg │ │ │ └── weights │ │ │ │ ├── .DS_Store │ │ │ │ └── best.pt │ │ └── yolov5s-custom-structure │ │ │ ├── .DS_Store │ │ │ ├── F1_curve.png │ │ │ ├── PR_curve.png │ │ │ ├── P_curve.png │ │ │ ├── R_curve.png │ │ │ ├── confusion_matrix.png │ │ │ ├── events.out.tfevents.1663282037.ip-172-16-93-185.ec2.internal.17535.0 │ │ │ ├── hyp.yaml │ │ │ ├── labels.jpg │ │ │ ├── labels_correlogram.jpg │ │ │ ├── opt.yaml │ │ │ ├── results.csv │ │ │ ├── results.png │ │ │ ├── train_batch0.jpg │ │ │ ├── train_batch1.jpg │ │ │ ├── train_batch2.jpg │ │ │ ├── val_batch0_labels.jpg │ │ │ ├── val_batch0_pred.jpg │ │ │ ├── val_batch1_labels.jpg │ │ │ ├── val_batch1_pred.jpg │ │ │ ├── val_batch2_labels.jpg │ │ │ ├── val_batch2_pred.jpg │ │ │ └── weights │ │ │ ├── .DS_Store │ │ │ └── best.pt │ └── val │ │ ├── .DS_Store │ │ ├── yolov5s-custom-detection-800 │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── confusion_matrix.png │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ └── val_batch2_pred.jpg │ │ ├── yolov5s-custom-detection │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── confusion_matrix.png │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ └── val_batch2_pred.jpg │ │ └── yolov5s-custom-structure │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── confusion_matrix.png │ │ ├── val_batch0_labels.jpg │ │ ├── 
val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ └── val_batch2_pred.jpg ├── train_PubTables-1M_detection.sh └── train_PubTables-1M_structure.sh ├── yolov8 ├── data │ ├── .DS_Store │ ├── custom-detection.yaml │ ├── custom-structure-all.yaml │ ├── custom-structure-fintabnet.yaml │ ├── custom-structure-icdar2013.yaml │ └── custom-structure.yaml ├── evaluate_ICDAR-2013.c_structure.sh ├── runs │ └── detect │ │ ├── yolov8s-custom-detection-val │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── confusion_matrix.png │ │ ├── confusion_matrix_normalized.png │ │ ├── val_batch0_labels.jpg │ │ └── val_batch0_pred.jpg │ │ ├── yolov8s-custom-detection │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── args.yaml │ │ ├── confusion_matrix.png │ │ ├── confusion_matrix_normalized.png │ │ ├── labels.jpg │ │ ├── labels_correlogram.jpg │ │ ├── results.csv │ │ ├── results.png │ │ ├── train_batch0.jpg │ │ ├── train_batch1.jpg │ │ ├── train_batch2.jpg │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ ├── val_batch2_pred.jpg │ │ └── weights │ │ │ ├── best.pt │ │ │ └── last.pt │ │ ├── yolov8s-custom-structure-all-icdar2013-val │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── confusion_matrix.png │ │ ├── confusion_matrix_normalized.png │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ └── val_batch2_pred.jpg │ │ ├── yolov8s-custom-structure-all-val │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── confusion_matrix.png │ │ ├── confusion_matrix_normalized.png │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ └── val_batch2_pred.jpg │ │ ├── yolov8s-custom-structure-all │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── args.yaml │ │ ├── confusion_matrix.png │ │ ├── confusion_matrix_normalized.png │ │ ├── labels.jpg │ │ ├── labels_correlogram.jpg │ │ ├── results.csv │ │ ├── results.png │ │ ├── train_batch0.jpg │ │ ├── train_batch1.jpg │ │ ├── train_batch2.jpg │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ ├── val_batch2_pred.jpg │ │ └── weights │ │ │ ├── best.pt │ │ │ └── last.pt │ │ ├── yolov8s-custom-structure-icdar2013-val │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── confusion_matrix.png │ │ ├── confusion_matrix_normalized.png │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ └── val_batch2_pred.jpg │ │ ├── yolov8s-custom-structure-val │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── confusion_matrix.png │ │ ├── confusion_matrix_normalized.png │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ └── val_batch2_pred.jpg │ │ └── yolov8s-custom-structure │ │ ├── F1_curve.png │ │ ├── PR_curve.png │ │ ├── P_curve.png │ │ ├── R_curve.png │ │ ├── args.yaml │ │ ├── confusion_matrix.png │ │ ├── 
confusion_matrix_normalized.png │ │ ├── labels.jpg │ │ ├── labels_correlogram.jpg │ │ ├── results.csv │ │ ├── results.png │ │ ├── train_batch0.jpg │ │ ├── train_batch1.jpg │ │ ├── train_batch2.jpg │ │ ├── val_batch0_labels.jpg │ │ ├── val_batch0_pred.jpg │ │ ├── val_batch1_labels.jpg │ │ ├── val_batch1_pred.jpg │ │ ├── val_batch2_labels.jpg │ │ ├── val_batch2_pred.jpg │ │ └── weights │ │ ├── best.pt │ │ └── last.pt ├── train_FinTabNet.c_structure.sh ├── train_PubTables-1M_detection.sh ├── train_PubTables-1M_structure.sh └── train_all_structure.sh └── zh_val_0.jpg /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pt filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
95 | __pypackages__/
96 | 
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # table_structure_recognition
2 | 
3 | Table detection and table structure recognition using YOLOv5/YOLOv8. With smaller models, you can match, and even beat, the results of Table Transformer (TATR).
4 | 
5 | ## Dataset
6 | 
7 | You can download PubTables-1M from [Microsoft Research Open Data](https://msropendata.com/datasets/505fcbe3-1383-42b1-913a-f651b8b712d3) and uncompress `PubTables-1M-Image_Page_Detection_PASCAL_VOC.tar.gz` and `PubTables-1M-Image_Table_Structure_PASCAL_VOC.tar.gz` into the directory `data/pubtables-1m/`. Alternatively, you can download PubTables-1M, FinTabNet.c, and ICDAR-2013.c using [data/download_data.sh](data/download_data.sh).
8 | 
9 | Then, run the four notebooks ([data/voc2coco_detection.ipynb](data/voc2coco_detection.ipynb), [data/voc2coco_structure.ipynb](data/voc2coco_structure.ipynb), [data/voc2coco_structure_fintabnet.ipynb](data/voc2coco_structure_fintabnet.ipynb), and [data/voc2coco_structure_icdar2013.ipynb](data/voc2coco_structure_icdar2013.ipynb)) to convert the PASCAL VOC annotations into the COCO-style directory layout with normalized YOLO txt labels.
10 | 
11 | ## Train Model (Yolov5)
12 | 
13 | Clone the latest YOLOv5 code from [https://github.com/ultralytics/yolov5](https://github.com/ultralytics/yolov5) into the directory `yolov5/`, then run the two scripts to train the table detection model ([yolov5/train_PubTables-1M_detection.sh](yolov5/train_PubTables-1M_detection.sh)) and the table structure recognition model ([yolov5/train_PubTables-1M_structure.sh](yolov5/train_PubTables-1M_structure.sh)). You may need to change the `path` variable in the YAML files under `yolov5/data/` to match your environment.
14 | 
15 | I trained each model with yolov5s for 10 epochs; you can use the models in the directory `yolov5/runs/` for a quick trial, or fine-tune from the checkpoints.
16 | 
17 | ## Train Model (Yolov8)
18 | 
19 | Change to the directory `yolov8/` and run the two scripts to train the table detection model ([yolov8/train_PubTables-1M_detection.sh](yolov8/train_PubTables-1M_detection.sh)) and the table structure recognition model ([yolov8/train_PubTables-1M_structure.sh](yolov8/train_PubTables-1M_structure.sh)). You may need to change the `path` variable in the YAML files under `yolov8/data/` to match your environment.
20 | 
21 | I trained each model with yolov8s for 10 epochs; you can use the models in the directory `yolov8/runs/detect/` for a quick trial, or fine-tune from the checkpoints.
22 | 
23 | ## Use Model
24 | 
25 | You can run the notebook [table_structure_recognition.ipynb](table_structure_recognition.ipynb) to convert a table image to an Excel file. Pay attention to the `ocr` function: you should use [all-in-one-ai](https://www.amazonaws.cn/en/solutions/horizontal/guidance/all-in-one-ai/), PaddleOCR, or any other OCR service to obtain the OCR results.
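As a quick start, here is a minimal inference sketch (not part of the original notebooks), assuming the `ultralytics` package is installed; it uses one of the committed checkpoints and the sample image bundled in this repo, and the confidence threshold is illustrative:

```python
# Minimal sketch: table detection with the committed YOLOv8 checkpoint.
# Assumes `pip install ultralytics`; the 0.5 threshold is illustrative.
from ultralytics import YOLO

model = YOLO('yolov8/runs/detect/yolov8s-custom-detection/weights/best.pt')
results = model('zh_val_0.jpg', conf=0.5)

for box in results[0].boxes:
    # Pixel-space corners, confidence, and class id (0 = table, 1 = table rotated)
    print(box.xyxy.tolist(), float(box.conf), int(box.cls))
```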
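The `ocr` hook could look like the following sketch backed by PaddleOCR; the returned `(box, text, confidence)` shape is an assumption (adapt it to whatever the notebook expects), and PaddleOCR's result layout differs slightly across versions:

```python
# Minimal sketch of an `ocr` helper backed by PaddleOCR (pip install paddleocr).
# The returned tuples are an assumed format, not the notebook's required one.
from paddleocr import PaddleOCR

ocr_engine = PaddleOCR(use_angle_cls=True, lang='ch')  # SimSong.ttc suggests Chinese text

def ocr(image_path):
    result = ocr_engine.ocr(image_path, cls=True)
    lines = []
    for line in result[0]:  # result[0]: detections for the first (only) page
        box, (text, confidence) = line
        lines.append((box, text, confidence))
    return lines
```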
--------------------------------------------------------------------------------
/SimSong.ttc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/SimSong.ttc
--------------------------------------------------------------------------------
/data/download_data.sh:
--------------------------------------------------------------------------------
1 | # sudo apt update
2 | # sudo apt install git-lfs -y
3 | git lfs install
4 | 
5 | git clone https://huggingface.co/datasets/bsmock/FinTabNet.c
6 | (cd FinTabNet.c && bash extract_fintabnet.sh)  # subshell, so later clones still land in data/
7 | 
8 | git clone https://huggingface.co/datasets/bsmock/pubtables-1m
9 | (cd pubtables-1m && bash extract_structure_dataset.sh)  # subshell, so later clones still land in data/
10 | 
11 | # git clone https://huggingface.co/datasets/bsmock/ICDAR-2013-Table-Competition-Corrected
12 | # (cd ICDAR-2013-Table-Competition-Corrected && tar xvf ICDAR-2013-Table-Competition-Corrected.tar.gz)
13 | # pip install editdistance
14 | # python process_icdar2013.py --data_dir ICDAR-2013-Table-Competition-Corrected/ICDAR-2013-Table-Competition-Corrected --output_dir ICDAR-2013.c
15 | 
16 | git clone https://huggingface.co/datasets/bsmock/ICDAR-2013.c
17 | (cd ICDAR-2013.c && bash extract_icdar2013.sh)  # subshell, so the script finishes in data/
--------------------------------------------------------------------------------
/data/merge_all.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "id": "0bff9744-9c68-4d0e-ab53-dc50bb7900fd",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import os\n",
11 | "import shutil\n",
12 | "from tqdm import tqdm"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 8,
18 | "id": "6ef97804-0902-4846-b662-0cc0d4949885",
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "coco_base_dir_1 = 'pubtables-1m/PubTables-1M-Structure-COCO'\n",
23 | "coco_base_dir_2 = 'FinTabNet.c/FinTabNet.c-Structure-COCO'\n",
24 | "coco_base_dir_all = 'All-Structure-COCO'"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 9,
30 | "id": "329ed238-7340-466c-8336-0d52e22338b6",
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "!cp -r $coco_base_dir_1 $coco_base_dir_all"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 11,
40 | "id": "5573c516",
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "!cp -r $coco_base_dir_2/* $coco_base_dir_all"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": null,
50 | "id": "0e7909db-8ec6-4e0b-aa33-7ac57ab79b77",
51 | "metadata": {},
52 | "outputs": [],
53 | "source": []
54 | }
55 | ],
56 | "metadata": {
57 | "kernelspec": {
58 | "display_name": "pytorch",
59 | "language": "python",
60 | "name": "python3"
61 | },
62 | "language_info": {
63 | "codemirror_mode": {
64 | "name": "ipython",
65 | "version": 3
66 | },
67 | "file_extension": ".py",
68 | "mimetype": "text/x-python",
69 | "name": "python",
70 | "nbconvert_exporter": "python",
71 | "pygments_lexer": "ipython3",
72 | "version": "3.11.9"
73 | }
74 | },
75 | "nbformat": 4,
76 | "nbformat_minor": 5
77 | }
78 | 
--------------------------------------------------------------------------------
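As a quick illustration of the label math applied by the `voc2coco_*` notebooks that follow (the box and page size here are made up), each VOC `bndbox` in pixel corners becomes one normalized `class cx cy w h` line in the output txt file:

```python
# Hypothetical example of the VOC -> YOLO txt conversion in the notebooks below.
xmin, ymin, xmax, ymax = 100, 200, 300, 260  # VOC bndbox, pixels
width, height = 1000, 1000                   # page size from the XML <size> tag
center_x, center_y = (xmin + xmax) / 2, (ymin + ymax) / 2
w, h = xmax - xmin, ymax - ymin
print(0, center_x / width, center_y / height, w / width, h / height)
# -> 0 0.2 0.23 0.2 0.06
```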
/data/voc2coco_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "261b8e28-12b2-4a43-af71-35310b03b68e", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "!pip install xmltodict" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "0bff9744-9c68-4d0e-ab53-dc50bb7900fd", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import os\n", 21 | "import shutil\n", 22 | "import xmltodict\n", 23 | "from tqdm import tqdm" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "6ef97804-0902-4846-b662-0cc0d4949885", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "voc_base_dir = 'pubtables-1m/PubTables-1M-Detection'\n", 34 | "coco_base_dir = 'pubtables-1m/PubTables-1M-Detection-COCO'" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "329ed238-7340-466c-8336-0d52e22338b6", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "!mkdir -p $coco_base_dir\n", 45 | "!mkdir -p $coco_base_dir/images/train\n", 46 | "!mkdir -p $coco_base_dir/images/val\n", 47 | "!mkdir -p $coco_base_dir/images/test\n", 48 | "!mkdir -p $coco_base_dir/labels/train\n", 49 | "!mkdir -p $coco_base_dir/labels/val\n", 50 | "!mkdir -p $coco_base_dir/labels/test" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "5573c516", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "import multiprocessing\n", 61 | "from joblib import Parallel, delayed\n", 62 | "\n", 63 | "max_processes = multiprocessing.cpu_count()\n", 64 | "print('max_processes:', max_processes)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "e18c4548-3ed8-4e4e-ab44-6b593a6af331", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "class_names_map = {'table': 0, 'table rotated':1}\n", 75 | "\n", 76 | "def voc2coco_single(xml_label_filename):\n", 77 | " xml_label_filename = xml_label_filename.strip()\n", 78 | " if xml_label_filename.endswith('xml'):\n", 79 | " file_object = open(os.path.join(voc_base_dir, xml_label_filename), encoding='utf-8') \n", 80 | " try:\n", 81 | " all_the_xmlStr = file_object.read()\n", 82 | " finally:\n", 83 | " file_object.close()\n", 84 | " convertedDict = xmltodict.parse(all_the_xmlStr)\n", 85 | " # print(convertedDict)\n", 86 | " # print(len(convertedDict['annotation']['object']))\n", 87 | " fix_width = int(convertedDict['annotation']['size']['width'])\n", 88 | " fix_height = int(convertedDict['annotation']['size']['height'])\n", 89 | " if 'object' in convertedDict['annotation']:\n", 90 | " objs = convertedDict['annotation']['object']\n", 91 | " if not isinstance(objs,list):\n", 92 | " objs = [objs]\n", 93 | "# print('objs:', objs)\n", 94 | " with open(os.path.join(coco_base_dir, 'labels', xml_label_filename[:-4]+'.txt'), 'w') as fout:\n", 95 | " for annotation in objs:\n", 96 | " if annotation['name'] not in class_names_map:\n", 97 | " class_names_map[annotation['name']] = len(class_names_map)\n", 98 | " class_id = class_names_map[annotation['name']]\n", 99 | " # class_id = 0\n", 100 | "\n", 101 | " xmin = int(float(annotation['bndbox']['xmin']))\n", 102 | " ymin = int(float(annotation['bndbox']['ymin']))\n", 103 | " xmax = int(float(annotation['bndbox']['xmax']))\n", 104 | " ymax = int(float(annotation['bndbox']['ymax']))\n", 105 | "\n", 106 | " w = 
xmax-xmin\n", 107 | " h = ymax-ymin\n", 108 | "\n", 109 | " if w>0 and h>0:\n", 110 | " center_x = (xmin+xmax)/2\n", 111 | " center_y = (ymin+ymax)/2\n", 112 | " fout.write(str(class_id)+' '+str(center_x/fix_width)+' '+str(center_y/fix_height)+' '+str(w/fix_width)+' '+str(h/fix_height)+'\\n')\n", 113 | " else:\n", 114 | " print('[BUG] xml_label_filename:', xml_label_filename)\n", 115 | "\n", 116 | "def voc2coco(split='train'):\n", 117 | " with open(os.path.join(voc_base_dir, split+'_filelist.txt'), 'r') as fin:\n", 118 | " xml_label_filenames = fin.readlines()\n", 119 | " print(split, len(xml_label_filenames))\n", 120 | " Parallel(n_jobs=max_processes)(delayed(voc2coco_single)(xml_label_filename) for xml_label_filename in tqdm(xml_label_filenames)) " 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "17b6ad73-7d4b-41c1-b82c-311c391bb3b1", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "voc2coco('train')\n", 131 | "voc2coco('val')\n", 132 | "voc2coco('test')\n", 133 | "\n", 134 | "print('class_names_map:', class_names_map)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "26cb6ad5-9c94-4eba-a6f6-02a31881e9ec", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "def copy_images(split='train'):\n", 145 | " with open(os.path.join(voc_base_dir, split+'_filelist.txt'), 'r') as fin:\n", 146 | " xml_label_filenames = fin.readlines()\n", 147 | " print(split, len(xml_label_filenames))\n", 148 | " for xml_label_filename in tqdm(xml_label_filenames):\n", 149 | " xml_label_filename = xml_label_filename.strip()\n", 150 | " if xml_label_filename.endswith('xml'):\n", 151 | " jpg_filename = os.path.join(voc_base_dir, 'images', xml_label_filename.split('/')[1].replace('xml', 'jpg'))\n", 152 | " new_jpg_filename = os.path.join(coco_base_dir, 'images', xml_label_filename.replace('xml', 'jpg'))\n", 153 | " # print(jpg_filename, new_jpg_filename)\n", 154 | " try:\n", 155 | " # shutil.copy(jpg_filename, new_jpg_filename)\n", 156 | " shutil.move(jpg_filename, new_jpg_filename)\n", 157 | " # break\n", 158 | " except:\n", 159 | " continue" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "id": "5cafe5e3-ef2d-4923-9df5-d2f91bcf548a", 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "copy_images('train')\n", 170 | "copy_images('val')\n", 171 | "copy_images('test')" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "id": "0e7909db-8ec6-4e0b-aa33-7ac57ab79b77", 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "conda_pytorch_p310", 186 | "language": "python", 187 | "name": "conda_pytorch_p310" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.10.14" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 5 204 | } 205 | -------------------------------------------------------------------------------- /data/voc2coco_structure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "261b8e28-12b2-4a43-af71-35310b03b68e", 7 | "metadata": {}, 
8 | "outputs": [], 9 | "source": [ 10 | "!pip install xmltodict" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "0bff9744-9c68-4d0e-ab53-dc50bb7900fd", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import os\n", 21 | "import shutil\n", 22 | "import xmltodict\n", 23 | "from tqdm import tqdm" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "6ef97804-0902-4846-b662-0cc0d4949885", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "voc_base_dir = 'pubtables-1m/PubTables-1M-Structure'\n", 34 | "coco_base_dir = 'pubtables-1m/PubTables-1M-Structure-COCO'" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "329ed238-7340-466c-8336-0d52e22338b6", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "!mkdir -p $coco_base_dir\n", 45 | "!mkdir -p $coco_base_dir/images/train\n", 46 | "!mkdir -p $coco_base_dir/images/val\n", 47 | "!mkdir -p $coco_base_dir/images/test\n", 48 | "!mkdir -p $coco_base_dir/labels/train\n", 49 | "!mkdir -p $coco_base_dir/labels/val\n", 50 | "!mkdir -p $coco_base_dir/labels/test" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "88b44964-6d11-4906-b00c-0de6c806b56c", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "import multiprocessing\n", 61 | "from joblib import Parallel, delayed\n", 62 | "\n", 63 | "max_processes = multiprocessing.cpu_count()\n", 64 | "print('max_processes:', max_processes)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "e18c4548-3ed8-4e4e-ab44-6b593a6af331", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "class_names_map = {'table':0, 'table column':1, 'table row':2, 'table column header':3, 'table projected row header':4, 'table spanning cell':5, 'table grid cell':6}\n", 75 | "\n", 76 | "def voc2coco_single(xml_label_filename):\n", 77 | " xml_label_filename = xml_label_filename.strip()\n", 78 | " if xml_label_filename.endswith('xml'):\n", 79 | " file_object = open(os.path.join(voc_base_dir, xml_label_filename), encoding='utf-8') \n", 80 | " try:\n", 81 | " all_the_xmlStr = file_object.read()\n", 82 | " finally:\n", 83 | " file_object.close()\n", 84 | " convertedDict = xmltodict.parse(all_the_xmlStr)\n", 85 | " # print(convertedDict)\n", 86 | " # print(len(convertedDict['annotation']['object']))\n", 87 | " fix_width = int(convertedDict['annotation']['size']['width'])\n", 88 | " fix_height = int(convertedDict['annotation']['size']['height'])\n", 89 | " if 'object' in convertedDict['annotation']:\n", 90 | " objs = convertedDict['annotation']['object']\n", 91 | " if not isinstance(objs,list):\n", 92 | " objs = [objs]\n", 93 | "# print('objs:', objs)\n", 94 | " with open(os.path.join(coco_base_dir, 'labels', xml_label_filename[:-4]+'.txt'), 'w') as fout:\n", 95 | " for annotation in objs:\n", 96 | " if annotation['name'] not in class_names_map:\n", 97 | " class_names_map[annotation['name']] = len(class_names_map)\n", 98 | " class_id = class_names_map[annotation['name']]\n", 99 | " # class_id = 0\n", 100 | "\n", 101 | " xmin = int(float(annotation['bndbox']['xmin']))\n", 102 | " ymin = int(float(annotation['bndbox']['ymin']))\n", 103 | " xmax = int(float(annotation['bndbox']['xmax']))\n", 104 | " ymax = int(float(annotation['bndbox']['ymax']))\n", 105 | "\n", 106 | " w = xmax-xmin\n", 107 | " h = ymax-ymin\n", 108 | "\n", 109 | " if w>0 and h>0:\n", 110 | " center_x = (xmin+xmax)/2\n", 111 
| " center_y = (ymin+ymax)/2\n", 112 | " fout.write(str(class_id)+' '+str(center_x/fix_width)+' '+str(center_y/fix_height)+' '+str(w/fix_width)+' '+str(h/fix_height)+'\\n')\n", 113 | " else:\n", 114 | " print('[BUG] xml_label_filename:', xml_label_filename)\n", 115 | "\n", 116 | "def voc2coco(split='train'):\n", 117 | " with open(os.path.join(voc_base_dir, split+'_filelist.txt'), 'r') as fin:\n", 118 | " xml_label_filenames = fin.readlines()\n", 119 | " print(split, len(xml_label_filenames))\n", 120 | " Parallel(n_jobs=max_processes)(delayed(voc2coco_single)(xml_label_filename) for xml_label_filename in tqdm(xml_label_filenames)) " 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "17b6ad73-7d4b-41c1-b82c-311c391bb3b1", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "voc2coco('train')\n", 131 | "voc2coco('val')\n", 132 | "voc2coco('test')\n", 133 | "\n", 134 | "print('class_names_map:', class_names_map)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "26cb6ad5-9c94-4eba-a6f6-02a31881e9ec", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "def copy_images(split='train'):\n", 145 | " with open(os.path.join(voc_base_dir, split+'_filelist.txt'), 'r') as fin:\n", 146 | " xml_label_filenames = fin.readlines()\n", 147 | " print(split, len(xml_label_filenames))\n", 148 | " for xml_label_filename in tqdm(xml_label_filenames):\n", 149 | " xml_label_filename = xml_label_filename.strip()\n", 150 | " if xml_label_filename.endswith('xml'):\n", 151 | " jpg_filename = os.path.join(voc_base_dir, 'images', xml_label_filename.split('/')[1].replace('xml', 'jpg'))\n", 152 | " new_jpg_filename = os.path.join(coco_base_dir, 'images', xml_label_filename.replace('xml', 'jpg'))\n", 153 | " # print(jpg_filename, new_jpg_filename)\n", 154 | " try:\n", 155 | " # shutil.copy(jpg_filename, new_jpg_filename)\n", 156 | " shutil.move(jpg_filename, new_jpg_filename)\n", 157 | " # break\n", 158 | " except:\n", 159 | " continue" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "id": "5cafe5e3-ef2d-4923-9df5-d2f91bcf548a", 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "copy_images('train')\n", 170 | "copy_images('val')\n", 171 | "copy_images('test')" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "id": "0e7909db-8ec6-4e0b-aa33-7ac57ab79b77", 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "conda_pytorch_p310", 186 | "language": "python", 187 | "name": "conda_pytorch_p310" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.10.14" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 5 204 | } 205 | -------------------------------------------------------------------------------- /data/voc2coco_structure_fintabnet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "261b8e28-12b2-4a43-af71-35310b03b68e", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "!pip install xmltodict" 11 | ] 12 | }, 13 | { 14 | "cell_type": 
"code", 15 | "execution_count": null, 16 | "id": "0bff9744-9c68-4d0e-ab53-dc50bb7900fd", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import os\n", 21 | "import shutil\n", 22 | "import xmltodict\n", 23 | "from tqdm import tqdm" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "6ef97804-0902-4846-b662-0cc0d4949885", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "voc_base_dir = 'FinTabNet.c/FinTabNet.c-Structure'\n", 34 | "coco_base_dir = 'FinTabNet.c/FinTabNet.c-Structure-COCO'" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "329ed238-7340-466c-8336-0d52e22338b6", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "!mkdir -p $coco_base_dir\n", 45 | "!mkdir -p $coco_base_dir/images/train\n", 46 | "!mkdir -p $coco_base_dir/images/val\n", 47 | "!mkdir -p $coco_base_dir/images/test\n", 48 | "!mkdir -p $coco_base_dir/labels/train\n", 49 | "!mkdir -p $coco_base_dir/labels/val\n", 50 | "!mkdir -p $coco_base_dir/labels/test" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "88b44964-6d11-4906-b00c-0de6c806b56c", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "import multiprocessing\n", 61 | "from joblib import Parallel, delayed\n", 62 | "\n", 63 | "max_processes = multiprocessing.cpu_count()\n", 64 | "print('max_processes:', max_processes)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "e18c4548-3ed8-4e4e-ab44-6b593a6af331", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "class_names_map = {'table':0, 'table column':1, 'table row':2, 'table column header':3, 'table projected row header':4, 'table spanning cell':5, 'table grid cell':6}\n", 75 | "\n", 76 | "def voc2coco_single(xml_label_filename):\n", 77 | " xml_label_filename = xml_label_filename.strip()\n", 78 | " if xml_label_filename.endswith('xml'):\n", 79 | " file_object = open(os.path.join(voc_base_dir, xml_label_filename), encoding='utf-8') \n", 80 | " try:\n", 81 | " all_the_xmlStr = file_object.read()\n", 82 | " finally:\n", 83 | " file_object.close()\n", 84 | " convertedDict = xmltodict.parse(all_the_xmlStr)\n", 85 | " # print(convertedDict)\n", 86 | " # print(len(convertedDict['annotation']['object']))\n", 87 | " fix_width = int(convertedDict['annotation']['size']['width'])\n", 88 | " fix_height = int(convertedDict['annotation']['size']['height'])\n", 89 | " if 'object' in convertedDict['annotation']:\n", 90 | " objs = convertedDict['annotation']['object']\n", 91 | " if not isinstance(objs,list):\n", 92 | " objs = [objs]\n", 93 | "# print('objs:', objs)\n", 94 | " with open(os.path.join(coco_base_dir, 'labels', xml_label_filename[:-4]+'.txt'), 'w') as fout:\n", 95 | " for annotation in objs:\n", 96 | " if annotation['name'] not in class_names_map:\n", 97 | " class_names_map[annotation['name']] = len(class_names_map)\n", 98 | " class_id = class_names_map[annotation['name']]\n", 99 | " # class_id = 0\n", 100 | "\n", 101 | " xmin = int(float(annotation['bndbox']['xmin']))\n", 102 | " ymin = int(float(annotation['bndbox']['ymin']))\n", 103 | " xmax = int(float(annotation['bndbox']['xmax']))\n", 104 | " ymax = int(float(annotation['bndbox']['ymax']))\n", 105 | "\n", 106 | " w = xmax-xmin\n", 107 | " h = ymax-ymin\n", 108 | "\n", 109 | " if w>0 and h>0:\n", 110 | " center_x = (xmin+xmax)/2\n", 111 | " center_y = (ymin+ymax)/2\n", 112 | " fout.write(str(class_id)+' '+str(center_x/fix_width)+' 
'+str(center_y/fix_height)+' '+str(w/fix_width)+' '+str(h/fix_height)+'\\n')\n", 113 | " else:\n", 114 | " print('[BUG] xml_label_filename:', xml_label_filename)\n", 115 | "\n", 116 | "def voc2coco(split='train'):\n", 117 | " xml_label_filenames = os.listdir(os.path.join(voc_base_dir, split))\n", 118 | " for i in range(len(xml_label_filenames)):\n", 119 | " xml_label_filenames[i] = split+'/'+xml_label_filenames[i]\n", 120 | " print(split, len(xml_label_filenames))\n", 121 | " Parallel(n_jobs=max_processes)(delayed(voc2coco_single)(xml_label_filename) for xml_label_filename in tqdm(xml_label_filenames)) " 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "17b6ad73-7d4b-41c1-b82c-311c391bb3b1", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "voc2coco('train')\n", 132 | "voc2coco('val')\n", 133 | "voc2coco('test')\n", 134 | "\n", 135 | "print('class_names_map:', class_names_map)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "id": "26cb6ad5-9c94-4eba-a6f6-02a31881e9ec", 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "def copy_images(split='train'):\n", 146 | " xml_label_filenames = os.listdir(os.path.join(voc_base_dir, split))\n", 147 | " for i in range(len(xml_label_filenames)):\n", 148 | " xml_label_filenames[i] = split+'/'+xml_label_filenames[i]\n", 149 | " print(split, len(xml_label_filenames))\n", 150 | " for xml_label_filename in tqdm(xml_label_filenames):\n", 151 | " xml_label_filename = xml_label_filename.strip()\n", 152 | " if xml_label_filename.endswith('xml'):\n", 153 | " jpg_filename = os.path.join(voc_base_dir, 'images', xml_label_filename.split('/')[1].replace('xml', 'jpg'))\n", 154 | " new_jpg_filename = os.path.join(coco_base_dir, 'images', xml_label_filename.replace('xml', 'jpg'))\n", 155 | " # print(jpg_filename, new_jpg_filename)\n", 156 | " try:\n", 157 | " # shutil.copy(jpg_filename, new_jpg_filename)\n", 158 | " shutil.move(jpg_filename, new_jpg_filename)\n", 159 | " # break\n", 160 | " except:\n", 161 | " continue" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "5cafe5e3-ef2d-4923-9df5-d2f91bcf548a", 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "copy_images('train')\n", 172 | "copy_images('val')\n", 173 | "copy_images('test')" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "id": "0e7909db-8ec6-4e0b-aa33-7ac57ab79b77", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "conda_pytorch_p310", 188 | "language": "python", 189 | "name": "conda_pytorch_p310" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.10.14" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 5 206 | } 207 | -------------------------------------------------------------------------------- /data/voc2coco_structure_icdar2013.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "261b8e28-12b2-4a43-af71-35310b03b68e", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "!pip install xmltodict" 11 | 
] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "0bff9744-9c68-4d0e-ab53-dc50bb7900fd", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import os\n", 21 | "import shutil\n", 22 | "import xmltodict\n", 23 | "from tqdm import tqdm" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "id": "6ef97804-0902-4846-b662-0cc0d4949885", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "voc_base_dir = 'ICDAR-2013.c/ICDAR-2013.c-Structure'\n", 34 | "coco_base_dir = 'ICDAR-2013.c/ICDAR-2013.c-Structure-COCO'" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "329ed238-7340-466c-8336-0d52e22338b6", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "!mkdir -p $coco_base_dir\n", 45 | "!mkdir -p $coco_base_dir/images/train\n", 46 | "!mkdir -p $coco_base_dir/images/val\n", 47 | "!mkdir -p $coco_base_dir/images/test\n", 48 | "!mkdir -p $coco_base_dir/labels/train\n", 49 | "!mkdir -p $coco_base_dir/labels/val\n", 50 | "!mkdir -p $coco_base_dir/labels/test" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "88b44964-6d11-4906-b00c-0de6c806b56c", 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "import multiprocessing\n", 61 | "from joblib import Parallel, delayed\n", 62 | "\n", 63 | "max_processes = multiprocessing.cpu_count()\n", 64 | "print('max_processes:', max_processes)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "id": "e18c4548-3ed8-4e4e-ab44-6b593a6af331", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "class_names_map = {'table':0, 'table column':1, 'table row':2, 'table column header':3, 'table projected row header':4, 'table spanning cell':5, 'table grid cell':6}\n", 75 | "\n", 76 | "def voc2coco_single(xml_label_filename):\n", 77 | " xml_label_filename = xml_label_filename.strip()\n", 78 | " if xml_label_filename.endswith('xml'):\n", 79 | " file_object = open(os.path.join(voc_base_dir, xml_label_filename), encoding='utf-8') \n", 80 | " try:\n", 81 | " all_the_xmlStr = file_object.read()\n", 82 | " finally:\n", 83 | " file_object.close()\n", 84 | " convertedDict = xmltodict.parse(all_the_xmlStr)\n", 85 | " # print(convertedDict)\n", 86 | " # print(len(convertedDict['annotation']['object']))\n", 87 | " fix_width = int(convertedDict['annotation']['size']['width'])\n", 88 | " fix_height = int(convertedDict['annotation']['size']['height'])\n", 89 | " if 'object' in convertedDict['annotation']:\n", 90 | " objs = convertedDict['annotation']['object']\n", 91 | " if not isinstance(objs,list):\n", 92 | " objs = [objs]\n", 93 | "# print('objs:', objs)\n", 94 | " with open(os.path.join(coco_base_dir, 'labels', xml_label_filename[:-4]+'.txt'), 'w') as fout:\n", 95 | " for annotation in objs:\n", 96 | " if annotation['name'] not in class_names_map:\n", 97 | " class_names_map[annotation['name']] = len(class_names_map)\n", 98 | " class_id = class_names_map[annotation['name']]\n", 99 | " # class_id = 0\n", 100 | "\n", 101 | " xmin = int(float(annotation['bndbox']['xmin']))\n", 102 | " ymin = int(float(annotation['bndbox']['ymin']))\n", 103 | " xmax = int(float(annotation['bndbox']['xmax']))\n", 104 | " ymax = int(float(annotation['bndbox']['ymax']))\n", 105 | "\n", 106 | " w = xmax-xmin\n", 107 | " h = ymax-ymin\n", 108 | "\n", 109 | " if w>0 and h>0:\n", 110 | " center_x = (xmin+xmax)/2\n", 111 | " center_y = (ymin+ymax)/2\n", 112 | " fout.write(str(class_id)+' 
'+str(center_x/fix_width)+' '+str(center_y/fix_height)+' '+str(w/fix_width)+' '+str(h/fix_height)+'\\n')\n", 113 | " else:\n", 114 | " print('[BUG] xml_label_filename:', xml_label_filename)\n", 115 | "\n", 116 | "def voc2coco(split='train'):\n", 117 | " xml_label_filenames = os.listdir(os.path.join(voc_base_dir, split))\n", 118 | " for i in range(len(xml_label_filenames)):\n", 119 | " xml_label_filenames[i] = split+'/'+xml_label_filenames[i]\n", 120 | " print(split, len(xml_label_filenames))\n", 121 | " Parallel(n_jobs=max_processes)(delayed(voc2coco_single)(xml_label_filename) for xml_label_filename in tqdm(xml_label_filenames)) " 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "17b6ad73-7d4b-41c1-b82c-311c391bb3b1", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "voc2coco('train')\n", 132 | "voc2coco('val')\n", 133 | "voc2coco('test')\n", 134 | "\n", 135 | "print('class_names_map:', class_names_map)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "id": "26cb6ad5-9c94-4eba-a6f6-02a31881e9ec", 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "def copy_images(split='train'):\n", 146 | " xml_label_filenames = os.listdir(os.path.join(voc_base_dir, split))\n", 147 | " for i in range(len(xml_label_filenames)):\n", 148 | " xml_label_filenames[i] = split+'/'+xml_label_filenames[i]\n", 149 | " print(split, len(xml_label_filenames))\n", 150 | " for xml_label_filename in tqdm(xml_label_filenames):\n", 151 | " xml_label_filename = xml_label_filename.strip()\n", 152 | " if xml_label_filename.endswith('xml'):\n", 153 | " jpg_filename = os.path.join(voc_base_dir, 'images', xml_label_filename.split('/')[1].replace('xml', 'jpg'))\n", 154 | " new_jpg_filename = os.path.join(coco_base_dir, 'images', xml_label_filename.replace('xml', 'jpg'))\n", 155 | " # print(jpg_filename, new_jpg_filename)\n", 156 | " try:\n", 157 | " # shutil.copy(jpg_filename, new_jpg_filename)\n", 158 | " shutil.move(jpg_filename, new_jpg_filename)\n", 159 | " # break\n", 160 | " except:\n", 161 | " continue" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "5cafe5e3-ef2d-4923-9df5-d2f91bcf548a", 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "copy_images('train')\n", 172 | "copy_images('val')\n", 173 | "copy_images('test')" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "id": "0e7909db-8ec6-4e0b-aa33-7ac57ab79b77", 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "conda_pytorch_p310", 188 | "language": "python", 189 | "name": "conda_pytorch_p310" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.10.14" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 5 206 | } 207 | -------------------------------------------------------------------------------- /postprocess.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2021 Microsoft Corporation 3 | """ 4 | from collections import defaultdict 5 | 6 | from fitz import Rect 7 | 8 | 9 | def apply_threshold(objects, threshold): 10 | """ 11 | Filter out objects 
below a certain score. 12 | """ 13 | return [obj for obj in objects if obj['score'] >= threshold] 14 | 15 | 16 | def apply_class_thresholds(bboxes, labels, scores, class_names, class_thresholds): 17 | """ 18 | Filter out bounding boxes whose confidence is below the confidence threshold for 19 | its associated class label. 20 | """ 21 | # Apply class-specific thresholds 22 | indices_above_threshold = [idx for idx, (score, label) in enumerate(zip(scores, labels)) 23 | if score >= class_thresholds[ 24 | class_names[label] 25 | ] 26 | ] 27 | bboxes = [bboxes[idx] for idx in indices_above_threshold] 28 | scores = [scores[idx] for idx in indices_above_threshold] 29 | labels = [labels[idx] for idx in indices_above_threshold] 30 | 31 | return bboxes, scores, labels 32 | 33 | 34 | def iou(bbox1, bbox2): 35 | """ 36 | Compute the intersection-over-union of two bounding boxes. 37 | """ 38 | intersection = Rect(bbox1).intersect(bbox2) 39 | union = Rect(bbox1).include_rect(bbox2) 40 | 41 | union_area = union.get_area() # getArea() 42 | if union_area > 0: 43 | return intersection.get_area() / union.get_area() # .getArea() 44 | 45 | return 0 46 | 47 | 48 | def iob(bbox1, bbox2): 49 | """ 50 | Compute the intersection area over box area, for bbox1. 51 | """ 52 | intersection = Rect(bbox1).intersect(bbox2) 53 | 54 | bbox1_area = Rect(bbox1).get_area() # .getArea() 55 | if bbox1_area > 0: 56 | return intersection.get_area() / bbox1_area # getArea() 57 | 58 | return 0 59 | 60 | 61 | def objects_to_cells(table, objects_in_table, tokens_in_table, class_map, class_thresholds): 62 | """ 63 | Process the bounding boxes produced by the table structure recognition model 64 | and the token/word/span bounding boxes into table cells. 65 | 66 | Also return a confidence score based on how well the text was able to be 67 | uniquely slotted into the cells detected by the table model. 68 | """ 69 | 70 | table_structures = objects_to_table_structures(table, objects_in_table, tokens_in_table, class_map, 71 | class_thresholds) 72 | 73 | # Check for a valid table 74 | if len(table_structures['columns']) < 1 or len(table_structures['rows']) < 1: 75 | cells = []#None 76 | confidence_score = 0 77 | else: 78 | cells, confidence_score = table_structure_to_cells(table_structures, tokens_in_table, table['bbox']) 79 | 80 | return table_structures, cells, confidence_score 81 | 82 | 83 | def objects_to_table_structures(table_object, objects_in_table, tokens_in_table, class_names, class_thresholds): 84 | """ 85 | Process the bounding boxes produced by the table structure recognition model into 86 | a *consistent* set of table structures (rows, columns, supercells, headers). 87 | This entails resolving conflicts/overlaps, and ensuring the boxes meet certain alignment 88 | conditions (for example: rows should all have the same width, etc.). 
89 | """ 90 | 91 | page_num = table_object['page_num'] 92 | 93 | table_structures = {} 94 | 95 | columns = [obj for obj in objects_in_table if class_names[obj['label']] == 'table column'] 96 | rows = [obj for obj in objects_in_table if class_names[obj['label']] == 'table row'] 97 | headers = [obj for obj in objects_in_table if class_names[obj['label']] == 'table column header'] 98 | supercells = [obj for obj in objects_in_table if class_names[obj['label']] == 'table spanning cell'] 99 | for obj in supercells: 100 | obj['subheader'] = False 101 | subheaders = [obj for obj in objects_in_table if class_names[obj['label']] == 'table projected row header'] 102 | for obj in subheaders: 103 | obj['subheader'] = True 104 | supercells += subheaders 105 | for obj in rows: 106 | obj['header'] = False 107 | for header_obj in headers: 108 | if iob(obj['bbox'], header_obj['bbox']) >= 0.5: 109 | obj['header'] = True 110 | 111 | for row in rows: 112 | row['page'] = page_num 113 | 114 | for column in columns: 115 | column['page'] = page_num 116 | 117 | #Refine table structures 118 | rows = refine_rows(rows, tokens_in_table, class_thresholds['table row']) 119 | columns = refine_columns(columns, tokens_in_table, class_thresholds['table column']) 120 | 121 | # Shrink table bbox to just the total height of the rows 122 | # and the total width of the columns 123 | row_rect = Rect() 124 | for obj in rows: 125 | row_rect.include_rect(obj['bbox']) 126 | column_rect = Rect() 127 | for obj in columns: 128 | column_rect.include_rect(obj['bbox']) 129 | table_object['row_column_bbox'] = [column_rect[0], row_rect[1], column_rect[2], row_rect[3]] 130 | table_object['bbox'] = table_object['row_column_bbox'] 131 | 132 | # Process the rows and columns into a complete segmented table 133 | columns = align_columns(columns, table_object['row_column_bbox']) 134 | rows = align_rows(rows, table_object['row_column_bbox']) 135 | 136 | table_structures['rows'] = rows 137 | table_structures['columns'] = columns 138 | table_structures['headers'] = headers 139 | table_structures['supercells'] = supercells 140 | 141 | if len(rows) > 0 and len(columns) > 1: 142 | table_structures = refine_table_structures(table_object['bbox'], table_structures, tokens_in_table, class_thresholds) 143 | 144 | return table_structures 145 | 146 | 147 | def refine_rows(rows, page_spans, score_threshold): 148 | """ 149 | Apply operations to the detected rows, such as 150 | thresholding, NMS, and alignment. 151 | """ 152 | 153 | rows = nms_by_containment(rows, page_spans, overlap_threshold=0.5) 154 | # remove_objects_without_content(page_spans, rows) # TODO 155 | if len(rows) > 1: 156 | rows = sort_objects_top_to_bottom(rows) 157 | 158 | return rows 159 | 160 | 161 | def refine_columns(columns, page_spans, score_threshold): 162 | """ 163 | Apply operations to the detected columns, such as 164 | thresholding, NMS, and alignment. 165 | """ 166 | 167 | columns = nms_by_containment(columns, page_spans, overlap_threshold=0.5) 168 | # remove_objects_without_content(page_spans, columns) # TODO 169 | if len(columns) > 1: 170 | columns = sort_objects_left_to_right(columns) 171 | 172 | return columns 173 | 174 | 175 | def nms_by_containment(container_objects, package_objects, overlap_threshold=0.5): 176 | """ 177 | Non-maxima suppression (NMS) of objects based on shared containment of other objects. 
178 | """ 179 | container_objects = sort_objects_by_score(container_objects) 180 | num_objects = len(container_objects) 181 | suppression = [False for obj in container_objects] 182 | 183 | packages_by_container, _, _ = slot_into_containers(container_objects, package_objects, overlap_threshold=overlap_threshold, 184 | unique_assignment=True, forced_assignment=False) 185 | 186 | for object2_num in range(1, num_objects): 187 | object2_packages = set(packages_by_container[object2_num]) 188 | if len(object2_packages) == 0: 189 | suppression[object2_num] = True 190 | for object1_num in range(object2_num): 191 | if not suppression[object1_num]: 192 | object1_packages = set(packages_by_container[object1_num]) 193 | if len(object2_packages.intersection(object1_packages)) > 0: 194 | suppression[object2_num] = True 195 | 196 | final_objects = [obj for idx, obj in enumerate(container_objects) if not suppression[idx]] 197 | return final_objects 198 | 199 | 200 | def slot_into_containers(container_objects, package_objects, overlap_threshold=0.5, 201 | unique_assignment=True, forced_assignment=False): 202 | """ 203 | Slot a collection of objects into the container they occupy most (the container which holds the largest fraction of the object). 204 | """ 205 | best_match_scores = [] 206 | 207 | container_assignments = [[] for container in container_objects] 208 | package_assignments = [[] for package in package_objects] 209 | 210 | if len(container_objects) == 0 or len(package_objects) == 0: 211 | return container_assignments, package_assignments, best_match_scores 212 | 213 | match_scores = defaultdict(dict) 214 | for package_num, package in enumerate(package_objects): 215 | match_scores = [] 216 | package_rect = Rect(package['bbox']) 217 | package_area = package_rect.get_area() # getArea() 218 | for container_num, container in enumerate(container_objects): 219 | container_rect = Rect(container['bbox']) 220 | intersect_area = container_rect.intersect(package['bbox']).get_area() # getArea() 221 | overlap_fraction = intersect_area / package_area 222 | match_scores.append({'container': container, 'container_num': container_num, 'score': overlap_fraction}) 223 | 224 | sorted_match_scores = sort_objects_by_score(match_scores) 225 | 226 | best_match_score = sorted_match_scores[0] 227 | best_match_scores.append(best_match_score['score']) 228 | if forced_assignment or best_match_score['score'] >= overlap_threshold: 229 | container_assignments[best_match_score['container_num']].append(package_num) 230 | package_assignments[package_num].append(best_match_score['container_num']) 231 | 232 | if not unique_assignment: # slot package into all eligible slots 233 | for match_score in sorted_match_scores[1:]: 234 | if match_score['score'] >= overlap_threshold: 235 | container_assignments[match_score['container_num']].append(package_num) 236 | package_assignments[package_num].append(match_score['container_num']) 237 | else: 238 | break 239 | 240 | return container_assignments, package_assignments, best_match_scores 241 | 242 | 243 | def sort_objects_by_score(objects, reverse=True): 244 | """ 245 | Put any set of objects in order from high score to low score. 246 | """ 247 | if reverse: 248 | sign = -1 249 | else: 250 | sign = 1 251 | return sorted(objects, key=lambda k: sign*k['score']) 252 | 253 | 254 | def remove_objects_without_content(page_spans, objects): 255 | """ 256 | Remove any objects (these can be rows, columns, supercells, etc.) that don't 257 | have any text associated with them. 
258 | """ 259 | for obj in objects[:]: 260 | object_text, _ = extract_text_inside_bbox(page_spans, obj['bbox']) 261 | if len(object_text.strip()) == 0: 262 | objects.remove(obj) 263 | 264 | 265 | def extract_text_inside_bbox(spans, bbox): 266 | """ 267 | Extract the text inside a bounding box. 268 | """ 269 | bbox_spans = get_bbox_span_subset(spans, bbox) 270 | bbox_text = extract_text_from_spans(bbox_spans, remove_integer_superscripts=True) 271 | 272 | return bbox_text, bbox_spans 273 | 274 | 275 | def get_bbox_span_subset(spans, bbox, threshold=0.5): 276 | """ 277 | Reduce the set of spans to those that fall within a bounding box. 278 | 279 | threshold: the fraction of the span that must overlap with the bbox. 280 | """ 281 | span_subset = [] 282 | for span in spans: 283 | if overlaps(span['bbox'], bbox, threshold): 284 | span_subset.append(span) 285 | return span_subset 286 | 287 | 288 | def overlaps(bbox1, bbox2, threshold=0.5): 289 | """ 290 | Test if more than "threshold" fraction of bbox1 overlaps with bbox2. 291 | """ 292 | rect1 = Rect(list(bbox1)) 293 | area1 = rect1.get_area() # .getArea() 294 | if area1 == 0: 295 | return False 296 | return rect1.intersect(list(bbox2)).get_area()/area1 >= threshold # getArea() 297 | 298 | 299 | def extract_text_from_spans(spans, join_with_space=True, remove_integer_superscripts=True): 300 | """ 301 | Convert a collection of page tokens/words/spans into a single text string. 302 | """ 303 | 304 | if join_with_space: 305 | join_char = " " 306 | else: 307 | join_char = "" 308 | spans_copy = spans[:] 309 | 310 | if remove_integer_superscripts: 311 | for span in spans: 312 | flags = span['flags'] 313 | if flags & 2**0: # superscript flag 314 | if is_int(span['text']): 315 | spans_copy.remove(span) 316 | else: 317 | span['superscript'] = True 318 | 319 | if len(spans_copy) == 0: 320 | return "" 321 | 322 | spans_copy.sort(key=lambda span: span['span_num']) 323 | spans_copy.sort(key=lambda span: span['line_num']) 324 | spans_copy.sort(key=lambda span: span['block_num']) 325 | 326 | # Force the span at the end of every line within a block to have exactly one space 327 | # unless the line ends with a space or ends with a non-space followed by a hyphen 328 | line_texts = [] 329 | line_span_texts = [spans_copy[0]['text']] 330 | for span1, span2 in zip(spans_copy[:-1], spans_copy[1:]): 331 | if not span1['block_num'] == span2['block_num'] or not span1['line_num'] == span2['line_num']: 332 | line_text = join_char.join(line_span_texts).strip() 333 | if (len(line_text) > 0 334 | and not line_text[-1] == ' ' 335 | and not (len(line_text) > 1 and line_text[-1] == "-" and not line_text[-2] == ' ')): 336 | if not join_with_space: 337 | line_text += ' ' 338 | line_texts.append(line_text) 339 | line_span_texts = [span2['text']] 340 | else: 341 | line_span_texts.append(span2['text']) 342 | line_text = join_char.join(line_span_texts) 343 | line_texts.append(line_text) 344 | 345 | return join_char.join(line_texts).strip() 346 | 347 | 348 | def sort_objects_left_to_right(objs): 349 | """ 350 | Put the objects in order from left to right. 351 | """ 352 | return sorted(objs, key=lambda k: k['bbox'][0] + k['bbox'][2]) 353 | 354 | 355 | def sort_objects_top_to_bottom(objs): 356 | """ 357 | Put the objects in order from top to bottom. 358 | """ 359 | return sorted(objs, key=lambda k: k['bbox'][1] + k['bbox'][3]) 360 | 361 | 362 | def align_columns(columns, bbox): 363 | """ 364 | For every column, align the top and bottom boundaries to the final 365 | table bounding box. 
366 | """ 367 | try: 368 | for column in columns: 369 | column['bbox'][1] = bbox[1] 370 | column['bbox'][3] = bbox[3] 371 | except Exception as err: 372 | print("Could not align columns: {}".format(err)) 373 | pass 374 | 375 | return columns 376 | 377 | 378 | def align_rows(rows, bbox): 379 | """ 380 | For every row, align the left and right boundaries to the final 381 | table bounding box. 382 | """ 383 | try: 384 | for row in rows: 385 | row['bbox'][0] = bbox[0] 386 | row['bbox'][2] = bbox[2] 387 | except Exception as err: 388 | print("Could not align rows: {}".format(err)) 389 | pass 390 | 391 | return rows 392 | 393 | 394 | def refine_table_structures(table_bbox, table_structures, page_spans, class_thresholds): 395 | """ 396 | Apply operations to the detected table structure objects such as 397 | thresholding, NMS, and alignment. 398 | """ 399 | rows = table_structures["rows"] 400 | columns = table_structures['columns'] 401 | 402 | #columns = fill_column_gaps(columns, table_bbox) 403 | #rows = fill_row_gaps(rows, table_bbox) 404 | 405 | # Process the headers 406 | headers = table_structures['headers'] 407 | headers = apply_threshold(headers, class_thresholds["table column header"]) 408 | headers = nms(headers) 409 | headers = align_headers(headers, rows) 410 | 411 | # Process supercells 412 | supercells = [elem for elem in table_structures['supercells'] if not elem['subheader']] 413 | subheaders = [elem for elem in table_structures['supercells'] if elem['subheader']] 414 | supercells = apply_threshold(supercells, class_thresholds["table spanning cell"]) 415 | subheaders = apply_threshold(subheaders, class_thresholds["table projected row header"]) 416 | supercells += subheaders 417 | # Align before NMS for supercells because alignment brings them into agreement 418 | # with rows and columns first; if supercells still overlap after this operation, 419 | # the threshold for NMS can basically be lowered to just above 0 420 | supercells = align_supercells(supercells, rows, columns) 421 | supercells = nms_supercells(supercells) 422 | 423 | header_supercell_tree(supercells) 424 | 425 | table_structures['columns'] = columns 426 | table_structures['rows'] = rows 427 | table_structures['supercells'] = supercells 428 | table_structures['headers'] = headers 429 | 430 | return table_structures 431 | 432 | 433 | def nms(objects, match_criteria="object2_overlap", match_threshold=0.05, keep_metric="score", keep_higher=True): 434 | """ 435 | A customizable version of non-maxima suppression (NMS). 436 | 437 | Default behavior: If a lower-confidence object overlaps more than 5% of its area 438 | with a higher-confidence object, remove the lower-confidence object. 
439 | 440 | objects: set of dicts; each object dict must have a 'bbox' and a 'score' field 441 | match_criteria: how to measure how much two objects "overlap" 442 | match_threshold: the cutoff for determining that overlap requires suppression of one object 443 | keep_metric: which metric to use to determine the object to keep 444 | keep_higher: if True, keep the object with the higher metric; otherwise, keep the lower 445 | """ 446 | if len(objects) == 0: 447 | return [] 448 | 449 | if keep_metric=="score": 450 | objects = sort_objects_by_score(objects, reverse=keep_higher) 451 | elif keep_metric=="area": 452 | objects = sort_objects_by_area(objects, reverse=keep_higher) 453 | 454 | num_objects = len(objects) 455 | suppression = [False for obj in objects] 456 | 457 | for object2_num in range(1, num_objects): 458 | object2_rect = Rect(objects[object2_num]['bbox']) 459 | object2_area = object2_rect.get_area() # .getArea() 460 | for object1_num in range(object2_num): 461 | if not suppression[object1_num]: 462 | object1_rect = Rect(objects[object1_num]['bbox']) 463 | object1_area = object1_rect.get_area() # .getArea() 464 | intersect_area = object1_rect.intersect(object2_rect).get_area() # .getArea() 465 | try: 466 | if match_criteria=="object1_overlap": 467 | metric = intersect_area / object1_area 468 | elif match_criteria=="object2_overlap": 469 | metric = intersect_area / object2_area 470 | elif match_criteria=="iou": 471 | metric = intersect_area / (object1_area + object2_area - intersect_area) 472 | if metric >= match_threshold: 473 | suppression[object2_num] = True 474 | break 475 | except Exception: 476 | # Intended to recover from divide-by-zero 477 | pass 478 | 479 | return [obj for idx, obj in enumerate(objects) if not suppression[idx]] 480 | 481 | 482 | def align_headers(headers, rows): 483 | """ 484 | Adjust the header boundary to be the convex hull of the rows it intersects 485 | at least 50% of the height of. 486 | 487 | For now, we are not supporting tables with multiple headers, so we need to 488 | eliminate anything besides the top-most header. 489 | """ 490 | 491 | aligned_headers = [] 492 | 493 | for row in rows: 494 | row['header'] = False 495 | 496 | header_row_nums = [] 497 | for header in headers: 498 | for row_num, row in enumerate(rows): 499 | row_height = row['bbox'][3] - row['bbox'][1] 500 | min_row_overlap = max(row['bbox'][1], header['bbox'][1]) 501 | max_row_overlap = min(row['bbox'][3], header['bbox'][3]) 502 | overlap_height = max_row_overlap - min_row_overlap 503 | if overlap_height / row_height >= 0.5: 504 | header_row_nums.append(row_num) 505 | 506 | if len(header_row_nums) == 0: 507 | return aligned_headers 508 | 509 | header_rect = Rect() 510 | if header_row_nums[0] > 0: 511 | header_row_nums = list(range(header_row_nums[0]+1)) + header_row_nums 512 | 513 | last_row_num = -1 514 | for row_num in header_row_nums: 515 | if row_num == last_row_num + 1: 516 | row = rows[row_num] 517 | row['header'] = True 518 | header_rect = header_rect.include_rect(row['bbox']) 519 | last_row_num = row_num 520 | else: 521 | # Break as soon as a non-header row is encountered. 522 | # This ignores any subsequent rows in the table labeled as a header. 523 | # Having more than 1 header is not supported currently. 
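# (Illustrative: with header_row_nums == [0, 1, 4], rows 0 and 1 become
# the header and row 4 is dropped by this break.)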
524 | break 525 | 526 | header = {'bbox': list(header_rect)} 527 | aligned_headers.append(header) 528 | 529 | return aligned_headers 530 | 531 | 532 | def align_supercells(supercells, rows, columns): 533 | """ 534 | For each supercell, align it to the rows it intersects 50% of the height of, 535 | and the columns it intersects 50% of the width of. 536 | Eliminate supercells for which there are no rows and columns it intersects 50% with. 537 | """ 538 | aligned_supercells = [] 539 | 540 | for supercell in supercells: 541 | supercell['header'] = False 542 | row_bbox_rect = None 543 | col_bbox_rect = None 544 | intersecting_header_rows = set() 545 | intersecting_data_rows = set() 546 | for row_num, row in enumerate(rows): 547 | row_height = row['bbox'][3] - row['bbox'][1] 548 | supercell_height = supercell['bbox'][3] - supercell['bbox'][1] 549 | min_row_overlap = max(row['bbox'][1], supercell['bbox'][1]) 550 | max_row_overlap = min(row['bbox'][3], supercell['bbox'][3]) 551 | overlap_height = max_row_overlap - min_row_overlap 552 | if 'span' in supercell: 553 | overlap_fraction = max(overlap_height/row_height, 554 | overlap_height/supercell_height) 555 | else: 556 | overlap_fraction = overlap_height / row_height 557 | if overlap_fraction >= 0.5: 558 | if 'header' in row and row['header']: 559 | intersecting_header_rows.add(row_num) 560 | else: 561 | intersecting_data_rows.add(row_num) 562 | 563 | # Supercell cannot span across the header boundary; eliminate whichever 564 | # group of rows is the smallest 565 | supercell['header'] = False 566 | if len(intersecting_data_rows) > 0 and len(intersecting_header_rows) > 0: 567 | if len(intersecting_data_rows) > len(intersecting_header_rows): 568 | intersecting_header_rows = set() 569 | else: 570 | intersecting_data_rows = set() 571 | if len(intersecting_header_rows) > 0: 572 | supercell['header'] = True 573 | elif 'span' in supercell: 574 | continue # Require span supercell to be in the header 575 | intersecting_rows = intersecting_data_rows.union(intersecting_header_rows) 576 | # Determine vertical span of aligned supercell 577 | for row_num in intersecting_rows: 578 | if row_bbox_rect is None: 579 | row_bbox_rect = Rect(rows[row_num]['bbox']) 580 | else: 581 | row_bbox_rect = row_bbox_rect.include_rect(rows[row_num]['bbox']) 582 | if row_bbox_rect is None: 583 | continue 584 | 585 | intersecting_cols = [] 586 | for col_num, col in enumerate(columns): 587 | col_width = col['bbox'][2] - col['bbox'][0] 588 | supercell_width = supercell['bbox'][2] - supercell['bbox'][0] 589 | min_col_overlap = max(col['bbox'][0], supercell['bbox'][0]) 590 | max_col_overlap = min(col['bbox'][2], supercell['bbox'][2]) 591 | overlap_width = max_col_overlap - min_col_overlap 592 | if 'span' in supercell: 593 | overlap_fraction = max(overlap_width/col_width, 594 | overlap_width/supercell_width) 595 | # Multiply by 2 effectively lowers the threshold to 0.25 596 | if supercell['header']: 597 | overlap_fraction = overlap_fraction * 2 598 | else: 599 | overlap_fraction = overlap_width / col_width 600 | if overlap_fraction >= 0.5: 601 | intersecting_cols.append(col_num) 602 | if col_bbox_rect is None: 603 | col_bbox_rect = Rect(col['bbox']) 604 | else: 605 | col_bbox_rect = col_bbox_rect.include_rect(col['bbox']) 606 | if col_bbox_rect is None: 607 | continue 608 | 609 | supercell_bbox = list(row_bbox_rect.intersect(col_bbox_rect)) 610 | supercell['bbox'] = supercell_bbox 611 | 612 | # Only a true supercell if it joins across multiple rows or columns 613 | if 
(len(intersecting_rows) > 0 and len(intersecting_cols) > 0
614 | and (len(intersecting_rows) > 1 or len(intersecting_cols) > 1)):
615 | supercell['row_numbers'] = list(intersecting_rows)
616 | supercell['column_numbers'] = intersecting_cols
617 | aligned_supercells.append(supercell)
618 | 
619 | # A span supercell in the header means there must be supercells above it in the header
620 | if 'span' in supercell and supercell['header'] and len(supercell['column_numbers']) > 1:
621 | for row_num in range(0, min(supercell['row_numbers'])):
622 | new_supercell = {'row_numbers': [row_num], 'column_numbers': supercell['column_numbers'],
623 | 'score': supercell['score'], 'propagated': True}
624 | new_supercell_columns = [columns[idx] for idx in supercell['column_numbers']]
625 | new_supercell_rows = [rows[idx] for idx in supercell['row_numbers']]
626 | bbox = [min([column['bbox'][0] for column in new_supercell_columns]),
627 | min([row['bbox'][1] for row in new_supercell_rows]),
628 | max([column['bbox'][2] for column in new_supercell_columns]),
629 | max([row['bbox'][3] for row in new_supercell_rows])]
630 | new_supercell['bbox'] = bbox
631 | aligned_supercells.append(new_supercell)
632 | 
633 | return aligned_supercells
634 | 
635 | 
636 | def nms_supercells(supercells):
637 | """
638 | An NMS scheme for supercells that first attempts to shrink supercells to
639 | resolve overlap.
640 | If two supercells overlap the same (sub)cell, shrink the lower-confidence
641 | supercell to resolve the overlap. If the shrunken supercell is empty, remove it.
642 | """
643 | 
644 | supercells = sort_objects_by_score(supercells)
645 | num_supercells = len(supercells)
646 | suppression = [False for supercell in supercells]
647 | 
648 | for supercell2_num in range(1, num_supercells):
649 | supercell2 = supercells[supercell2_num]
650 | for supercell1_num in range(supercell2_num):
651 | supercell1 = supercells[supercell1_num]
652 | remove_supercell_overlap(supercell1, supercell2)
653 | if ((len(supercell2['row_numbers']) < 2 and len(supercell2['column_numbers']) < 2)
654 | or len(supercell2['row_numbers']) == 0 or len(supercell2['column_numbers']) == 0):
655 | suppression[supercell2_num] = True
656 | 
657 | return [obj for idx, obj in enumerate(supercells) if not suppression[idx]]
658 | 
659 | 
660 | def header_supercell_tree(supercells):
661 | """
662 | Make sure no supercell in the header is below more than one supercell in any row above it.
663 | The cells in the header form a tree, but a supercell with more than one supercell in a row
664 | above it means that some cell has more than one parent, which is not allowed. Eliminate
665 | any supercell that would cause this to be violated.
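Example (sketch): a row-1 header supercell spanning columns {0, 1} that sits beneath two separate row-0 supercells over columns {0} and {1} has no single parent covering its columns, so ancestors_by_row[0] stays 0 rather than 1 and the supercell is removed.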
666 | """ 667 | header_supercells = [supercell for supercell in supercells if 'header' in supercell and supercell['header']] 668 | header_supercells = sort_objects_by_score(header_supercells) 669 | 670 | for header_supercell in header_supercells[:]: 671 | ancestors_by_row = defaultdict(int) 672 | min_row = min(header_supercell['row_numbers']) 673 | for header_supercell2 in header_supercells: 674 | max_row2 = max(header_supercell2['row_numbers']) 675 | if max_row2 < min_row: 676 | if (set(header_supercell['column_numbers']).issubset( 677 | set(header_supercell2['column_numbers']))): 678 | for row2 in header_supercell2['row_numbers']: 679 | ancestors_by_row[row2] += 1 680 | for row in range(0, min_row): 681 | if not ancestors_by_row[row] == 1: 682 | supercells.remove(header_supercell) 683 | break 684 | 685 | 686 | def table_structure_to_cells(table_structures, table_spans, table_bbox): 687 | """ 688 | Assuming the row, column, supercell, and header bounding boxes have 689 | been refined into a set of consistent table structures, process these 690 | table structures into table cells. This is a universal representation 691 | format for the table, which can later be exported to Pandas or CSV formats. 692 | Classify the cells as header/access cells or data cells 693 | based on if they intersect with the header bounding box. 694 | """ 695 | columns = table_structures['columns'] 696 | rows = table_structures['rows'] 697 | supercells = table_structures['supercells'] 698 | cells = [] 699 | subcells = [] 700 | 701 | # Identify complete cells and subcells 702 | for column_num, column in enumerate(columns): 703 | for row_num, row in enumerate(rows): 704 | column_rect = Rect(list(column['bbox'])) 705 | row_rect = Rect(list(row['bbox'])) 706 | cell_rect = row_rect.intersect(column_rect) 707 | header = 'header' in row and row['header'] 708 | cell = {'bbox': list(cell_rect), 'column_nums': [column_num], 'row_nums': [row_num], 709 | 'header': header} 710 | 711 | cell['subcell'] = False 712 | for supercell in supercells: 713 | supercell_rect = Rect(list(supercell['bbox'])) 714 | if (supercell_rect.intersect(cell_rect).get_area() # .getArea() 715 | / cell_rect.get_area()) > 0.5: # getArea() 716 | cell['subcell'] = True 717 | break 718 | 719 | if cell['subcell']: 720 | subcells.append(cell) 721 | else: 722 | #cell_text = extract_text_inside_bbox(table_spans, cell['bbox']) 723 | #cell['cell_text'] = cell_text 724 | cell['subheader'] = False 725 | cells.append(cell) 726 | 727 | for supercell in supercells: 728 | supercell_rect = Rect(list(supercell['bbox'])) 729 | cell_columns = set() 730 | cell_rows = set() 731 | cell_rect = None 732 | header = True 733 | for subcell in subcells: 734 | subcell_rect = Rect(list(subcell['bbox'])) 735 | subcell_rect_area = subcell_rect.get_area() # .getArea() 736 | if (subcell_rect.intersect(supercell_rect).get_area() # .getArea() 737 | / subcell_rect_area) > 0.5: 738 | if cell_rect is None: 739 | cell_rect = Rect(list(subcell['bbox'])) 740 | else: 741 | cell_rect.include_rect(Rect(list(subcell['bbox']))) 742 | cell_rows = cell_rows.union(set(subcell['row_nums'])) 743 | cell_columns = cell_columns.union(set(subcell['column_nums'])) 744 | # By convention here, all subcells must be classified 745 | # as header cells for a supercell to be classified as a header cell; 746 | # otherwise, this could lead to a non-rectangular header region 747 | header = header and 'header' in subcell and subcell['header'] 748 | if len(cell_rows) > 0 and len(cell_columns) > 0: 749 | cell = {'bbox': 
list(cell_rect), 'column_nums': list(cell_columns), 'row_nums': list(cell_rows), 750 | 'header': header, 'subheader': supercell['subheader']} 751 | cells.append(cell) 752 | 753 | # Compute a confidence score based on how well the page tokens 754 | # slot into the cells reported by the model 755 | _, _, cell_match_scores = slot_into_containers(cells, table_spans) 756 | try: 757 | mean_match_score = sum(cell_match_scores) / len(cell_match_scores) 758 | min_match_score = min(cell_match_scores) 759 | confidence_score = (mean_match_score + min_match_score)/2 760 | except: 761 | confidence_score = 0 762 | 763 | # Dilate rows and columns before final extraction 764 | #dilated_columns = fill_column_gaps(columns, table_bbox) 765 | dilated_columns = columns 766 | #dilated_rows = fill_row_gaps(rows, table_bbox) 767 | dilated_rows = rows 768 | for cell in cells: 769 | column_rect = Rect() 770 | for column_num in cell['column_nums']: 771 | column_rect.include_rect(list(dilated_columns[column_num]['bbox'])) 772 | row_rect = Rect() 773 | for row_num in cell['row_nums']: 774 | row_rect.include_rect(list(dilated_rows[row_num]['bbox'])) 775 | cell_rect = column_rect.intersect(row_rect) 776 | cell['bbox'] = list(cell_rect) 777 | 778 | span_nums_by_cell, _, _ = slot_into_containers(cells, table_spans, overlap_threshold=0.001, 779 | unique_assignment=True, forced_assignment=False) 780 | 781 | for cell, cell_span_nums in zip(cells, span_nums_by_cell): 782 | cell_spans = [table_spans[num] for num in cell_span_nums] 783 | # TODO: Refine how text is extracted; should be character-based, not span-based; 784 | # but need to associate 785 | # cell['cell_text'] = extract_text_from_spans(cell_spans, remove_integer_superscripts=False) # TODO 786 | cell['spans'] = cell_spans 787 | 788 | # Adjust the row, column, and cell bounding boxes to reflect the extracted text 789 | num_rows = len(rows) 790 | rows = sort_objects_top_to_bottom(rows) 791 | num_columns = len(columns) 792 | columns = sort_objects_left_to_right(columns) 793 | min_y_values_by_row = defaultdict(list) 794 | max_y_values_by_row = defaultdict(list) 795 | min_x_values_by_column = defaultdict(list) 796 | max_x_values_by_column = defaultdict(list) 797 | for cell in cells: 798 | min_row = min(cell["row_nums"]) 799 | max_row = max(cell["row_nums"]) 800 | min_column = min(cell["column_nums"]) 801 | max_column = max(cell["column_nums"]) 802 | for span in cell['spans']: 803 | min_x_values_by_column[min_column].append(span['bbox'][0]) 804 | min_y_values_by_row[min_row].append(span['bbox'][1]) 805 | max_x_values_by_column[max_column].append(span['bbox'][2]) 806 | max_y_values_by_row[max_row].append(span['bbox'][3]) 807 | for row_num, row in enumerate(rows): 808 | if len(min_x_values_by_column[0]) > 0: 809 | row['bbox'][0] = min(min_x_values_by_column[0]) 810 | if len(min_y_values_by_row[row_num]) > 0: 811 | row['bbox'][1] = min(min_y_values_by_row[row_num]) 812 | if len(max_x_values_by_column[num_columns-1]) > 0: 813 | row['bbox'][2] = max(max_x_values_by_column[num_columns-1]) 814 | if len(max_y_values_by_row[row_num]) > 0: 815 | row['bbox'][3] = max(max_y_values_by_row[row_num]) 816 | for column_num, column in enumerate(columns): 817 | if len(min_x_values_by_column[column_num]) > 0: 818 | column['bbox'][0] = min(min_x_values_by_column[column_num]) 819 | if len(min_y_values_by_row[0]) > 0: 820 | column['bbox'][1] = min(min_y_values_by_row[0]) 821 | if len(max_x_values_by_column[column_num]) > 0: 822 | column['bbox'][2] = max(max_x_values_by_column[column_num]) 823 | 
if len(max_y_values_by_row[num_rows-1]) > 0: 824 | column['bbox'][3] = max(max_y_values_by_row[num_rows-1]) 825 | for cell in cells: 826 | row_rect = Rect() 827 | column_rect = Rect() 828 | for row_num in cell['row_nums']: 829 | row_rect.include_rect(list(rows[row_num]['bbox'])) 830 | for column_num in cell['column_nums']: 831 | column_rect.include_rect(list(columns[column_num]['bbox'])) 832 | cell_rect = row_rect.intersect(column_rect) 833 | if cell_rect.get_area() > 0: # getArea() 834 | cell['bbox'] = list(cell_rect) 835 | pass 836 | 837 | return cells, confidence_score 838 | 839 | 840 | def remove_supercell_overlap(supercell1, supercell2): 841 | """ 842 | This function resolves overlap between supercells (supercells must be 843 | disjoint) by iteratively shrinking supercells by the fewest grid cells 844 | necessary to resolve the overlap. 845 | Example: 846 | If two supercells overlap at grid cell (R, C), and supercell #1 is less 847 | confident than supercell #2, we eliminate either row R from supercell #1 848 | or column C from supercell #1 by comparing the number of columns in row R 849 | versus the number of rows in column C. If the number of columns in row R 850 | is less than the number of rows in column C, we eliminate row R from 851 | supercell #1. This resolves the overlap by removing fewer grid cells from 852 | supercell #1 than if we eliminated column C from it. 853 | """ 854 | common_rows = set(supercell1['row_numbers']).intersection(set(supercell2['row_numbers'])) 855 | common_columns = set(supercell1['column_numbers']).intersection(set(supercell2['column_numbers'])) 856 | 857 | # While the supercells have overlapping grid cells, continue shrinking the less-confident 858 | # supercell one row or one column at a time 859 | while len(common_rows) > 0 and len(common_columns) > 0: 860 | # Try to shrink the supercell as little as possible to remove the overlap; 861 | # if the supercell has fewer rows than columns, remove an overlapping column, 862 | # because this removes fewer grid cells from the supercell; 863 | # otherwise remove an overlapping row 864 | if len(supercell2['row_numbers']) < len(supercell2['column_numbers']): 865 | min_column = min(supercell2['column_numbers']) 866 | max_column = max(supercell2['column_numbers']) 867 | if max_column in common_columns: 868 | common_columns.remove(max_column) 869 | supercell2['column_numbers'].remove(max_column) 870 | elif min_column in common_columns: 871 | common_columns.remove(min_column) 872 | supercell2['column_numbers'].remove(min_column) 873 | else: 874 | supercell2['column_numbers'] = [] 875 | common_columns = set() 876 | else: 877 | min_row = min(supercell2['row_numbers']) 878 | max_row = max(supercell2['row_numbers']) 879 | if max_row in common_rows: 880 | common_rows.remove(max_row) 881 | supercell2['row_numbers'].remove(max_row) 882 | elif min_row in common_rows: 883 | common_rows.remove(min_row) 884 | supercell2['row_numbers'].remove(min_row) 885 | else: 886 | supercell2['row_numbers'] = [] 887 | common_rows = set() 888 | -------------------------------------------------------------------------------- /table_structure_recognition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "5f01a536-b761-4a33-acad-0df75ba58b6b", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "!pip install PyMuPDF ultralytics" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | 
"execution_count": null, 18 | "id": "6f433848-5ab2-4392-a9d6-65d7ac0d99d4", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "!pip install paddlepaddle-gpu paddleocr" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "edfbd74f-3c31-4096-97a6-f35ae1958edf", 29 | "metadata": { 30 | "tags": [] 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "# import torch\n", 35 | "# detection_model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5/runs/train/yolov5s-custom-detection/weights/best.pt', force_reload=True)\n", 36 | "# structure_model = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5/runs/train/yolov5s-custom-structure/weights/best.pt', force_reload=True)\n", 37 | "\n", 38 | "from ultralytics import YOLO\n", 39 | "device = 'cuda:0'\n", 40 | "detection_model = YOLO('yolov8/runs/detect/yolov8s-custom-detection/weights/best.pt').to(device)\n", 41 | "structure_model = YOLO('yolov8/runs/detect/yolov8s-custom-structure-all/weights/best.pt').to(device)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "4908e7d0-eef1-429f-9bf4-4648d5871d0d", 48 | "metadata": { 49 | "tags": [] 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "import cv2\n", 54 | "\n", 55 | "# imgsz=640\n", 56 | "\n", 57 | "# def table_detection(filename):\n", 58 | "# image = cv2.imread(filename)\n", 59 | "# pred = detection_model(image, size=imgsz)\n", 60 | "# pred = pred.xywhn[0]\n", 61 | "# result = pred.cpu().numpy()\n", 62 | "# return result\n", 63 | "\n", 64 | "# def table_structure(filename):\n", 65 | "# imgsz = 1024\n", 66 | "# image = cv2.imread(filename)\n", 67 | "# pred = structure_model(image, size=imgsz)\n", 68 | "# pred = pred.xywhn[0]\n", 69 | "# result = pred.cpu().numpy()\n", 70 | "# return result\n", 71 | "\n", 72 | "def table_detection(filename):\n", 73 | " imgsz = 800\n", 74 | " image = cv2.imread(filename)\n", 75 | " pred = detection_model.predict(image, imgsz=imgsz)\n", 76 | " pred = pred[0].boxes\n", 77 | " result = pred.cpu().numpy()\n", 78 | " result_list = [list(result.xywhn[i]) + [result.conf[i], result.cls[i]] for i in range(result.shape[0])]\n", 79 | " return result_list\n", 80 | "\n", 81 | "def table_structure(filename):\n", 82 | " imgsz = 1024\n", 83 | " image = cv2.imread(filename)\n", 84 | " pred = structure_model.predict(image, imgsz=imgsz)\n", 85 | " pred = pred[0].boxes\n", 86 | " result = pred.cpu().numpy()\n", 87 | " result_list = [list(result.xywhn[i]) + [result.conf[i], result.cls[i]] for i in range(result.shape[0])]\n", 88 | " return result_list" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "7c5be84e-84f3-4f33-b66a-5b6a4a3b25fe", 95 | "metadata": { 96 | "tags": [] 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "detection_class_names = ['table', 'table rotated']\n", 101 | "\n", 102 | "def crop_image(filename, detection_result):\n", 103 | " crop_filenames = []\n", 104 | " image = cv2.imread(filename)\n", 105 | " width = image.shape[1]\n", 106 | " height = image.shape[0]\n", 107 | " # print(width, height)\n", 108 | " for i, result in enumerate(detection_result):\n", 109 | " class_id = int(result[5])\n", 110 | " score = float(result[4])\n", 111 | " min_x = result[0]\n", 112 | " min_y = result[1]\n", 113 | " w = result[2]\n", 114 | " h = result[3]\n", 115 | " \n", 116 | " # x1 = max(0, int((min_x-w/2-0.02)*width)) # TODO expand 2%\n", 117 | " # y1 = max(0, int((min_y-h/2-0.02)*height)) # TODO expand 2%\n", 118 | " # x2 = min(width, 
int((min_x+w/2+0.02)*width)) # TODO expand 2%\n", 119 | " # y2 = min(height, int((min_y+h/2+0.02)*height)) # TODO expand 2%\n", 120 | " x1 = max(0, int((min_x-w/2)*width)-10) # TODO expand 10px\n", 121 | " y1 = max(0, int((min_y-h/2)*height)-10) # TODO expand 10px\n", 122 | " x2 = min(width, int((min_x+w/2)*width)+10) # TODO expand 10px\n", 123 | " y2 = min(height, int((min_y+h/2)*height)+10) # TODO expand 10px\n", 124 | " # print(x1, y1, x2, y2)\n", 125 | " crop_image = image[y1:y2, x1:x2, :]\n", 126 | " crop_filename = filename[:-4]+'_'+str(i)+'_'+detection_class_names[class_id]+filename[-4:]\n", 127 | " crop_filenames.append(crop_filename)\n", 128 | " cv2.imwrite(crop_filename, crop_image)\n", 129 | " return crop_filenames" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "8fd2838d-4942-45a0-b295-f81920bbfbf6", 136 | "metadata": { 137 | "tags": [] 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "# import os\n", 142 | "# import json\n", 143 | "# import requests\n", 144 | "\n", 145 | "# url = 'https://[ALL-IN-ONE-AI-URL]/inference?endpoint_name=[ENDPOINT-NAME]'\n", 146 | "\n", 147 | "# headers = {'Content-Type': 'image/png'}\n", 148 | "\n", 149 | "# def ocr(img_path):\n", 150 | "# words_filename = img_path[:-4]+'_words.json'\n", 151 | " \n", 152 | "# if os.path.exists(words_filename):\n", 153 | "# return words_filename\n", 154 | " \n", 155 | "# if not img_path.endswith('png'):\n", 156 | "# headers['Content-Type'] = 'image/'+img_path.split('.')[-1]\n", 157 | "# # print(headers)\n", 158 | "\n", 159 | "# with open(img_path, 'rb') as f:\n", 160 | "# data = f.read()\n", 161 | "# response = requests.post(url, headers=headers, data=data)\n", 162 | "# # print(response)\n", 163 | " \n", 164 | "# result = json.loads(response.text)\n", 165 | "# # print(result)\n", 166 | "\n", 167 | "# new_result = []\n", 168 | "# for label, bbox in zip(result['label'], result['bbox']):\n", 169 | "# new_result.append({'bbox': [bbox[0][0], bbox[0][1], bbox[2][0], bbox[2][1]], 'text': label})\n", 170 | "\n", 171 | "# json.dump(new_result, open(words_filename, 'w'), ensure_ascii=False)\n", 172 | "# return words_filename" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "id": "51b4f857-fda7-4e16-aec2-2134a2f9316d", 179 | "metadata": { 180 | "tags": [] 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "import os\n", 185 | "import json\n", 186 | "from paddleocr import PaddleOCR\n", 187 | "\n", 188 | "ocr_model = PaddleOCR(use_angle_cls=True, lang=\"ch\", det_limit_side_len=1920) # TODO use large det_limit_side_len to get better OCR result\n", 189 | "\n", 190 | "def ocr(img_path):\n", 191 | " words_filename = img_path[:-4]+'_words.json'\n", 192 | " \n", 193 | " # if os.path.exists(words_filename):\n", 194 | " # return words_filename\n", 195 | "\n", 196 | " result = ocr_model.ocr(img_path, cls=True)\n", 197 | " result = result[0]\n", 198 | " new_result = []\n", 199 | " if result is not None:\n", 200 | " bounding_boxes = [line[0] for line in result]\n", 201 | " txts = [line[1][0] for line in result]\n", 202 | " scores = [line[1][1] for line in result]\n", 203 | " # print('txts:', txts)\n", 204 | " # print('scores:', scores)\n", 205 | " # print('bounding_boxes:', bounding_boxes)\n", 206 | " for label, bbox in zip(txts, bounding_boxes):\n", 207 | " new_result.append({'bbox': [bbox[0][0], bbox[0][1], bbox[2][0], bbox[2][1]], 'text': label})\n", 208 | "\n", 209 | " json.dump(new_result, open(words_filename, 'w'), 
ensure_ascii=False)\n", 210 | " \n", 211 | " return words_filename" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "id": "8e1e9552-0fd8-4123-9488-1750640c4709", 218 | "metadata": { 219 | "tags": [] 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "def visualize_structure(filename, structure_result):\n", 224 | " image = cv2.imread(filename)\n", 225 | " width = image.shape[1]\n", 226 | " height = image.shape[0]\n", 227 | " # print(width, height)\n", 228 | " for i, result in enumerate(structure_result):\n", 229 | " class_id = int(result[5])\n", 230 | " score = float(result[4])\n", 231 | " min_x = result[0]\n", 232 | " min_y = result[1]\n", 233 | " w = result[2]\n", 234 | " h = result[3]\n", 235 | " \n", 236 | " x1 = int((min_x-w/2)*width)\n", 237 | " y1 = int((min_y-h/2)*height)\n", 238 | " x2 = int((min_x+w/2)*width)\n", 239 | " y2 = int((min_y+h/2)*height)\n", 240 | " # print(x1, y1, x2, y2)\n", 241 | " \n", 242 | " if score >= 0.5:\n", 243 | " cv2.rectangle(image, (x1, y1), (x2, y2), color=(0,0,255))\n", 244 | " cv2.putText(image, str(i)+'-'+str(class_id), (x1-10, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))\n", 245 | " new_filename = filename[:-4]+'_structure'+filename[-4:]\n", 246 | " cv2.imwrite(new_filename, image)\n", 247 | " return new_filename" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "id": "67bf6e53-30e6-4124-bb56-ab69d56dc4c8", 254 | "metadata": { 255 | "tags": [] 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "import json\n", 260 | "import postprocess\n", 261 | "\n", 262 | "structure_class_names = [\n", 263 | " 'table', 'table column', 'table row', 'table column header',\n", 264 | " 'table projected row header', 'table spanning cell', 'no object'\n", 265 | "]\n", 266 | "structure_class_map = {k: v for v, k in enumerate(structure_class_names)}\n", 267 | "structure_class_thresholds = {\n", 268 | " \"table\": 0.5,\n", 269 | " \"table column\": 0.5,\n", 270 | " \"table row\": 0.5,\n", 271 | " \"table column header\": 0.5,\n", 272 | " \"table projected row header\": 0.5,\n", 273 | " \"table spanning cell\": 0.5,\n", 274 | " \"no object\": 10\n", 275 | "}\n", 276 | "\n", 277 | "def convert_stucture(words_filename, filename, structure_result):\n", 278 | " image = cv2.imread(filename)\n", 279 | " width = image.shape[1]\n", 280 | " height = image.shape[0]\n", 281 | " # print(width, height)\n", 282 | " \n", 283 | " bboxes = []\n", 284 | " scores = []\n", 285 | " labels = []\n", 286 | " for i, result in enumerate(structure_result):\n", 287 | " class_id = int(result[5])\n", 288 | " score = float(result[4])\n", 289 | " min_x = result[0]\n", 290 | " min_y = result[1]\n", 291 | " w = result[2]\n", 292 | " h = result[3]\n", 293 | " \n", 294 | " x1 = int((min_x-w/2)*width)\n", 295 | " y1 = int((min_y-h/2)*height)\n", 296 | " x2 = int((min_x+w/2)*width)\n", 297 | " y2 = int((min_y+h/2)*height)\n", 298 | " # print(x1, y1, x2, y2)\n", 299 | "\n", 300 | " bboxes.append([x1, y1, x2, y2])\n", 301 | " scores.append(score)\n", 302 | " labels.append(class_id)\n", 303 | " \n", 304 | " table_objects = []\n", 305 | " for bbox, score, label in zip(bboxes, scores, labels):\n", 306 | " table_objects.append({'bbox': bbox, 'score': score, 'label': label})\n", 307 | " # print('table_objects:', table_objects)\n", 308 | " \n", 309 | " table = {'objects': table_objects, 'page_num': 0}\n", 310 | " \n", 311 | " table_class_objects = [obj for obj in table_objects if obj['label'] == 
structure_class_map['table']]\n", 312 | " if len(table_class_objects) > 1:\n", 313 | " table_class_objects = sorted(table_class_objects, key=lambda x: x['score'], reverse=True)\n", 314 | " try:\n", 315 | " table_bbox = list(table_class_objects[0]['bbox'])\n", 316 | " except:\n", 317 | " table_bbox = (0,0,1000,1000)\n", 318 | " # print('table_class_objects:', table_class_objects)\n", 319 | " # print('table_bbox:', table_bbox)\n", 320 | " \n", 321 | " page_tokens = json.load(open(words_filename, 'r'))\n", 322 | " tokens_in_table = [token for token in page_tokens if postprocess.iob(token['bbox'], table_bbox) >= 0.5]\n", 323 | " # print('tokens_in_table:', tokens_in_table)\n", 324 | " \n", 325 | " table_structures, cells, confidence_score = postprocess.objects_to_cells(table, table_objects, tokens_in_table, structure_class_names, structure_class_thresholds)\n", 326 | " \n", 327 | " return table_structures, cells, confidence_score" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "id": "57f147ee-7eb3-4bf8-8184-b05bd204cbd7", 334 | "metadata": { 335 | "tags": [] 336 | }, 337 | "outputs": [], 338 | "source": [ 339 | "import numpy as np\n", 340 | "import pandas as pd\n", 341 | "from PIL import Image, ImageDraw, ImageFont\n", 342 | "\n", 343 | "def visualize_cells(filename, table_structures, cells):\n", 344 | " image = cv2.imread(filename)\n", 345 | " width = image.shape[1]\n", 346 | " height = image.shape[0]\n", 347 | " # print(width, height)\n", 348 | " empty_image = np.zeros((height, width, 3), np.uint8)\n", 349 | " empty_image.fill(255)\n", 350 | " empty_image = Image.fromarray(cv2.cvtColor(empty_image, cv2.COLOR_BGR2RGB))\n", 351 | " draw = ImageDraw.Draw(empty_image)\n", 352 | " fontStyle = ImageFont.truetype(\"SimSong.ttc\", 10, encoding=\"utf-8\")\n", 353 | " \n", 354 | " num_cols = len(table_structures['columns'])\n", 355 | " num_rows = len(table_structures['rows'])\n", 356 | " data_rows = [['' for _ in range(num_cols)] for _ in range(num_rows)]\n", 357 | " for i, cell in enumerate(cells):\n", 358 | " bbox = cell['bbox']\n", 359 | " x1 = int(bbox[0])\n", 360 | " y1 = int(bbox[1])\n", 361 | " x2 = int(bbox[2])\n", 362 | " y2 = int(bbox[3])\n", 363 | " col_num = cell['column_nums'][0]\n", 364 | " row_num = cell['row_nums'][0]\n", 365 | " spans = cell['spans']\n", 366 | " text = ''\n", 367 | " for span in spans:\n", 368 | " if 'text' in span:\n", 369 | " text += span['text'] \n", 370 | " data_rows[row_num][col_num] = text\n", 371 | " \n", 372 | " # print('text:', text)\n", 373 | " text_len = len(text)\n", 374 | " # print('text_len:', text_len)\n", 375 | " cell_width = x2-x1\n", 376 | " # print('cell_width:', cell_width)\n", 377 | " num_per_line = cell_width//10\n", 378 | " # print('num_per_line:', num_per_line)\n", 379 | " if num_per_line != 0:\n", 380 | " line_num = text_len//num_per_line\n", 381 | " else:\n", 382 | " line_num = 0\n", 383 | " # print('line_num:', line_num)\n", 384 | " new_text = text[:num_per_line]+'\\n'\n", 385 | " for j in range(line_num):\n", 386 | " new_text += text[(j+1)*num_per_line:(j+2)*num_per_line]+'\\n'\n", 387 | " # print('new_text:', new_text)\n", 388 | " text = new_text\n", 389 | " \n", 390 | " cv2.rectangle(image, (x1, y1), (x2, y2), color=(0,255,0))\n", 391 | " cv2.putText(image, str(row_num)+'-'+str(col_num), (x1, y1+30), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))\n", 392 | " \n", 393 | " # cv2.rectangle(empty_image, (x1, y1), (x2, y2), color=(0,0,255))\n", 394 | " # cv2.putText(empty_image, 
str(row_num)+'-'+str(col_num), (x1-10, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))\n",
395 | " # cv2.putText(empty_image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))\n",
396 | " draw.rectangle([(x1, y1), (x2, y2)], (255,255,255), (0,255,0))\n",
397 | " draw.text((x1-20, y1), str(row_num)+'-'+str(col_num), (255,0,0), font=fontStyle)\n",
398 | " draw.text((x1, y1), text, (0,0,255), font=fontStyle)\n",
399 | " new_filename = filename[:-4]+'_cells'+filename[-4:]\n",
400 | " cv2.imwrite(new_filename, image)\n",
401 | " reconstruct_filename = filename[:-4]+'_reconstruct'+filename[-4:]\n",
402 | " empty_image = cv2.cvtColor(np.asarray(empty_image), cv2.COLOR_RGB2BGR)\n",
403 | " cv2.imwrite(reconstruct_filename, empty_image)\n",
404 | " excel_filename = filename[:-4]+'.xlsx'\n",
405 | " data = pd.DataFrame(data_rows)\n",
406 | " data.to_excel(excel_filename, index=False, header=False)\n",
407 | " return new_filename"
408 | ]
409 | },
410 | {
411 | "cell_type": "code",
412 | "execution_count": null,
413 | "id": "e2b9e3b8-637b-4712-b783-60435f6392ee",
414 | "metadata": {
415 | "tags": []
416 | },
417 | "outputs": [],
418 | "source": [
419 | "import xml.etree.ElementTree as ET\n",
420 | "\n",
421 | "def cells_to_html(cells):\n",
422 | " cells = sorted(cells, key=lambda k: min(k['column_nums']))\n",
423 | " cells = sorted(cells, key=lambda k: min(k['row_nums']))\n",
424 | "\n",
425 | " table = ET.Element(\"table\")\n",
426 | " table.set('style', 'border-collapse: collapse;')\n",
427 | " current_row = -1\n",
428 | "\n",
429 | " for cell in cells:\n",
430 | " this_row = min(cell['row_nums'])\n",
431 | "\n",
432 | " attrib = {}\n",
433 | " colspan = len(cell['column_nums'])\n",
434 | " if colspan > 1:\n",
435 | " attrib['colspan'] = str(colspan)\n",
436 | " rowspan = len(cell['row_nums'])\n",
437 | " if rowspan > 1:\n",
438 | " attrib['rowspan'] = str(rowspan)\n",
439 | " if this_row > current_row:\n",
440 | " current_row = this_row\n",
441 | " if 'header' in cell and cell['header']: # NOTE: cells from postprocess.py carry a boolean 'header' key, not a 'column header' key\n",
442 | " cell_tag = \"th\"\n",
443 | " row = ET.SubElement(table, \"thead\")\n",
444 | " row.set('style', 'border: 1px solid black;')\n",
445 | " else:\n",
446 | " cell_tag = \"td\"\n",
447 | " row = ET.SubElement(table, \"tr\")\n",
448 | " row.set('style', 'border: 1px solid black;')\n",
449 | " tcell = ET.SubElement(row, cell_tag, attrib=attrib)\n",
450 | " tcell.set('style', 'border: 1px solid black; padding: 5px;')\n",
451 | " tcell.text = ''\n",
452 | " for span in cell['spans']:\n",
453 | " tcell.text += span['text']+'\\n'\n",
454 | "\n",
455 | " return str(ET.tostring(table, encoding=\"unicode\", short_empty_elements=False))"
456 | ]
457 | },
458 | {
459 | "cell_type": "code",
460 | "execution_count": null,
461 | "id": "1728b5dc-2ecc-48f5-872e-69a9ac88a791",
462 | "metadata": {
463 | "scrolled": true,
464 | "tags": []
465 | },
466 | "outputs": [],
467 | "source": [
468 | "sample_filename = 'zh_val_0.jpg'\n",
469 | "# sample_filename = 'demo/image.png'\n",
470 | "\n",
471 | "sample_detection_result = table_detection(sample_filename)\n",
472 | "# print('sample_detection_result:', sample_detection_result)\n",
473 | "\n",
474 | "sample_crop_filenames = crop_image(sample_filename, sample_detection_result)\n",
475 | "# print('sample_crop_filenames:', sample_crop_filenames)\n",
476 | "\n",
477 | "for crop_filename in sample_crop_filenames:\n",
478 | " words_filename = ocr(crop_filename)\n",
479 | " # print('words_filename:', words_filename)\n",
480 | " structure_result = 
table_structure(crop_filename)\n", 481 | " # print('structure_result:', structure_result)\n", 482 | " structure_filename = visualize_structure(crop_filename, structure_result)\n", 483 | " # print('structure_filename:', structure_filename)\n", 484 | " table_structures, cells, confidence_score = convert_stucture(words_filename, crop_filename, structure_result)\n", 485 | " # print('table_structures:', table_structures)\n", 486 | " # print('cells:', cells)\n", 487 | " # print('confidence_score:', confidence_score)\n", 488 | " cells_filename = visualize_cells(crop_filename, table_structures, cells)\n", 489 | " # print('cells_filename:', cells_filename)\n", 490 | " \n", 491 | " html = cells_to_html(cells)\n", 492 | " html_filename = crop_filename[:-4]+'.html'\n", 493 | " with open(html_filename, 'w') as f:\n", 494 | " f.write(html)" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": null, 500 | "id": "0bd4994f-a469-493d-9d6d-2026fc659706", 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [] 504 | } 505 | ], 506 | "metadata": { 507 | "kernelspec": { 508 | "display_name": "conda_pytorch_p310", 509 | "language": "python", 510 | "name": "conda_pytorch_p310" 511 | }, 512 | "language_info": { 513 | "codemirror_mode": { 514 | "name": "ipython", 515 | "version": 3 516 | }, 517 | "file_extension": ".py", 518 | "mimetype": "text/x-python", 519 | "name": "python", 520 | "nbconvert_exporter": "python", 521 | "pygments_lexer": "ipython3", 522 | "version": "3.10.14" 523 | } 524 | }, 525 | "nbformat": 4, 526 | "nbformat_minor": 5 527 | } 528 | -------------------------------------------------------------------------------- /yolov5/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/.DS_Store -------------------------------------------------------------------------------- /yolov5/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/data/.DS_Store -------------------------------------------------------------------------------- /yolov5/data/custom-detection.yaml: -------------------------------------------------------------------------------- 1 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 2 | path: /home/ec2-user/SageMaker/table_structure_recognition/data/pubtables-1m/PubTables-1M-Detection-COCO # dataset root dir 3 | train: images/train # train images (relative to 'path') 128 images 4 | val: images/val # val images (relative to 'path') 128 images 5 | test: images/test # test images (optional) 6 | 7 | # Classes 8 | names: 9 | 0: table 10 | 1: table rotated 11 | -------------------------------------------------------------------------------- /yolov5/data/custom-structure.yaml: -------------------------------------------------------------------------------- 1 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
2 | path: /home/ec2-user/SageMaker/table_structure_recognition/data/pubtables-1m/PubTables-1M-Structure-COCO # dataset root dir 3 | train: images/train # train images (relative to 'path') 128 images 4 | val: images/val # val images (relative to 'path') 128 images 5 | test: images/test # test images (optional) 6 | 7 | # Classes 8 | names: 9 | 0: table 10 | 1: table column 11 | 2: table row 12 | 3: table column header 13 | 4: table projected row header 14 | 5: table spanning cell 15 | 6: table grid cell 16 | -------------------------------------------------------------------------------- /yolov5/runs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/.DS_Store -------------------------------------------------------------------------------- /yolov5/runs/train/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/.DS_Store -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/F1_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/F1_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/PR_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/PR_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/P_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/P_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/R_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/R_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/confusion_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/confusion_matrix.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/events.out.tfevents.1708692027.ip-172-16-65-49.ec2.internal.21611.0: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/events.out.tfevents.1708692027.ip-172-16-65-49.ec2.internal.21611.0 -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/hyp.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 2 | lrf: 0.01 3 | momentum: 0.937 4 | weight_decay: 0.0005 5 | warmup_epochs: 3.0 6 | warmup_momentum: 0.8 7 | warmup_bias_lr: 0.1 8 | box: 0.05 9 | cls: 0.5 10 | cls_pw: 1.0 11 | obj: 1.0 12 | obj_pw: 1.0 13 | iou_t: 0.2 14 | anchor_t: 4.0 15 | fl_gamma: 0.0 16 | hsv_h: 0.015 17 | hsv_s: 0.7 18 | hsv_v: 0.4 19 | degrees: 0.0 20 | translate: 0.1 21 | scale: 0.5 22 | shear: 0.0 23 | perspective: 0.0 24 | flipud: 0.0 25 | fliplr: 0.5 26 | mosaic: 1.0 27 | mixup: 0.0 28 | copy_paste: 0.0 29 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/labels_correlogram.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/labels_correlogram.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/opt.yaml: -------------------------------------------------------------------------------- 1 | weights: yolov5s.pt 2 | cfg: '' 3 | data: data/custom-detection.yaml 4 | hyp: 5 | lr0: 0.01 6 | lrf: 0.01 7 | momentum: 0.937 8 | weight_decay: 0.0005 9 | warmup_epochs: 3.0 10 | warmup_momentum: 0.8 11 | warmup_bias_lr: 0.1 12 | box: 0.05 13 | cls: 0.5 14 | cls_pw: 1.0 15 | obj: 1.0 16 | obj_pw: 1.0 17 | iou_t: 0.2 18 | anchor_t: 4.0 19 | fl_gamma: 0.0 20 | hsv_h: 0.015 21 | hsv_s: 0.7 22 | hsv_v: 0.4 23 | degrees: 0.0 24 | translate: 0.1 25 | scale: 0.5 26 | shear: 0.0 27 | perspective: 0.0 28 | flipud: 0.0 29 | fliplr: 0.5 30 | mosaic: 1.0 31 | mixup: 0.0 32 | copy_paste: 0.0 33 | epochs: 10 34 | batch_size: 64 35 | imgsz: 800 36 | rect: false 37 | resume: false 38 | nosave: false 39 | noval: false 40 | noautoanchor: false 41 | noplots: false 42 | evolve: null 43 | evolve_population: data/hyps 44 | resume_evolve: null 45 | bucket: '' 46 | cache: null 47 | image_weights: false 48 | device: '0' 49 | multi_scale: false 50 | single_cls: false 51 | optimizer: SGD 52 | sync_bn: false 53 | workers: 8 54 | project: runs/train 55 | name: yolov5s-custom-detection-800 56 | exist_ok: false 57 | quad: false 58 | cos_lr: false 59 | label_smoothing: 0.0 60 | patience: 100 61 | freeze: 62 | - 0 63 | save_period: -1 64 | seed: 0 65 | local_rank: -1 66 | entity: null 67 | upload_dataset: false 68 | bbox_interval: -1 69 | artifact_alias: latest 70 | ndjson_console: false 71 | ndjson_file: false 72 | save_dir: runs/train/yolov5s-custom-detection-800 73 | -------------------------------------------------------------------------------- 
/yolov5/runs/train/yolov5s-custom-detection-800/results.csv: -------------------------------------------------------------------------------- 1 | epoch, train/box_loss, train/obj_loss, train/cls_loss, metrics/precision, metrics/recall, metrics/mAP_0.5,metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss, x/lr0, x/lr1, x/lr2 2 | 0, 0.025459, 0.0094753, 0.0013068, 0.98107, 0.96859, 0.99145, 0.88426, 0.0041913, 0.0019805, 0.00015475, 0.070004, 0.0033329, 0.0033329 3 | 1, 0.013971, 0.0049815, 0.00019846, 0.98317, 0.9873, 0.99188, 0.9395, 0.0027004, 0.001424, 6.6819e-05, 0.039344, 0.0060062, 0.0060062 4 | 2, 0.010525, 0.0042624, 0.00012562, 0.98926, 0.98986, 0.99262, 0.94917, 0.0024449, 0.0012931, 3.5531e-05, 0.0080243, 0.0080196, 0.0080196 5 | 3, 0.0088666, 0.0038601, 9.7887e-05, 0.99145, 0.99005, 0.9939, 0.95868, 0.0025282, 0.0012791, 2.3691e-05, 0.00703, 0.00703, 0.00703 6 | 4, 0.0082705, 0.003636, 8.1939e-05, 0.99351, 0.98974, 0.99411, 0.96584, 0.0024162, 0.001227, 1.9988e-05, 0.00703, 0.00703, 0.00703 7 | 5, 0.007491, 0.0033884, 7.1507e-05, 0.99259, 0.99112, 0.99447, 0.9716, 0.0021361, 0.0011254, 1.6743e-05, 0.00604, 0.00604, 0.00604 8 | 6, 0.006983, 0.0032051, 6.7502e-05, 0.99185, 0.99196, 0.99465, 0.97512, 0.0018977, 0.0010313, 1.507e-05, 0.00505, 0.00505, 0.00505 9 | 7, 0.0062835, 0.0029857, 5.3266e-05, 0.99494, 0.98939, 0.9947, 0.97815, 0.001745, 0.00094889, 1.4388e-05, 0.00406, 0.00406, 0.00406 10 | 8, 0.0057577, 0.0027869, 4.8541e-05, 0.99336, 0.99296, 0.99485, 0.9801, 0.0016398, 0.00088513, 1.3895e-05, 0.00307, 0.00307, 0.00307 11 | 9, 0.0050442, 0.0025601, 4.3484e-05, 0.99561, 0.99388, 0.99488, 0.98072, 0.0015566, 0.00082622, 1.3195e-05, 0.00208, 0.00208, 0.00208 12 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/results.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/train_batch0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/train_batch0.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/train_batch1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/train_batch1.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/train_batch2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/train_batch2.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/val_batch0_labels.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/val_batch0_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/val_batch0_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/val_batch0_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/val_batch1_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/val_batch1_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/val_batch1_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/val_batch1_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/val_batch2_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/val_batch2_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/val_batch2_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection-800/val_batch2_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/weights/best.pt: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6f20aebd4461ee21ce921e434a1742e42ea55428aeede924691f4167ad8d6fa4 3 | size 14460029 4 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection-800/weights/last.pt: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:428319d38f1ecd69b11bb82ca1c19ae80e22b5d401046f5b7f8ab742fcbcf7f1 3 | size 14460029 4 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/.DS_Store -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/F1_curve.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/F1_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/PR_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/PR_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/P_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/P_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/R_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/R_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/confusion_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/confusion_matrix.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/events.out.tfevents.1663140451.ip-172-16-93-185.ec2.internal.12050.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/events.out.tfevents.1663140451.ip-172-16-93-185.ec2.internal.12050.0 -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/hyp.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 2 | lrf: 0.01 3 | momentum: 0.937 4 | weight_decay: 0.0005 5 | warmup_epochs: 3.0 6 | warmup_momentum: 0.8 7 | warmup_bias_lr: 0.1 8 | box: 0.05 9 | cls: 0.5 10 | cls_pw: 1.0 11 | obj: 1.0 12 | obj_pw: 1.0 13 | iou_t: 0.2 14 | anchor_t: 4.0 15 | fl_gamma: 0.0 16 | hsv_h: 0.015 17 | hsv_s: 0.7 18 | hsv_v: 0.4 19 | degrees: 0.0 20 | translate: 0.1 21 | scale: 0.5 22 | shear: 0.0 23 | perspective: 0.0 24 | flipud: 0.0 25 | fliplr: 0.5 26 | mosaic: 1.0 27 | mixup: 0.0 28 | copy_paste: 0.0 29 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/labels_correlogram.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/labels_correlogram.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/opt.yaml: -------------------------------------------------------------------------------- 1 | weights: yolov5s.pt 2 | cfg: '' 3 | data: data/custom.yaml 4 | hyp: 5 | lr0: 0.01 6 | lrf: 0.01 7 | momentum: 0.937 8 | weight_decay: 0.0005 9 | warmup_epochs: 3.0 10 | warmup_momentum: 0.8 11 | warmup_bias_lr: 0.1 12 | box: 0.05 13 | cls: 0.5 14 | cls_pw: 1.0 15 | obj: 1.0 16 | obj_pw: 1.0 17 | iou_t: 0.2 18 | anchor_t: 4.0 19 | fl_gamma: 0.0 20 | hsv_h: 0.015 21 | hsv_s: 0.7 22 | hsv_v: 0.4 23 | degrees: 0.0 24 | translate: 0.1 25 | scale: 0.5 26 | shear: 0.0 27 | perspective: 0.0 28 | flipud: 0.0 29 | fliplr: 0.5 30 | mosaic: 1.0 31 | mixup: 0.0 32 | copy_paste: 0.0 33 | epochs: 10 34 | batch_size: 64 35 | imgsz: 640 36 | rect: false 37 | resume: false 38 | nosave: false 39 | noval: false 40 | noautoanchor: false 41 | noplots: false 42 | evolve: null 43 | bucket: '' 44 | cache: null 45 | image_weights: false 46 | device: '0' 47 | multi_scale: false 48 | single_cls: false 49 | optimizer: SGD 50 | sync_bn: false 51 | workers: 8 52 | project: runs/train 53 | name: yolov5s-custom 54 | exist_ok: false 55 | quad: false 56 | cos_lr: false 57 | label_smoothing: 0.0 58 | patience: 100 59 | freeze: 60 | - 0 61 | save_period: -1 62 | seed: 0 63 | local_rank: -1 64 | entity: null 65 | upload_dataset: false 66 | bbox_interval: -1 67 | artifact_alias: latest 68 | save_dir: runs/train/yolov5s-custom3 69 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/results.csv: -------------------------------------------------------------------------------- 1 | epoch, train/box_loss, train/obj_loss, train/cls_loss, metrics/precision, metrics/recall, metrics/mAP_0.5,metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss, x/lr0, x/lr1, x/lr2 2 | 0, 0.02543, 0.0095226, 0.0012453, 0.99128, 0.98338, 0.9943, 0.89453, 0.0046382, 0.0018714, 0.00017937, 0.070004, 0.0033329, 0.0033329 3 | 1, 0.013739, 0.0051437, 0.00019206, 0.99432, 0.99563, 0.99483, 0.95743, 0.0029903, 0.001344, 7.6162e-05, 0.039344, 0.0060062, 0.0060062 4 | 2, 0.0097597, 0.0043323, 0.00010761, 0.99721, 0.99605, 0.99485, 0.9644, 0.0026705, 0.0011875, 3.8003e-05, 0.0080243, 0.0080196, 0.0080196 5 | 3, 0.0080571, 0.0038906, 8.5374e-05, 0.99496, 0.99715, 0.9949, 0.97139, 0.0027285, 0.0011681, 2.6947e-05, 0.00703, 0.00703, 0.00703 6 | 4, 0.0075614, 0.0036811, 7.7153e-05, 0.99636, 0.99588, 0.9949, 0.97266, 0.0025665, 0.0011194, 2.1159e-05, 0.00703, 0.00703, 0.00703 7 | 5, 0.0068545, 0.0034291, 6.488e-05, 0.99671, 0.9966, 0.99489, 0.97668, 0.0022816, 0.0010243, 1.7513e-05, 0.00604, 0.00604, 0.00604 8 | 6, 0.0063344, 0.0032332, 6.3558e-05, 0.99674, 0.99687, 0.99492, 0.9797, 0.0020485, 0.00094595, 1.5937e-05, 0.00505, 0.00505, 0.00505 9 | 7, 0.0058007, 0.0030361, 5.1855e-05, 0.99791, 0.99694, 0.99493, 0.98273, 0.0018915, 0.0008635, 1.5057e-05, 0.00406, 0.00406, 0.00406 10 | 8, 0.0053137, 0.0028336, 4.5487e-05, 0.99767, 0.99735, 0.99494, 0.98514, 0.0017719, 0.00080884, 1.4088e-05, 0.00307, 0.00307, 0.00307 11 | 9, 0.0047194, 0.0026102, 3.8066e-05, 0.99901, 0.99741, 0.99495, 0.98717, 0.0016947, 0.00075372, 
1.3349e-05, 0.00208, 0.00208, 0.00208 12 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/results.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/train_batch0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/train_batch0.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/train_batch1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/train_batch1.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/train_batch2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/train_batch2.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/val_batch0_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/val_batch0_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/val_batch0_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/val_batch0_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/val_batch1_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/val_batch1_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/val_batch1_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/val_batch1_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/val_batch2_labels.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/val_batch2_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/val_batch2_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/val_batch2_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/weights/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-detection/weights/.DS_Store -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-detection/weights/best.pt: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:32127c7362c16c5839cb95c942cbc9ad1412fd953eb4b0b93758a49f01e312cb 3 | size 14397685 4 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/.DS_Store -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/F1_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/F1_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/PR_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/PR_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/P_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/P_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/R_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/R_curve.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/confusion_matrix.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/confusion_matrix.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/events.out.tfevents.1663282037.ip-172-16-93-185.ec2.internal.17535.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/events.out.tfevents.1663282037.ip-172-16-93-185.ec2.internal.17535.0 -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/hyp.yaml: -------------------------------------------------------------------------------- 1 | lr0: 0.01 2 | lrf: 0.01 3 | momentum: 0.937 4 | weight_decay: 0.0005 5 | warmup_epochs: 3.0 6 | warmup_momentum: 0.8 7 | warmup_bias_lr: 0.1 8 | box: 0.05 9 | cls: 0.5 10 | cls_pw: 1.0 11 | obj: 1.0 12 | obj_pw: 1.0 13 | iou_t: 0.2 14 | anchor_t: 4.0 15 | fl_gamma: 0.0 16 | hsv_h: 0.015 17 | hsv_s: 0.7 18 | hsv_v: 0.4 19 | degrees: 0.0 20 | translate: 0.1 21 | scale: 0.5 22 | shear: 0.0 23 | perspective: 0.0 24 | flipud: 0.0 25 | fliplr: 0.5 26 | mosaic: 1.0 27 | mixup: 0.0 28 | copy_paste: 0.0 29 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/labels_correlogram.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/labels_correlogram.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/opt.yaml: -------------------------------------------------------------------------------- 1 | weights: yolov5s.pt 2 | cfg: '' 3 | data: data/custom-structure.yaml 4 | hyp: 5 | lr0: 0.01 6 | lrf: 0.01 7 | momentum: 0.937 8 | weight_decay: 0.0005 9 | warmup_epochs: 3.0 10 | warmup_momentum: 0.8 11 | warmup_bias_lr: 0.1 12 | box: 0.05 13 | cls: 0.5 14 | cls_pw: 1.0 15 | obj: 1.0 16 | obj_pw: 1.0 17 | iou_t: 0.2 18 | anchor_t: 4.0 19 | fl_gamma: 0.0 20 | hsv_h: 0.015 21 | hsv_s: 0.7 22 | hsv_v: 0.4 23 | degrees: 0.0 24 | translate: 0.1 25 | scale: 0.5 26 | shear: 0.0 27 | perspective: 0.0 28 | flipud: 0.0 29 | fliplr: 0.5 30 | mosaic: 1.0 31 | mixup: 0.0 32 | copy_paste: 0.0 33 | epochs: 10 34 | batch_size: 64 35 | imgsz: 640 36 | rect: false 37 | resume: false 38 | nosave: false 39 | noval: false 40 | noautoanchor: false 41 | noplots: false 42 | evolve: null 43 | bucket: '' 44 | cache: null 45 | image_weights: false 46 | device: '0' 47 | multi_scale: false 48 | single_cls: false 49 | optimizer: SGD 50 | sync_bn: false 51 | workers: 8 52 | project: runs/train 53 | name: yolov5s-custom-structure 54 | exist_ok: false 55 | quad: false 56 | cos_lr: false 57 | label_smoothing: 0.0 58 | patience: 100 59 | freeze: 60 | - 0 61 | 
save_period: -1 62 | seed: 0 63 | local_rank: -1 64 | entity: null 65 | upload_dataset: false 66 | bbox_interval: -1 67 | artifact_alias: latest 68 | save_dir: runs/train/yolov5s-custom-structure3 69 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/results.csv: -------------------------------------------------------------------------------- 1 | epoch, train/box_loss, train/obj_loss, train/cls_loss, metrics/precision, metrics/recall, metrics/mAP_0.5,metrics/mAP_0.5:0.95, val/box_loss, val/obj_loss, val/cls_loss, x/lr0, x/lr1, x/lr2 2 | 0, 0.051576, 0.12291, 0.014856, 0.84121, 0.72869, 0.82173, 0.63943, 0.0297, 0.070789, 0.0074103, 0.070003, 0.0033331, 0.0033331 3 | 1, 0.037296, 0.093321, 0.0098879, 0.83955, 0.84732, 0.89574, 0.72607, 0.026999, 0.065671, 0.0063079, 0.039343, 0.0060064, 0.0060064 4 | 2, 0.033178, 0.083219, 0.0090014, 0.84108, 0.84598, 0.90166, 0.68651, 0.029906, 0.08041, 0.0061075, 0.0080226, 0.0080198, 0.0080198 5 | 3, 0.030648, 0.076048, 0.0085772, 0.86409, 0.88081, 0.9291, 0.7734, 0.025174, 0.062281, 0.0055128, 0.00703, 0.00703, 0.00703 6 | 4, 0.029906, 0.073091, 0.0084133, 0.88107, 0.90156, 0.94234, 0.82973, 0.021893, 0.050675, 0.0051649, 0.00703, 0.00703, 0.00703 7 | 5, 0.029118, 0.070105, 0.0082719, 0.88963, 0.91311, 0.94807, 0.85034, 0.020474, 0.046454, 0.0050178, 0.00604, 0.00604, 0.00604 8 | 6, 0.028509, 0.067768, 0.0081674, 0.89407, 0.92049, 0.95193, 0.85956, 0.019993, 0.044841, 0.0049471, 0.00505, 0.00505, 0.00505 9 | 7, 0.027915, 0.065528, 0.0080734, 0.89687, 0.9252, 0.95582, 0.86662, 0.01973, 0.043828, 0.0048969, 0.00406, 0.00406, 0.00406 10 | 8, 0.027328, 0.063248, 0.0079688, 0.90021, 0.92799, 0.95985, 0.87315, 0.019531, 0.042815, 0.004849, 0.00307, 0.00307, 0.00307 11 | 9, 0.026669, 0.060658, 0.0078646, 0.90216, 0.92998, 0.96259, 0.87842, 0.019236, 0.040891, 0.004777, 0.00208, 0.00208, 0.00208 12 | -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/results.png -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/train_batch0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/train_batch0.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/train_batch1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/train_batch1.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/train_batch2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/train_batch2.jpg 
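The results.csv files above are written by YOLOv5 with padded, inconsistently spaced column headers (note the missing space in "metrics/mAP_0.5,metrics/mAP_0.5:0.95"), so naive column lookups tend to fail. A minimal loading sketch, assuming pandas is installed; the run path is one of the directories dumped above:

# A minimal sketch for loading a YOLOv5 results.csv, assuming pandas is
# installed; the path matches one of the runs dumped above.
import pandas as pd

df = pd.read_csv(
    "yolov5/runs/train/yolov5s-custom-structure/results.csv",
    skipinitialspace=True,  # header cells are padded with spaces after commas
)
df.columns = df.columns.str.strip()  # e.g. ' metrics/mAP_0.5' -> 'metrics/mAP_0.5'
print(df[["epoch", "metrics/mAP_0.5", "metrics/mAP_0.5:0.95"]].tail())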
-------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/val_batch0_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/val_batch0_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/val_batch0_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/val_batch0_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/val_batch1_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/val_batch1_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/val_batch1_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/val_batch1_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/val_batch2_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/val_batch2_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/val_batch2_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/val_batch2_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/weights/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/train/yolov5s-custom-structure/weights/.DS_Store -------------------------------------------------------------------------------- /yolov5/runs/train/yolov5s-custom-structure/weights/best.pt: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:46121ab2f4aba48a7d38624c861658ffeaacd0f305e95efcf66cb017e588b700 3 | size 14371957 4 | -------------------------------------------------------------------------------- /yolov5/runs/val/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/.DS_Store 
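The weights/best.pt and weights/last.pt entries above are Git LFS pointer files (version, oid, size lines), not the checkpoints themselves; a plain clone without LFS leaves only the ~130-byte pointer text in place of the ~14 MB weights recorded in the size field, and torch.load fails on it. A small check, assuming the checkout layout shown in this dump; run `git lfs pull` to fetch the real objects:

# A small sketch to detect an unfetched Git LFS pointer, assuming the
# checkout layout shown above.
from pathlib import Path

def is_lfs_pointer(path: str) -> bool:
    head = Path(path).read_bytes()[:60]
    return head.startswith(b"version https://git-lfs.github.com/spec/v1")

ckpt = "yolov5/runs/train/yolov5s-custom-structure/weights/best.pt"
if is_lfs_pointer(ckpt):
    print(f"{ckpt} is an LFS pointer; run `git lfs pull` to fetch the weights")
else:
    print(f"{ckpt} looks like a real checkpoint ({Path(ckpt).stat().st_size} bytes)")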
-------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/F1_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/F1_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/PR_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/PR_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/P_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/P_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/R_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/R_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/confusion_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/confusion_matrix.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/val_batch0_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/val_batch0_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/val_batch0_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/val_batch0_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/val_batch1_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/val_batch1_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/val_batch1_pred.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/val_batch1_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/val_batch2_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/val_batch2_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection-800/val_batch2_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection-800/val_batch2_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/F1_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/F1_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/PR_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/PR_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/P_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/P_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/R_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/R_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/confusion_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/confusion_matrix.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/val_batch0_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/val_batch0_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/val_batch0_pred.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/val_batch0_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/val_batch1_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/val_batch1_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/val_batch1_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/val_batch1_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/val_batch2_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/val_batch2_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-detection/val_batch2_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-detection/val_batch2_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/F1_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/F1_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/PR_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/PR_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/P_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/P_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/R_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/R_curve.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/confusion_matrix.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/confusion_matrix.png -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/val_batch0_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/val_batch0_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/val_batch0_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/val_batch0_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/val_batch1_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/val_batch1_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/val_batch1_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/val_batch1_pred.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/val_batch2_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/val_batch2_labels.jpg -------------------------------------------------------------------------------- /yolov5/runs/val/yolov5s-custom-structure/val_batch2_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov5/runs/val/yolov5s-custom-structure/val_batch2_pred.jpg -------------------------------------------------------------------------------- /yolov5/train_PubTables-1M_detection.sh: -------------------------------------------------------------------------------- 1 | # python train.py --workers 8 --device 0 --batch-size 64 --data data/custom-detection.yaml --img 640 --weights yolov5s.pt --name yolov5s-custom-detection --epochs 10 2 | 3 | # python val.py --workers 8 --device 0 --batch-size 128 --data data/custom-detection.yaml --img 640 --task test --weights runs/train/yolov5s-custom-detection/weights/best.pt --name yolov5s-custom-detection 4 | 5 | python train.py --workers 8 --device 0 --batch-size 64 --data data/custom-detection.yaml --img 800 --weights yolov5s.pt --name yolov5s-custom-detection-800 --epochs 10 6 | 7 | python val.py --workers 8 --device 0 --batch-size 128 --data data/custom-detection.yaml --img 800 --task test --weights runs/train/yolov5s-custom-detection-800/weights/best.pt --name yolov5s-custom-detection-800 8 | 
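The script above trains the detector at --img 800 and validates the resulting best.pt on the test split. For quick single-image inference with those weights, the standard torch.hub entry point for custom YOLOv5 checkpoints applies; a minimal sketch, assuming torch (and its pandas dependency) is installed, torch.hub can reach ultralytics/yolov5, and page.png is an illustrative input image:

# A minimal inference sketch with the detector trained above; the sample
# image path is illustrative.
import torch

model = torch.hub.load(
    "ultralytics/yolov5",
    "custom",
    path="yolov5/runs/train/yolov5s-custom-detection-800/weights/best.pt",
)
results = model("page.png", size=800)  # match the 800-pixel training resolution
print(results.pandas().xyxy[0])  # one row per detected table / rotated table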
-------------------------------------------------------------------------------- /yolov5/train_PubTables-1M_structure.sh: -------------------------------------------------------------------------------- 1 | python train.py --workers 8 --device 0 --batch-size 64 --data data/custom-structure.yaml --img 640 --weights yolov5s.pt --name yolov5s-custom-structure --epochs 10 2 | 3 | python val.py --workers 8 --device 0 --batch-size 128 --data data/custom-structure.yaml --img 640 --task test --weights runs/train/yolov5s-custom-structure/weights/best.pt --name yolov5s-custom-structure -------------------------------------------------------------------------------- /yolov8/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/data/.DS_Store -------------------------------------------------------------------------------- /yolov8/data/custom-detection.yaml: -------------------------------------------------------------------------------- 1 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 2 | path: /opt/dlami/nvme/table_structure_recognition/data/pubtables-1m/PubTables-1M-Detection-COCO # dataset root dir 3 | train: images/train # train images (relative to 'path') 4 | val: images/val # val images (relative to 'path') 5 | test: images/test # test images (optional) 6 | 7 | # Classes 8 | names: 9 | 0: table 10 | 1: table rotated 11 | -------------------------------------------------------------------------------- /yolov8/data/custom-structure-all.yaml: -------------------------------------------------------------------------------- 1 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 2 | path: /opt/dlami/nvme/table_structure_recognition/data/All-Structure-COCO # dataset root dir 3 | train: images/train # train images (relative to 'path') 4 | val: images/val # val images (relative to 'path') 5 | test: images/test # test images (optional) 6 | 7 | # Classes 8 | names: 9 | 0: table 10 | 1: table column 11 | 2: table row 12 | 3: table column header 13 | 4: table projected row header 14 | 5: table spanning cell 15 | 6: table grid cell 16 | -------------------------------------------------------------------------------- /yolov8/data/custom-structure-fintabnet.yaml: -------------------------------------------------------------------------------- 1 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
2 | path: /opt/dlami/nvme/table_structure_recognition/data/FinTabNet.c/FinTabNet.c-Structure-COCO # dataset root dir 3 | train: images/train # train images (relative to 'path') 4 | val: images/val # val images (relative to 'path') 5 | test: images/test # test images (optional) 6 | 7 | # Classes 8 | names: 9 | 0: table 10 | 1: table column 11 | 2: table row 12 | 3: table column header 13 | 4: table projected row header 14 | 5: table spanning cell 15 | 6: table grid cell 16 | -------------------------------------------------------------------------------- /yolov8/data/custom-structure-icdar2013.yaml: -------------------------------------------------------------------------------- 1 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 2 | path: /opt/dlami/nvme/table_structure_recognition/data/ICDAR-2013.c/ICDAR-2013.c-Structure-COCO # dataset root dir 3 | train: images/train # train images (relative to 'path') 4 | val: images/val # val images (relative to 'path') 5 | test: images/test # test images (optional) 6 | 7 | # Classes 8 | names: 9 | 0: table 10 | 1: table column 11 | 2: table row 12 | 3: table column header 13 | 4: table projected row header 14 | 5: table spanning cell 15 | 6: table grid cell 16 | -------------------------------------------------------------------------------- /yolov8/data/custom-structure.yaml: -------------------------------------------------------------------------------- 1 | # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 2 | path: /opt/dlami/nvme/table_structure_recognition/data/pubtables-1m/PubTables-1M-Structure-COCO # dataset root dir 3 | train: images/train # train images (relative to 'path') 4 | val: images/val # val images (relative to 'path') 5 | test: images/test # test images (optional) 6 | 7 | # Classes 8 | names: 9 | 0: table 10 | 1: table column 11 | 2: table row 12 | 3: table column header 13 | 4: table projected row header 14 | 5: table spanning cell 15 | 6: table grid cell 16 | -------------------------------------------------------------------------------- /yolov8/evaluate_ICDAR-2013.c_structure.sh: -------------------------------------------------------------------------------- 1 | pip install ultralytics 2 | yolo detect val split=test data=data/custom-structure-icdar2013.yaml model=runs/detect/yolov8s-custom-structure/weights/best.pt imgsz=1024 device=0 workers=8 batch=16 3 | yolo detect val split=test data=data/custom-structure-icdar2013.yaml model=runs/detect/yolov8s-custom-structure-fintabnet/weights/best.pt imgsz=1024 device=0 workers=8 batch=16 4 | yolo detect val split=test data=data/custom-structure-icdar2013.yaml model=runs/detect/yolov8s-custom-structure-all/weights/best.pt imgsz=1024 device=0 workers=8 batch=16 5 | yolo detect val split=test data=data/custom-structure-icdar2013.yaml model=runs/detect/yolov8x-custom-structure-all/weights/best.pt imgsz=1024 device=0 workers=8 batch=16 -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection-val/F1_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection-val/F1_curve.png
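The evaluation script above drives the ultralytics CLI; the same runs can be expressed through the ultralytics Python API. A sketch of the first CLI call, assuming `pip install ultralytics` has run and the run directory exists as dumped:

# Python-API equivalent of the first `yolo detect val` CLI call above.
from ultralytics import YOLO

model = YOLO("runs/detect/yolov8s-custom-structure/weights/best.pt")
metrics = model.val(
    data="data/custom-structure-icdar2013.yaml",
    split="test",
    imgsz=1024,
    device=0,
    workers=8,
    batch=16,
)
print(metrics.box.map50, metrics.box.map)  # mAP50 and mAP50-95 over all classes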
-------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection-val/PR_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection-val/PR_curve.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection-val/P_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection-val/P_curve.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection-val/R_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection-val/R_curve.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection-val/confusion_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection-val/confusion_matrix.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection-val/confusion_matrix_normalized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection-val/confusion_matrix_normalized.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection-val/val_batch0_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection-val/val_batch0_labels.jpg -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection-val/val_batch0_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection-val/val_batch0_pred.jpg -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection/F1_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection/F1_curve.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection/PR_curve.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection/PR_curve.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection/P_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection/P_curve.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection/R_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection/R_curve.png -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection/args.yaml: -------------------------------------------------------------------------------- 1 | task: detect 2 | mode: train 3 | model: yolov8s.pt 4 | data: data/custom-detection.yaml 5 | epochs: 10 6 | time: null 7 | patience: 100 8 | batch: 1024 9 | imgsz: 800 10 | save: true 11 | save_period: -1 12 | cache: false 13 | device: 14 | - 0 15 | - 1 16 | - 2 17 | - 3 18 | - 4 19 | - 5 20 | - 6 21 | - 7 22 | workers: 96 23 | project: null 24 | name: yolov8s-custom-detection 25 | exist_ok: false 26 | pretrained: true 27 | optimizer: auto 28 | verbose: true 29 | seed: 0 30 | deterministic: true 31 | single_cls: false 32 | rect: false 33 | cos_lr: false 34 | close_mosaic: 10 35 | resume: false 36 | amp: true 37 | fraction: 1.0 38 | profile: false 39 | freeze: null 40 | multi_scale: false 41 | overlap_mask: true 42 | mask_ratio: 4 43 | dropout: 0.0 44 | val: true 45 | split: val 46 | save_json: false 47 | save_hybrid: false 48 | conf: null 49 | iou: 0.7 50 | max_det: 300 51 | half: false 52 | dnn: false 53 | plots: true 54 | source: null 55 | vid_stride: 1 56 | stream_buffer: false 57 | visualize: false 58 | augment: false 59 | agnostic_nms: false 60 | classes: null 61 | retina_masks: false 62 | embed: null 63 | show: false 64 | save_frames: false 65 | save_txt: false 66 | save_conf: false 67 | save_crop: false 68 | show_labels: true 69 | show_conf: true 70 | show_boxes: true 71 | line_width: null 72 | format: torchscript 73 | keras: false 74 | optimize: false 75 | int8: false 76 | dynamic: false 77 | simplify: false 78 | opset: null 79 | workspace: 4 80 | nms: false 81 | lr0: 0.01 82 | lrf: 0.01 83 | momentum: 0.937 84 | weight_decay: 0.0005 85 | warmup_epochs: 3.0 86 | warmup_momentum: 0.8 87 | warmup_bias_lr: 0.1 88 | box: 7.5 89 | cls: 0.5 90 | dfl: 1.5 91 | pose: 12.0 92 | kobj: 1.0 93 | label_smoothing: 0.0 94 | nbs: 64 95 | hsv_h: 0.015 96 | hsv_s: 0.7 97 | hsv_v: 0.4 98 | degrees: 0.0 99 | translate: 0.1 100 | scale: 0.5 101 | shear: 0.0 102 | perspective: 0.0 103 | flipud: 0.0 104 | fliplr: 0.5 105 | bgr: 0.0 106 | mosaic: 1.0 107 | mixup: 0.0 108 | copy_paste: 0.0 109 | auto_augment: randaugment 110 | erasing: 0.4 111 | crop_fraction: 1.0 112 | cfg: null 113 | tracker: botsort.yaml 114 | save_dir: runs/detect/yolov8s-custom-detection 115 | -------------------------------------------------------------------------------- /yolov8/runs/detect/yolov8s-custom-detection/confusion_matrix.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/yolov8/runs/detect/yolov8s-custom-detection/confusion_matrix.png
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-detection/confusion_matrix_normalized.png,
/yolov8/runs/detect/yolov8s-custom-detection/labels.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/labels_correlogram.jpg:
--------------------------------------------------------------------------------
[binary image files; the original dump links each one as
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/<file path>]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-detection/results.csv:
--------------------------------------------------------------------------------
1 | epoch, train/box_loss, train/cls_loss, train/dfl_loss, metrics/precision(B), metrics/recall(B), metrics/mAP50(B), metrics/mAP50-95(B), val/box_loss, val/cls_loss, val/dfl_loss, lr/pg0, lr/pg1, lr/pg2
2 | 1, 0.33575, 0.96464, 0.92713, 0.98728, 0.99414, 0.99428, 0.95451, 0.12897, 0.12808, 0.71976, 0.00023747, 0.00023747, 0.00023747
3 | 2, 0.15357, 0.14805, 0.80883, 0.9855, 0.98612, 0.993, 0.97084, 0.12993, 0.14059, 0.72312, 0.0004284, 0.0004284, 0.0004284
4 | 3, 0.14698, 0.13669, 0.80588, 0.9811, 0.97639, 0.9931, 0.97046, 0.13829, 0.19596, 0.72469, 0.0005722, 0.0005722, 0.0005722
5 | 4, 0.13475, 0.12141, 0.7984, 0.99503, 0.9978, 0.99483, 0.9908, 0.09582, 0.08904, 0.70806, 0.00050194, 0.00050194, 0.00050194
6 | 5, 0.12384, 0.11019, 0.79649, 0.99611, 0.99623, 0.99491, 0.98963, 0.12858, 0.1022, 0.71001, 0.00043126, 0.00043126, 0.00043126
7 | 6, 0.11509, 0.10122, 0.79309, 0.99715, 0.99779, 0.9949, 0.99076, 0.09851, 0.08984, 0.70762, 0.00036057, 0.00036057, 0.00036057
8 | 7, 0.10807, 0.09476, 0.79098, 0.99317, 0.99405, 0.99464, 0.99174, 0.08369, 0.07324, 0.70415, 0.00028988, 0.00028988, 0.00028988
9 | 8, 0.10112, 0.08697, 0.78724, 0.99501, 0.99903, 0.99467, 0.9921, 0.0858, 0.07762, 0.70358, 0.0002192, 0.0002192, 0.0002192
10 | 9, 0.09347, 0.08006, 0.78649, 0.99745, 0.99841, 0.99495, 0.99299, 0.07884, 0.06546, 0.70073, 0.00014851, 0.00014851, 0.00014851
11 | 10, 0.08775, 0.07239, 0.78406, 0.99601, 0.9981, 0.99492, 0.99325, inf, 0.05998, 0.70022, 7.7826e-05, 7.7826e-05, 7.7826e-05
12 |
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-detection/results.png,
/yolov8/runs/detect/yolov8s-custom-detection/train_batch0.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/train_batch1.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/train_batch2.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/val_batch0_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/val_batch0_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/val_batch1_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/val_batch1_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/val_batch2_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-detection/val_batch2_pred.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-detection/weights/best.pt:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:6d7c77d9723839f582e02377794dccee2ed745f72e08f4818d1ed5a7f7c3e591
3 | size 22520345
4 |
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-detection/weights/last.pt:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:73cae52f24ae70b8380ba6e940f2092f2291b87201a60a3ea9b4341b239e6c62
3 | size 22520345
4 |
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/F1_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/PR_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/P_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/R_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/confusion_matrix.png:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
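A note on the weights/best.pt and weights/last.pt entries above: they are Git LFS pointer files (version/oid/size stanzas), not the roughly 22 MB checkpoints themselves. A minimal sketch of materializing the actual binaries, assuming git-lfs is installed locally:

    # fetch the real .pt checkpoints behind the LFS pointer files
    git lfs install
    git clone https://github.com/whn09/table_structure_recognition.git
    cd table_structure_recognition
    git lfs pull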
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/confusion_matrix_normalized.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/val_batch0_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/val_batch0_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/val_batch1_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/val_batch1_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/val_batch2_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-icdar2013-val/val_batch2_pred.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-all-val/F1_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/PR_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/P_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/R_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/confusion_matrix.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/confusion_matrix_normalized.png,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/val_batch0_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/val_batch0_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/val_batch1_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/val_batch1_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/val_batch2_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all-val/val_batch2_pred.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-all/F1_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all/PR_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all/P_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-all/R_curve.png:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-all/args.yaml:
--------------------------------------------------------------------------------
1 | task: detect
2 | mode: train
3 | model: yolov8s.pt
4 | data: data/custom-structure-all.yaml
5 | epochs: 10
6 | time: null
7 | patience: 100
8 | batch: 768
9 | imgsz: 1024
10 | save: true
11 | save_period: -1
12 | cache: false
13 | device:
14 | - 0
15 | - 1
16 | - 2
17 | - 3
18 | - 4
19 | - 5
20 | - 6
21 | - 7
22 | workers: 96
23 | project: null
24 | name: yolov8s-custom-structure-all
25 | exist_ok: false
26 | pretrained: true
27 | optimizer: auto
28 | verbose: true
29 | seed: 0
30 | deterministic: true
31 | single_cls: false
32 | rect: false
33 | cos_lr: false
34 | close_mosaic: 10
35 | resume: false
36 | amp: true
37 | fraction: 1.0
38 | profile: false
39 | freeze: null
40 | multi_scale: false
41 | overlap_mask: true
42 | mask_ratio: 4
43 | dropout: 0.0
44 | val: true
45 | split: val
46 | save_json: false
47 | save_hybrid: false
48 | conf: null
49 | iou: 0.7
50 | max_det: 300
51 | half: false
52 | dnn: false
53 | plots: true
54 | source: null
55 | vid_stride: 1
56 | stream_buffer: false
57 | visualize: false
58 | augment: false
59 | agnostic_nms: false
60 | classes: null
61 | retina_masks: false
62 | embed: null
63 | show: false
64 | save_frames: false
65 | save_txt: false
66 | save_conf: false
67 | save_crop: false
68 | show_labels: true
69 | show_conf: true
70 | show_boxes: true
71 | line_width: null
72 | format: torchscript
73 | keras: false
74 | optimize: false
75 | int8: false
76 | dynamic: false
77 | simplify: false
78 | opset: null
79 | workspace: 4
80 | nms: false
81 | lr0: 0.01
82 | lrf: 0.01
83 | momentum: 0.937
84 | weight_decay: 0.0005
85 | warmup_epochs: 3.0
86 | warmup_momentum: 0.8
87 | warmup_bias_lr: 0.1
88 | box: 7.5
89 | cls: 0.5
90 | dfl: 1.5
91 | pose: 12.0
92 | kobj: 1.0
93 | label_smoothing: 0.0
94 | nbs: 64
95 | hsv_h: 0.015
96 | hsv_s: 0.7
97 | hsv_v: 0.4
98 | degrees: 0.0
99 | translate: 0.1
100 | scale: 0.5
101 | shear: 0.0
102 | perspective: 0.0
103 | flipud: 0.0
104 | fliplr: 0.5
105 | bgr: 0.0
106 | mosaic: 1.0
107 | mixup: 0.0
108 | copy_paste: 0.0
109 | auto_augment: randaugment
110 | erasing: 0.4
111 | crop_fraction: 1.0
112 | cfg: null
113 | tracker: botsort.yaml
114 | save_dir: runs/detect/yolov8s-custom-structure-all
115 |
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-all/confusion_matrix.png,
/yolov8/runs/detect/yolov8s-custom-structure-all/confusion_matrix_normalized.png,
/yolov8/runs/detect/yolov8s-custom-structure-all/labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all/labels_correlogram.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-all/results.csv:
--------------------------------------------------------------------------------
1 | epoch, train/box_loss, train/cls_loss, train/dfl_loss, metrics/precision(B), metrics/recall(B), metrics/mAP50(B), metrics/mAP50-95(B), val/box_loss, val/cls_loss, val/dfl_loss, lr/pg0, lr/pg1, lr/pg2
2 | 1, 1.6507, 1.3107, 1.3009, 0.82564, 0.74994, 0.80431, 0.61503, 1.1117, inf, 0.99263, 0.0033303, 0.0033303, 0.0033303
3 | 2, 1.0613, 0.6721, 0.8716, 0.88484, 0.83503, 0.88114, 0.73071, 0.89974, inf, 0.92233, 0.0060039, 0.0060039, 0.0060039
4 | 3, 0.93686, 0.57192, 0.82294, 0.90481, 0.86186, 0.90803, 0.77581, 0.85373, inf, 0.90302, 0.0080175, 0.0080175, 0.0080175
5 | 4, 0.84143, 0.50421, 0.79702, 0.92118, 0.88422, 0.9304, 0.82235, 0.77148, 0.49503, 0.87393, 0.00703, 0.00703, 0.00703
6 | 5, 0.77593, 0.46317, 0.77982, 0.92895, 0.88972, 0.93696, 0.83769, 0.7377, 0.47648, 0.85885, 0.00604, 0.00604, 0.00604
7 | 6, 0.7376, 0.44006, 0.77022, 0.93475, 0.89944, 0.94639, 0.85363, 0.70795, 0.45352, 0.84985, 0.00505, 0.00505, 0.00505
8 | 7, 0.70342, 0.4209, 0.76208, 0.93999, 0.90821, 0.95206, 0.86547, 0.68255, 0.43528, 0.84599, 0.00406, 0.00406, 0.00406
9 | 8, 0.67003, 0.40169, 0.75468, 0.93695, 0.90993, 0.94929, 0.86872, 0.68181, 0.43141, 0.85709, 0.00307, 0.00307, 0.00307
10 | 9, 0.63742, 0.383, 0.74645, 0.94142, 0.91056, 0.95138, 0.87488, 0.66991, 0.42496, 0.85249, 0.00208, 0.00208, 0.00208
11 | 10, 0.60123, 0.36299, 0.7388, 0.94293, 0.91393, 0.9535, 0.88148, 0.65698, 0.41756, 0.84845, 0.00109, 0.00109, 0.00109
12 |
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-all/results.png,
/yolov8/runs/detect/yolov8s-custom-structure-all/train_batch0.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all/train_batch1.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all/train_batch2.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all/val_batch0_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all/val_batch0_pred.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
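Both results.csv files in this dump use the standard Ultralytics schema shown in the header row: epoch, the three training losses, precision/recall/mAP metrics, the three validation losses, and per-group learning rates, with space-padded columns. A minimal sketch for pulling the best epoch by mAP50-95 out of such a file; the path is this run's, and the field index follows the header above (column 8 is metrics/mAP50-95(B)):

    # print the row with the highest mAP50-95; NR>1 skips the header row
    awk -F',' 'NR>1 && $8+0 > best {best=$8+0; line=$0} END {print line}' \
        yolov8/runs/detect/yolov8s-custom-structure-all/results.csv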
/yolov8/runs/detect/yolov8s-custom-structure-all/val_batch1_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all/val_batch1_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all/val_batch2_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-all/val_batch2_pred.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-all/weights/best.pt,
/yolov8/runs/detect/yolov8s-custom-structure-all/weights/last.pt:
--------------------------------------------------------------------------------
[binary checkpoint files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/F1_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/PR_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/P_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/R_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/confusion_matrix.png,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/confusion_matrix_normalized.png,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/val_batch0_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/val_batch0_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/val_batch1_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/val_batch1_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/val_batch2_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-icdar2013-val/val_batch2_pred.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure-val/F1_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-val/PR_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-val/P_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-val/R_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure-val/confusion_matrix.png,
/yolov8/runs/detect/yolov8s-custom-structure-val/confusion_matrix_normalized.png,
/yolov8/runs/detect/yolov8s-custom-structure-val/val_batch0_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-val/val_batch0_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-val/val_batch1_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-val/val_batch1_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-val/val_batch2_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure-val/val_batch2_pred.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure/F1_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure/PR_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure/P_curve.png,
/yolov8/runs/detect/yolov8s-custom-structure/R_curve.png:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure/args.yaml:
--------------------------------------------------------------------------------
1 | task: detect
2 | mode: train
3 | model: yolov8s.pt
4 | data: data/custom-structure.yaml
5 | epochs: 10
6 | time: null
7 | patience: 100
8 | batch: 1024
9 | imgsz: 1024
10 | save: true
11 | save_period: -1
12 | cache: false
13 | device:
14 | - 0
15 | - 1
16 | - 2
17 | - 3
18 | - 4
19 | - 5
20 | - 6
21 | - 7
22 | workers: 96
23 | project: null
24 | name: yolov8s-custom-structure
25 | exist_ok: false
26 | pretrained: true
27 | optimizer: auto
28 | verbose: true
29 | seed: 0
30 | deterministic: true
31 | single_cls: false
32 | rect: false
33 | cos_lr: false
34 | close_mosaic: 10
35 | resume: false
36 | amp: true
37 | fraction: 1.0
38 | profile: false
39 | freeze: null
40 | multi_scale: false
41 | overlap_mask: true
42 | mask_ratio: 4
43 | dropout: 0.0
44 | val: true
45 | split: val
46 | save_json: false
47 | save_hybrid: false
48 | conf: null
49 | iou: 0.7
50 | max_det: 300
51 | half: false
52 | dnn: false
53 | plots: true
54 | source: null
55 | vid_stride: 1
56 | stream_buffer: false
57 | visualize: false
58 | augment: false
59 | agnostic_nms: false
60 | classes: null
61 | retina_masks: false
62 | embed: null
63 | show: false
64 | save_frames: false
65 | save_txt: false
66 | save_conf: false
67 | save_crop: false
68 | show_labels: true
69 | show_conf: true
70 | show_boxes: true
71 | line_width: null
72 | format: torchscript
73 | keras: false
74 | optimize: false
75 | int8: false
76 | dynamic: false
77 | simplify: false
78 | opset: null
79 | workspace: 4
80 | nms: false
81 | lr0: 0.01
82 | lrf: 0.01
83 | momentum: 0.937
84 | weight_decay: 0.0005
85 | warmup_epochs: 3.0
86 | warmup_momentum: 0.8
87 | warmup_bias_lr: 0.1
88 | box: 7.5
89 | cls: 0.5
90 | dfl: 1.5
91 | pose: 12.0
92 | kobj: 1.0
93 | label_smoothing: 0.0
94 | nbs: 64
95 | hsv_h: 0.015
96 | hsv_s: 0.7
97 | hsv_v: 0.4
98 | degrees: 0.0
99 | translate: 0.1
100 | scale: 0.5
101 | shear: 0.0
102 | perspective: 0.0
103 | flipud: 0.0
104 | fliplr: 0.5
105 | bgr: 0.0
106 | mosaic: 1.0
107 | mixup: 0.0
108 | copy_paste: 0.0
109 | auto_augment: randaugment
110 | erasing: 0.4
111 | crop_fraction: 1.0
112 | cfg: null
113 | tracker: botsort.yaml
114 | save_dir: runs/detect/yolov8s-custom-structure
115 |
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure/confusion_matrix.png,
/yolov8/runs/detect/yolov8s-custom-structure/confusion_matrix_normalized.png,
/yolov8/runs/detect/yolov8s-custom-structure/labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/labels_correlogram.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure/results.csv:
--------------------------------------------------------------------------------
1 | epoch, train/box_loss, train/cls_loss, train/dfl_loss, metrics/precision(B), metrics/recall(B), metrics/mAP50(B), metrics/mAP50-95(B), val/box_loss, val/cls_loss, val/dfl_loss, lr/pg0, lr/pg1, lr/pg2
2 | 1, 1.2677, 1.062, 1.1772, 0.86327, 0.84227, 0.89097, 0.72813, 0.85553, inf, 0.83748, 0.00023768, 0.00023768, 0.00023768
3 | 2, 0.76484, 0.50933, 0.80793, 0.91705, 0.90509, 0.93859, 0.83215, 0.69169, inf, 0.78, 0.00042859, 0.00042859, 0.00042859
4 | 3, 0.71239, 0.45963, 0.78077, 0.92277, 0.90842, 0.94335, 0.85251, 0.64737, inf, 0.76241, 0.00057237, 0.00057237, 0.00057237
5 | 4, 0.67612, 0.42939, 0.76617, 0.91471, 0.92057, 0.95032, 0.86217, 0.63002, inf, 0.76165, 0.00050194, 0.00050194, 0.00050194
6 | 5, 0.65274, 0.41024, 0.7576, 0.93619, 0.93024, 0.96174, 0.88396, 0.59925, inf, 0.74787, 0.00043126, 0.00043126, 0.00043126
7 | 6, 0.62962, 0.39683, 0.7526, 0.94183, 0.93851, 0.96774, 0.8956, 0.58625, 0.37494, 0.7384, 0.00036057, 0.00036057, 0.00036057
8 | 7, 0.60772, 0.37815, 0.74356, 0.94229, 0.94117, 0.96919, 0.89984, 0.57431, 0.36912, 0.73603, 0.00028988, 0.00028988, 0.00028988
9 | 8, 0.59516, 0.36987, 0.74112, 0.94461, 0.9465, 0.97267, 0.90778, 0.55657, 0.35717, 0.73256, 0.0002192, 0.0002192, 0.0002192
10 | 9, 0.57995, 0.35973, 0.73652, 0.94798, 0.94913, 0.97432, 0.91112, 0.55281, 0.35269, 0.73065, 0.00014851, 0.00014851, 0.00014851
11 | 10, 0.56609, 0.34963, 0.73254, 0.95036, 0.95006, 0.97564, 0.91509, 0.54408, 0.34702, 0.72846, 7.7826e-05, 7.7826e-05, 7.7826e-05
12 |
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure/results.png,
/yolov8/runs/detect/yolov8s-custom-structure/train_batch0.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/train_batch1.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/train_batch2.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/val_batch0_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/val_batch0_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/val_batch1_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/val_batch1_pred.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/val_batch2_labels.jpg,
/yolov8/runs/detect/yolov8s-custom-structure/val_batch2_pred.jpg:
--------------------------------------------------------------------------------
[binary image files; same raw.githubusercontent.com commit link scheme as above]
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure/weights/best.pt:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a556a37e038e8f5aaf63f7d2cf9569cacb6a4c8c8a9409671fb761a241188ea3
3 | size 22531673
4 |
--------------------------------------------------------------------------------
/yolov8/runs/detect/yolov8s-custom-structure/weights/last.pt:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f9ae8889bd1fa7949f7444a7306433e7a183539f2d58c3e99ebbceca74f9eef1
3 | size 22531673
4 |
--------------------------------------------------------------------------------
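The two args.yaml files above are the full argument snapshots that Ultralytics writes into each run directory, so a run can in principle be relaunched from one instead of retyping every CLI flag. A sketch, assuming an ultralytics version that accepts a saved config via cfg=; some versions reject run-only keys such as save_dir, in which case that line has to be removed from a copy of the file first:

    # relaunch training from the recorded arguments (command-line overrides win)
    yolo cfg=runs/detect/yolov8s-custom-structure/args.yaml
    # e.g. a quick single-GPU smoke test with the same hyperparameters
    yolo cfg=runs/detect/yolov8s-custom-structure/args.yaml epochs=1 device=0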
/yolov8/train_FinTabNet.c_structure.sh:
--------------------------------------------------------------------------------
1 | pip install ultralytics
2 | yolo detect train data=data/custom-structure-fintabnet.yaml model=yolov8s.pt name=yolov8s-custom-structure-fintabnet imgsz=1024 epochs=10 device=0,1,2,3,4,5,6,7 workers=92 batch=1024
3 | yolo detect val split=test data=data/custom-structure-fintabnet.yaml model=runs/detect/yolov8s-custom-structure-fintabnet/weights/best.pt imgsz=1024 device=0 workers=8 batch=512
--------------------------------------------------------------------------------
/yolov8/train_PubTables-1M_detection.sh:
--------------------------------------------------------------------------------
1 | pip install ultralytics
2 | yolo detect train data=data/custom-detection.yaml model=yolov8s.pt name=yolov8s-custom-detection imgsz=800 epochs=10 device=0,1,2,3,4,5,6,7 workers=96 batch=1024
3 | yolo detect val split=test data=data/custom-detection.yaml model=runs/detect/yolov8s-custom-detection/weights/best.pt imgsz=800 device=0 workers=96 batch=1024
--------------------------------------------------------------------------------
/yolov8/train_PubTables-1M_structure.sh:
--------------------------------------------------------------------------------
1 | pip install ultralytics
2 | yolo detect train data=data/custom-structure.yaml model=yolov8s.pt name=yolov8s-custom-structure imgsz=1024 epochs=10 device=0,1,2,3,4,5,6,7 workers=96 batch=1024
3 | yolo detect val split=test data=data/custom-structure.yaml model=runs/detect/yolov8s-custom-structure/weights/best.pt imgsz=1024 device=0 workers=96 batch=512
--------------------------------------------------------------------------------
/yolov8/train_all_structure.sh:
--------------------------------------------------------------------------------
1 | pip install ultralytics
2 | yolo detect train data=data/custom-structure-all.yaml model=yolov8s.pt name=yolov8s-custom-structure-all imgsz=1024 epochs=10 device=0,1,2,3,4,5,6,7 workers=96 batch=768
3 | yolo detect val split=test data=data/custom-structure-all.yaml model=runs/detect/yolov8s-custom-structure-all/weights/best.pt imgsz=1024 device=0 workers=96 batch=512
4 |
5 | # yolo detect train data=data/custom-structure-all.yaml model=yolov8x.pt name=yolov8x-custom-structure-all imgsz=1024 epochs=10 device=0,1,2,3,4,5,6,7 workers=32 batch=128
6 |
--------------------------------------------------------------------------------
/zh_val_0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whn09/table_structure_recognition/be3a6bf7bb3d50bb30393afb6f0c128dd2449f7c/zh_val_0.jpg
--------------------------------------------------------------------------------
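The training scripts above stop at validation, but the same Ultralytics CLI serves standalone inference with the trained checkpoints. A minimal sketch against zh_val_0.jpg, the sample page shipped in the repository root, assuming the LFS weights have been pulled; table_crop.jpg is a hypothetical crop produced by the detection step:

    # stage 1: detect tables on the full page (imgsz matches training)
    yolo detect predict model=runs/detect/yolov8s-custom-detection/weights/best.pt source=zh_val_0.jpg imgsz=800
    # stage 2: recognize row/column/cell structure on a cropped table image
    yolo detect predict model=runs/detect/yolov8s-custom-structure/weights/best.pt source=table_crop.jpg imgsz=1024

By default the annotated outputs land under runs/detect/predict*/.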