├── .gitignore ├── LICENSE ├── README.md ├── alphabet ├── fintabnet │ ├── character_alphabet.txt │ └── structure_alphabet.txt └── pubtabnet │ ├── character_alphabet.txt │ └── structure_alphabet.txt ├── configs ├── fintabnet.py ├── pubtab250.py ├── pubtabfin.py └── pubtabnet.py ├── datasets ├── FinTabNet.yaml ├── FinTabSub.yaml ├── PubTab250.yaml ├── PubTabNet.yaml └── PubTabSub.yaml ├── mutab ├── __init__.py ├── apis │ ├── __init__.py │ ├── test.py │ └── train.py ├── datasets │ ├── __init__.py │ ├── dataset.py │ ├── loader.py │ └── pipeline.py ├── metrics │ ├── __init__.py │ ├── metric.py │ ├── sample_pred.json │ ├── sample_real.json │ └── sample_test.json ├── models │ ├── __init__.py │ ├── backbone.py │ ├── decoder.py │ ├── encoder.py │ ├── factory.py │ ├── handler.py │ ├── loss.py │ ├── revisor.py │ └── scanner.py ├── optimizer │ ├── __init__.py │ └── factory.py └── utils │ ├── __init__.py │ └── utils.py ├── pyproject.toml ├── test.py ├── train.py └── train.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | __pycache__/ 3 | *.egg-info/ 4 | *.egg 5 | *.pth 6 | *.xz 7 | build/ 8 | !.gitignore 9 | !.github/ 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 JG1VPP 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of 
the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MuTabNet 2 | 3 | End-to-End table OCR model using a hierarchical Transformer that outputs HTML tags and cell contents. 4 | 5 | ## Usage 6 | 7 | ### Install 8 | 9 | ```sh 10 | pip install -e . 11 | ``` 12 | 13 | ### Datasets 14 | 15 | Download the following datasets: 16 | 17 | - [FinTabNet](https://developer.ibm.com/data/fintabnet) 18 | - [PubTabNet](https://developer.ibm.com/exchanges/data/all/pubtabnet) 19 | - [ICDAR Task-B Test Data](https://github.com/ajjimeno/icdar-task-b) 20 | 21 | ### Preprocess 22 | 23 | Follow [MTL-TabNet instructions](https://github.com/namtuanly/MTL-TabNet#data-preprocess). 
24 | The datasets must be placed in `data` directory as follows: 25 | 26 | ```sh 27 | $ ls ~/data 28 | fintabnet/ 29 | img_tables/ 30 | train/ 31 | 100000_61623.png 32 | 100001_61624.png 33 | 100002_61625.png 34 | 100003_61626.png 35 | 100004_61627.png 36 | val/ 37 | ground_truth_fintabnet.json 38 | ground_truth_pubtabnet.json 39 | icdar-task-b/ 40 | final_eval/ 41 | 000221630ba33f9118f2671a715d6962e08d6b76a5a0c77a9fe26c291df763b0.png 42 | 0005e8fe1b3ba14982336837219f285921af7c152cfc81ac88bcf52809299279.png 43 | 002b1bf2bbb7dd7ec6201174e68df6346f448cd3951e861c3f940711c769f25f.png 44 | 002bfeebe20be2e97fab46b99ce68321afb8972f6d8f131f0c1f5392819d3a23.png 45 | 002c7215e95cd4bfebffb13dc0db32ab229a6674f4f1add84518ae52b75ac0da.png 46 | final_eval.json 47 | mmocr_fintabnet/ 48 | train/ 49 | 100000_61623.txt 50 | 100001_61624.txt 51 | 100002_61625.txt 52 | 100003_61626.txt 53 | 100004_61627.txt 54 | val/ 55 | mmocr_pubtabnet/ 56 | train/ 57 | PMC1064074_007_00.txt 58 | PMC1064076_003_00.txt 59 | PMC1064076_004_00.txt 60 | PMC1064080_002_00.txt 61 | PMC1064094_007_00.txt 62 | val/ 63 | pubtabnet/ 64 | PubTabNet_2.0.0.jsonl 65 | train/ 66 | PMC1064074_007_00.png 67 | PMC1064076_003_00.png 68 | PMC1064076_004_00.png 69 | PMC1064080_002_00.png 70 | PMC1064094_007_00.png 71 | val/ 72 | ``` 73 | 74 | ### Training 75 | 76 | Run the following command to start training using four GPUs: 77 | 78 | ```sh 79 | name=pubtab250 80 | save=~/work/$name 81 | 82 | CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./train.sh ./configs/$name.py $save 4 83 | ``` 84 | 85 | ### Evaluation 86 | 87 | Run the following command to evaluate the model and calculate TEDS score: 88 | 89 | ```sh 90 | path=~/data/icdar-task-b/final_eval 91 | json=~/data/icdar-task-b/final_eval.json 92 | 93 | python test.py --conf ./configs/$name.py --ckpt $save/latest.pth --path $path --json $json 94 | ``` 95 | 96 | For FinTabNet, we use validation set including 10,656 tables as test set in imitation of the previous work. 
97 | 98 | ## Requirements 99 | 100 | We recommend that you use at least four V100 32GB GPUs or two A100 80GB GPU. 101 | 102 | ## License 103 | 104 | This project is licensed under the MIT License. 105 | See LICENSE for more details. 106 | 107 | ## Citation 108 | 109 | ```latex 110 | @inproceedings{ICDAR24KAT, 111 | author={Takaya Kawakatsu}, 112 | title={Multi-Cell Decoder and Mutual Learning for Table Structure and Character Recognition}, 113 | booktitle={Document Analysis and Recognition - ICDAR 2024}, 114 | publisher={Springer Nature Switzerland}, 115 | year={2024}, 116 | pages={389--405}, 117 | } 118 | ``` 119 | -------------------------------------------------------------------------------- /alphabet/fintabnet/character_alphabet.txt: -------------------------------------------------------------------------------- 1 | L 2 | o 3 | c 4 | a 5 | t 6 | i 7 | n 8 | 9 | C 10 | u 11 | r 12 | y 13 | S 14 | e 15 | q 16 | F 17 | ( 18 | h 19 | s 20 | d 21 | ) 22 | 1 23 | , 24 | 0 25 | 9 26 | 8 27 | 5 28 | M 29 | 6 30 | P 31 | R 32 | 3 33 | x 34 | 7 35 | 2 36 | l 37 | f 38 | 4 39 | I 40 | T 41 | D 42 | m 43 | p 44 | b 45 | A 46 | z 47 | w 48 | / 49 | . 50 | $ 51 | & 52 | H 53 | E 54 | Y 55 | 56 | 57 | O 58 | g 59 | : 60 | N 61 | v 62 | - 63 | — 64 | G 65 | % 66 | B 67 | W 68 | k 69 | ’ 70 | U 71 | V 72 | j 73 | J 74 | K 75 | – 76 | Q 77 | * 78 | ' 79 | + 80 | X 81 | ; 82 | “ 83 | ” 84 | Z 85 | ≥ 86 | < 87 | = 88 | # 89 | 90 | 91 | 92 | 93 | " 94 | ¢ 95 | ó 96 | [ 97 | ] 98 | í 99 | á 100 | ¨ 101 | ™ 102 | ¥ 103 | ` 104 | ö 105 | ü 106 | † 107 | é 108 | ¸ 109 | ý 110 | ‑ 111 | ® 112 | þ 113 | § 114 | ‘ 115 | ñ 116 | ½ 117 | ! 118 | > 119 | ⁄ 120 | © 121 | ☑ 122 | ☐ 123 | ? 
124 | € 125 | £ 126 | ‡ 127 | ⅞ 128 | ☒ 129 | ē 130 | − 131 | _ 132 | ¼ 133 | ¾ 134 | @ 135 | À 136 | à 137 | ~ 138 | \ 139 | } 140 | ● 141 | · 142 | ä 143 | ¤ 144 | • 145 | ç 146 | ã 147 | √ 148 |   149 | Ÿ 150 | ú 151 | ˆ 152 | ≤ 153 | ï 154 | ­ 155 | 156 | … 157 | ê 158 | ô 159 | ― 160 | ^ 161 | İ 162 | Ş 163 | è 164 | ² 165 | č 166 | ë 167 | ∙ 168 | È 169 |  170 | ³ 171 | ø 172 | å 173 | ¹ 174 | ō 175 | × 176 | -------------------------------------------------------------------------------- /alphabet/fintabnet/structure_alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 | 5 | 7 | 8 | 9 | colspan="10" 10 | colspan="11" 11 | colspan="12" 12 | colspan="13" 13 | colspan="14" 14 | colspan="15" 15 | colspan="16" 16 | colspan="18" 17 | colspan="19" 18 | colspan="2" 19 | colspan="25" 20 | colspan="3" 21 | colspan="4" 22 | colspan="5" 23 | colspan="6" 24 | colspan="7" 25 | colspan="8" 26 | colspan="9" 27 | 28 | rowspan="10" 29 | rowspan="13" 30 | rowspan="15" 31 | rowspan="16" 32 | rowspan="2" 33 | rowspan="3" 34 | rowspan="4" 35 | rowspan="5" 36 | rowspan="6" 37 | rowspan="7" 38 | rowspan="8" 39 | rowspan="9" 40 | -------------------------------------------------------------------------------- /alphabet/pubtabnet/character_alphabet.txt: -------------------------------------------------------------------------------- 1 | V 2 | a 3 | r 4 | i 5 | b 6 | l 7 | e 8 | H 9 | z 10 | d 11 | 12 | t 13 | o 14 | 9 15 | 5 16 | % 17 | C 18 | I 19 | 20 | p 21 | 22 | v 23 | u 24 | * 25 | A 26 | g 27 | ( 28 | m 29 | n 30 | ) 31 | 0 32 | . 33 | 7 34 | 1 35 | 6 36 | ≤ 37 | > 38 | 8 39 | 3 40 | – 41 | 2 42 | G 43 | 4 44 | M 45 | F 46 | T 47 | y 48 | f 49 | s 50 | L 51 | w 52 | c 53 | U 54 | h 55 | D 56 | S 57 | Q 58 | R 59 | x 60 | P 61 | - 62 | E 63 | O 64 | / 65 | k 66 | , 67 | + 68 | N 69 | K 70 | q 71 | ′ 72 | [ 73 | ] 74 | < 75 | ≥ 76 | 77 | − 78 | 79 | μ 80 | ± 81 | J 82 | j 83 | W 84 | _ 85 | Δ 86 | B 87 | “ 88 | : 89 | Y 90 | α 91 | λ 92 | ; 93 | 94 | 95 | ? 
96 | ∼ 97 | 98 | 99 | = 100 | ° 101 | # 102 | ̊ 103 | ̈ 104 | ̂ 105 | ’ 106 | Z 107 | X 108 | ∗ 109 | — 110 | β 111 | ' 112 | † 113 | ~ 114 | @ 115 | " 116 | γ 117 | ↓ 118 | ↑ 119 | & 120 | ‡ 121 | χ 122 | ” 123 | σ 124 | § 125 | | 126 | ¶ 127 | ‐ 128 | × 129 | $ 130 | → 131 | √ 132 | ✓ 133 | ‘ 134 | \ 135 | ∞ 136 | π 137 | • 138 | ® 139 | ^ 140 | ∆ 141 | ≧ 142 | 143 | 144 | ́ 145 | ♀ 146 | ♂ 147 | ‒ 148 | ⁎ 149 | ▲ 150 | · 151 | £ 152 | φ 153 | Ψ 154 | ß 155 | △ 156 | ☆ 157 | ▪ 158 | η 159 | € 160 | ∧ 161 | ̃ 162 | Φ 163 | ρ 164 | ̄ 165 | δ 166 | ‰ 167 | ̧ 168 | Ω 169 | ♦ 170 | { 171 | } 172 | ̀ 173 | ∑ 174 | ∫ 175 | ø 176 | κ 177 | ε 178 | ¥ 179 | ※ 180 | ` 181 | ω 182 | Σ 183 | ➔ 184 | ‖ 185 | Β 186 | ̸ 187 | ─ 188 | ● 189 | ⩾ 190 | Χ 191 | Α 192 | ⋅ 193 | ◆ 194 | ★ 195 | ■ 196 | ψ 197 | ǂ 198 | □ 199 | ζ 200 | ! 201 | Γ 202 | ↔ 203 | θ 204 | ⁄ 205 | 〈 206 | 〉 207 | ― 208 | υ 209 | 
 210 | τ 211 | ⋆ 212 | Ø 213 | © 214 | ∥ 215 | С 216 | ˂ 217 | ➢ 218 | ɛ 219 | ⁡ 220 | ✗ 221 | ← 222 | ○ 223 | ¢ 224 | ⩽ 225 | ∖ 226 | ˃ 227 | ­ 228 | ≈ 229 | Π 230 | ̌ 231 | ≦ 232 | ∅ 233 | ᅟ 234 | 235 | 236 | ∣ 237 | ¤ 238 | ♯ 239 | ̆ 240 | ξ 241 | ÷ 242 | ▼ 243 |  244 | ι 245 | ν 246 | ║ 247 | 248 | 249 | ◦ 250 | ​ 251 | ◊ 252 | ∙ 253 | « 254 | » 255 | ł 256 | ı 257 | Θ 258 | ∈ 259 | „ 260 | ∘ 261 | ✔ 262 | ̇ 263 | æ 264 | ʹ 265 | ˆ 266 | ♣ 267 | ⇓ 268 | ∩ 269 | ⊕ 270 | ⇒ 271 | ⇑ 272 | ̨ 273 | Ι 274 | Λ 275 | ⋯ 276 | А 277 | ⋮ 278 | -------------------------------------------------------------------------------- /alphabet/pubtabnet/structure_alphabet.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | colspan="2" 23 | colspan="3" 24 | colspan="4" 25 | colspan="5" 26 | colspan="6" 27 | colspan="7" 28 | colspan="8" 29 | colspan="9" 30 | colspan="10" 31 | rowspan="2" 32 | rowspan="3" 33 | rowspan="4" 34 | rowspan="5" 35 | rowspan="6" 36 | rowspan="7" 37 | rowspan="8" 38 | rowspan="9" 39 | rowspan="10" 40 | -------------------------------------------------------------------------------- /configs/fintabnet.py: -------------------------------------------------------------------------------- 1 | _base_ = "pubtabnet.py" 2 | 3 | 4 | model = dict( 5 | handler=dict( 6 | html_dict_file="alphabet/fintabnet/structure_alphabet.txt", 7 | cell_dict_file="alphabet/fintabnet/character_alphabet.txt", 8 | ) 9 | ) 10 | 11 | train_pipeline = [ 12 | dict(type="LoadImageFromFile"), 13 | dict(type="TableResize", size=520), 14 | dict( 15 | type="TablePad", 16 | size=(520, 520), 17 | ), 18 | dict(type="TableBboxFlip"), 19 | dict(type="TableBboxEncode"), 20 | dict(type="ToTensorOCR"), 21 | dict( 22 | type="NormalizeOCR", 23 | mean=[0.5, 0.5, 0.5], 24 | std=[0.5, 0.5, 0.5], 25 | ), 26 | dict( 27 | type="Collect", 28 | keys=["img"], 29 | meta_keys=[ 30 | 
"filename", 31 | "ori_shape", 32 | "img_shape", 33 | "pad_shape", 34 | "img_scale", 35 | "html", 36 | "cell", 37 | "bbox", 38 | ], 39 | ), 40 | ] 41 | 42 | data = dict( 43 | train=dict( 44 | img_prefix="../data/fintabnet/img_tables/train/", 45 | ann_file="../data/mmocr_fintabnet/train/", 46 | pipeline=train_pipeline, 47 | ), 48 | val=dict( 49 | img_prefix="../data/fintabnet/img_tables/test/", 50 | ann_file="../data/mmocr_fintabsub/test/", 51 | pipeline=train_pipeline, 52 | ), 53 | test=dict( 54 | img_prefix="../data/fintabnet/img_tables/val/", 55 | ann_file="../data/mmocr_fintabsub/val/", 56 | ), 57 | ) 58 | -------------------------------------------------------------------------------- /configs/pubtab250.py: -------------------------------------------------------------------------------- 1 | _base_ = "pubtabnet.py" 2 | 3 | 4 | data = dict(train=dict(ann_file="../data/mmocr_pubtab250/train/")) 5 | -------------------------------------------------------------------------------- /configs/pubtabfin.py: -------------------------------------------------------------------------------- 1 | _base_ = "pubtabnet.py" 2 | 3 | 4 | template = "{}
" 5 | 6 | model = dict(handler=dict(revisor=dict(template=template))) 7 | 8 | ignore = ["b"] # in all elements 9 | -------------------------------------------------------------------------------- /configs/pubtabnet.py: -------------------------------------------------------------------------------- 1 | max_len_html = 800 2 | max_len_cell = 8000 3 | 4 | seed = None 5 | 6 | eb_tokens = { 7 | "": "", 8 | "": " ", 9 | "": " ", 10 | "": "\u2028\u2028", 11 | "": " ", 12 | "": "", 13 | "": " ", 14 | "": "", 15 | "": " ", 16 | "": "", 17 | "": " \u2028 \u2028 ", 18 | } 19 | 20 | revisions = { 21 | "^.*$": eb_tokens, 22 | "(.*?)": { 23 | r'()(.*?)': r"\g<1>\g<4>", 24 | "": "", 25 | "": "", 26 | "": "", 27 | }, 28 | } 29 | 30 | cell_tokens = ["", "", "", ""], 99 | revisor=dict( 100 | template="{}", 101 | patterns=revisions, 102 | ), 103 | ), 104 | ) 105 | 106 | train_pipeline = [ 107 | dict(type="LoadImageFromFile"), 108 | dict(type="TableResize", size=520), 109 | dict( 110 | type="TablePad", 111 | size=(520, 520), 112 | ), 113 | dict(type="TableBboxEncode"), 114 | dict(type="ToTensorOCR"), 115 | dict( 116 | type="NormalizeOCR", 117 | mean=[0.5, 0.5, 0.5], 118 | std=[0.5, 0.5, 0.5], 119 | ), 120 | dict( 121 | type="Collect", 122 | keys=["img"], 123 | meta_keys=[ 124 | "filename", 125 | "ori_shape", 126 | "img_shape", 127 | "pad_shape", 128 | "img_scale", 129 | "html", 130 | "cell", 131 | "bbox", 132 | ], 133 | ), 134 | ] 135 | 136 | test_pipeline = [ 137 | dict(type="LoadImageFromFile"), 138 | dict(type="TableResize", size=520), 139 | dict( 140 | type="TablePad", 141 | size=(520, 520), 142 | ), 143 | dict(type="ToTensorOCR"), 144 | dict( 145 | type="NormalizeOCR", 146 | mean=[0.5, 0.5, 0.5], 147 | std=[0.5, 0.5, 0.5], 148 | ), 149 | dict( 150 | type="Collect", 151 | keys=["img"], 152 | meta_keys=[ 153 | "filename", 154 | "ori_shape", 155 | "img_shape", 156 | "pad_shape", 157 | "img_scale", 158 | ], 159 | ), 160 | ] 161 | 162 | loader = dict( 163 | type="TableHardDiskLoader", 
164 | max_len_html=max_len_html, 165 | parser=dict( 166 | type="TableStrParser", 167 | cell_tokens=cell_tokens, 168 | ), 169 | ) 170 | 171 | data = dict( 172 | samples_per_gpu=2, 173 | workers_per_gpu=2, 174 | train=dict( 175 | type="TableDataset", 176 | img_prefix="../data/pubtabnet/train/", 177 | ann_file="../data/mmocr_pubtabnet/train/", 178 | pipeline=train_pipeline, 179 | loader=loader, 180 | test_mode=False, 181 | ), 182 | val=dict( 183 | type="TableDataset", 184 | img_prefix="../data/pubtabnet/val/", 185 | ann_file="../data/mmocr_pubtabsub/val/", 186 | pipeline=train_pipeline, 187 | loader=loader, 188 | test_mode=True, 189 | ), 190 | test=dict( 191 | type="TableDataset", 192 | img_prefix="../data/pubtabnet/val/", 193 | ann_file="../data/mmocr_pubtabsub/val/", 194 | pipeline=test_pipeline, 195 | loader=loader, 196 | test_mode=True, 197 | ), 198 | ) 199 | 200 | # optimizer 201 | optimizer = dict(type="Ranger", lr=1e-3) 202 | optimizer_config = dict(grad_clip=dict(max_norm=30, norm_type=2)) 203 | 204 | # learning policy 205 | lr_config = dict( 206 | policy="step", 207 | warmup="linear", 208 | warmup_iters=50, 209 | warmup_ratio=1.0 / 3, 210 | step=[25, 28], 211 | ) 212 | 213 | # runner 214 | runner = dict(type="EpochBasedRunner", max_epochs=30) 215 | 216 | # evaluation 217 | ignore = None 218 | evaluation = dict(interval=1, metric="acc") 219 | 220 | # fp16 221 | fp16 = dict(loss_scale="dynamic") 222 | 223 | # checkpoint setting 224 | checkpoint_config = dict(interval=1) 225 | 226 | # log_config 227 | log_config = dict(interval=100, hooks=[dict(type="TextLoggerHook")]) 228 | 229 | # logger 230 | log_level = "INFO" 231 | 232 | # yapf:enable 233 | dist_params = dict(backend="nccl") 234 | 235 | # pretrained 236 | load_from = None 237 | resume_from = None 238 | 239 | # workflow 240 | workflow = [("train", 1)] 241 | -------------------------------------------------------------------------------- /datasets/FinTabNet.yaml: 
-------------------------------------------------------------------------------- 1 | type: FinTabNet 2 | load: 3 | dir: ~/data/fintabnet/img_tables/ 4 | jsonl: 5 | - ~/data/fintabnet/img_tables/FinTabNet_1.0.0_table_train.jsonl 6 | - ~/data/fintabnet/img_tables/FinTabNet_1.0.0_table_val.jsonl 7 | - ~/data/fintabnet/img_tables/FinTabNet_1.0.0_table_test.jsonl 8 | dump: 9 | dir: ~/data/mmocr_fintabnet/ 10 | json: ~/data/ground_truth_fintabnet.json 11 | split: val 12 | replace: 13 | []: "" 14 | [' ']: "" 15 | ['', ' ', '']: "" 16 | ['', ' ', '']: "" 17 | ['', ' ', '']: "" 18 | ['', '']: "" 19 | ['', '']: "" 20 | [' ', '', '']: "" 21 | [' ', '', '']: "" 22 | [' ', ' ']: "" 23 | [' ', '', '', '', '', ' ', '', '']: "" 24 | [' ', '', '', ' ']: "" 25 | [' ', '', '', '', '', '', ' ', '', ' ']: "" 26 | ['', '', '', '']: "" 27 | [' ', ' ', ' ']: "" 28 | [' ', ' ', '', '', '', '', ' ']: "" 29 | ['', '', '', '']: "" 30 | ['', '', '', '']: "" 31 | [' ', '', '', '', '', '', '']: "" 32 | [' ', '', ' ', '']: "" 33 | ['', ' ', '', '', ' ', '']: "" 34 | ['', ' ', '', '', '']: "" 35 | ['', '', '', '']: "" 36 | [' ', ' ', ' ', ' ', '', '', ' ']: "" 37 | ['', '', '', '', '', '', '', '']: "" 38 | [' ', ' ', ' ', ' ']: "" 39 | [' ', ' ', ' ', ' ', ' ']: "" 40 | [' ', ' ', ' ', ' ', ' ', ' ']: "" 41 | ['', '', ' ', '', '', '', '']: "" 42 | [' ', ' ', '', '']: "" 43 | [' ', '', '', '', '', '', ' ', ' ', '', ' ']: "" 44 | [' ', '', '', '', '']: "" 45 | ['', '', '', '', '', '']: "" 46 | [' ', ' ', ' ', '', '']: "" 47 | [' ', ' ', ' ', '', '', ' ']: "" 48 | [' ', '', '', ' ', ' ', '', '']: "" 49 | ['', '', ' ', '', '']: "" 50 | [' ', ' ', ' ', ' ', '', '']: "" 51 | [' ', ' ', ' ', '', '']: "" 52 | [' ', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']: "" 53 | ['', '', '', '', '', '', '', '', '', '']: "" 54 | [' ', '', ' ', '']: "" 55 | ['', '', '', '', '', '', ' ']: "" 56 | ['', '', '', '', ' ']: "" 57 | ['', '', ' ']: "" 58 | [' ', ' ', '', '', ' ']: "" 59 | [' ', ' ', 
'', '']: "" 60 | -------------------------------------------------------------------------------- /datasets/FinTabSub.yaml: -------------------------------------------------------------------------------- 1 | type: FinTabNet 2 | load: 3 | dir: ~/data/fintabnet/img_tables/ 4 | jsonl: 5 | - ~/data/fintabnet/img_tables/FinTabNet_1.0.0_table_train.jsonl 6 | - ~/data/fintabnet/img_tables/FinTabNet_1.0.0_table_val.jsonl 7 | - ~/data/fintabnet/img_tables/FinTabNet_1.0.0_table_test.jsonl 8 | dump: 9 | dir: ~/data/mmocr_fintabsub/ 10 | json: ~/data/ground_truth_fintabsub.json 11 | split: val 12 | samples: 64 13 | replace: 14 | []: "" 15 | [' ']: "" 16 | ['', ' ', '']: "" 17 | ['', ' ', '']: "" 18 | ['', ' ', '']: "" 19 | ['', '']: "" 20 | ['', '']: "" 21 | [' ', '', '']: "" 22 | [' ', '', '']: "" 23 | [' ', ' ']: "" 24 | [' ', '', '', '', '', ' ', '', '']: "" 25 | [' ', '', '', ' ']: "" 26 | [' ', '', '', '', '', '', ' ', '', ' ']: "" 27 | ['', '', '', '']: "" 28 | [' ', ' ', ' ']: "" 29 | [' ', ' ', '', '', '', '', ' ']: "" 30 | ['', '', '', '']: "" 31 | ['', '', '', '']: "" 32 | [' ', '', '', '', '', '', '']: "" 33 | [' ', '', ' ', '']: "" 34 | ['', ' ', '', '', ' ', '']: "" 35 | ['', ' ', '', '', '']: "" 36 | ['', '', '', '']: "" 37 | [' ', ' ', ' ', ' ', '', '', ' ']: "" 38 | ['', '', '', '', '', '', '', '']: "" 39 | [' ', ' ', ' ', ' ']: "" 40 | [' ', ' ', ' ', ' ', ' ']: "" 41 | [' ', ' ', ' ', ' ', ' ', ' ']: "" 42 | ['', '', ' ', '', '', '', '']: "" 43 | [' ', ' ', '', '']: "" 44 | [' ', '', '', '', '', '', ' ', ' ', '', ' ']: "" 45 | [' ', '', '', '', '']: "" 46 | ['', '', '', '', '', '']: "" 47 | [' ', ' ', ' ', '', '']: "" 48 | [' ', ' ', ' ', '', '', ' ']: "" 49 | [' ', '', '', ' ', ' ', '', '']: "" 50 | ['', '', ' ', '', '']: "" 51 | [' ', ' ', ' ', ' ', '', '']: "" 52 | [' ', ' ', ' ', '', '']: "" 53 | [' ', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']: "" 54 | ['', '', '', '', '', '', '', '', '', '']: "" 55 | [' ', '', ' ', '']: "" 
56 | ['', '', '', '', '', '', ' ']: "" 57 | ['', '', '', '', ' ']: "" 58 | ['', '', ' ']: "" 59 | [' ', ' ', '', '', ' ']: "" 60 | [' ', ' ', '', '']: "" 61 | -------------------------------------------------------------------------------- /datasets/PubTab250.yaml: -------------------------------------------------------------------------------- 1 | type: PubTabNet 2 | load: 3 | dir: ~/data/pubtabnet/ 4 | jsonl: 5 | - ~/data/pubtabnet/PubTabNet_2.0.0.jsonl 6 | dump: 7 | dir: ~/data/mmocr_pubtab250/ 8 | json: ~/data/ground_truth_pubtab250.json 9 | split: val 10 | seq_len: 11 | html: 12 | min: 250 13 | replace: 14 | []: "" 15 | [' ']: "" 16 | ['', ' ', '']: "" 17 | ["\u2028", "\u2028"]: "" 18 | ['', ' ', '']: "" 19 | ['', '']: "" 20 | ['', ' ', '']: "" 21 | ['', '', '', '']: "" 22 | ['', '', ' ', '', '']: "" 23 | ['', '']: "" 24 | ['', ' ', "\u2028", ' ', "\u2028", ' ', '']: "" 25 | -------------------------------------------------------------------------------- /datasets/PubTabNet.yaml: -------------------------------------------------------------------------------- 1 | type: PubTabNet 2 | load: 3 | dir: ~/data/pubtabnet/ 4 | jsonl: 5 | - ~/data/pubtabnet/PubTabNet_2.0.0.jsonl 6 | dump: 7 | dir: ~/data/mmocr_pubtabnet/ 8 | json: ~/data/ground_truth_pubtabnet.json 9 | split: val 10 | replace: 11 | []: "" 12 | [' ']: "" 13 | ['', ' ', '']: "" 14 | ["\u2028", "\u2028"]: "" 15 | ['', ' ', '']: "" 16 | ['', '']: "" 17 | ['', ' ', '']: "" 18 | ['', '', '', '']: "" 19 | ['', '', ' ', '', '']: "" 20 | ['', '']: "" 21 | ['', ' ', "\u2028", ' ', "\u2028", ' ', '']: "" 22 | -------------------------------------------------------------------------------- /datasets/PubTabSub.yaml: -------------------------------------------------------------------------------- 1 | type: PubTabNet 2 | load: 3 | dir: ~/data/pubtabnet/ 4 | jsonl: 5 | - ~/data/pubtabnet/PubTabNet_2.0.0.jsonl 6 | dump: 7 | dir: ~/data/mmocr_pubtabsub/ 8 | json: ~/data/ground_truth_pubtabsub.json 9 | split: val 10 | 
seq_len: 11 | html: 12 | min: 250 13 | samples: 64 14 | replace: 15 | []: "" 16 | [' ']: "" 17 | ['', ' ', '']: "" 18 | ["\u2028", "\u2028"]: "" 19 | ['', ' ', '']: "" 20 | ['', '']: "" 21 | ['', ' ', '']: "" 22 | ['', '', '', '']: "" 23 | ['', '', ' ', '', '']: "" 24 | ['', '']: "" 25 | ['', ' ', "\u2028", ' ', "\u2028", ' ', '']: "" 26 | -------------------------------------------------------------------------------- /mutab/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.0" 2 | -------------------------------------------------------------------------------- /mutab/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from mutab import datasets, models, optimizer 2 | 3 | from .test import evaluate 4 | from .train import train 5 | 6 | __all__ = ["datasets", "models", "optimizer", "evaluate", "train"] 7 | -------------------------------------------------------------------------------- /mutab/apis/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from concurrent.futures import ProcessPoolExecutor 3 | from functools import partial 4 | from typing import List, Optional 5 | 6 | from mmdet.apis import init_detector 7 | from tqdm import tqdm 8 | 9 | from mutab.metrics import TEDS 10 | 11 | 12 | def score(item, truth, ignore: Optional[List[str]]): 13 | teds_full = TEDS(ignore, struct_only=False) 14 | teds_html = TEDS(ignore, struct_only=True) 15 | file_name = os.path.basename(item["path"]) 16 | if file_name not in truth: 17 | return None 18 | item.update(real=truth[file_name]["html"]) 19 | item.update(type=truth[file_name]["type"]) 20 | scores = {} 21 | scores.update(full=teds_full.evaluate(**item)) 22 | scores.update(html=teds_html.evaluate(**item)) 23 | item.update(TEDS=scores) 24 | return (file_name, item) 25 | 26 | 27 | def worker(n: int, paths: List[str], cfg: str, ckpt: str, truth): 28 | model 
= init_detector(config=cfg, checkpoint=ckpt, device=n) 29 | items = map(model.predict, tqdm(list(paths), disable=n > 0)) 30 | final = partial(score, truth=truth, ignore=model.cfg.ignore) 31 | with ProcessPoolExecutor() as pool: 32 | return list(pool.map(final, items)) 33 | 34 | 35 | def evaluate(paths: List[List[str]], cfg: str, ckpt: str, truth): 36 | with ProcessPoolExecutor(len(paths)) as pool: 37 | process = partial(worker, cfg=cfg, ckpt=ckpt, truth=truth) 38 | results = list(pool.map(process, *zip(*enumerate(paths)))) 39 | return dict(filter(None, sum(results, []))) 40 | -------------------------------------------------------------------------------- /mutab/apis/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime as dt 3 | 4 | from mmcv import Config, mkdir_or_exist 5 | from mmcv.runner import get_dist_info 6 | from mmdet.apis import train_detector 7 | from mmdet.utils import get_device 8 | 9 | from mutab.datasets import build_dataset 10 | from mutab.models import build_detector 11 | from mutab.utils import collect_env, get_logger, pretty_env 12 | 13 | 14 | def train(cfg: Config, cfg_file: str): 15 | mkdir_or_exist(cfg.work_dir) 16 | _, devices = get_dist_info() 17 | cfg.gpu_ids = range(devices) 18 | cfg.device = get_device() 19 | 20 | # prepare log 21 | time = dt.now().strftime("%Y%m%d_%H%M%S") 22 | log = os.path.join(cfg.work_dir, "{}.log".format(time)) 23 | log = get_logger(log_file=log, log_level=cfg.log_level) 24 | metas = dict(env=collect_env(), config=cfg.pretty_text) 25 | 26 | # dump environmental information 27 | log.info(pretty_env(bar="-" * 64)) 28 | log.info("\n{}".format(cfg.pretty_text)) 29 | 30 | # build model and dataset 31 | model = build_detector(cfg.model) 32 | dataset = build_dataset(cfg.data.train) 33 | 34 | # dump configuration 35 | os.environ.update(LOCAL_RANK=os.getenv("LOCAL_RANK", "0")) 36 | cfg.dump(os.path.join(cfg.work_dir, 
os.path.basename(cfg_file))) 37 | 38 | # start training 39 | cfg.checkpoint_config.meta = dict(env=collect_env(), CLASSES=int(dataset.CLASSES)) 40 | train_detector(model, dataset, cfg, devices > 1, True, timestamp=time, meta=metas) 41 | -------------------------------------------------------------------------------- /mutab/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.datasets.builder import build_dataset 2 | from mmocr.datasets.pipelines import NormalizeOCR, ResizeOCR, ToTensorOCR 3 | 4 | from .dataset import TableDataset 5 | from .loader import TableHardDiskLoader, TableStrParser 6 | from .pipeline import TableBboxEncode, TablePad, TableResize 7 | 8 | __all__ = [ 9 | "NormalizeOCR", 10 | "ResizeOCR", 11 | "TableBboxEncode", 12 | "TableDataset", 13 | "TableHardDiskLoader", 14 | "TablePad", 15 | "TableResize", 16 | "TableStrParser", 17 | "ToTensorOCR", 18 | "build_dataset", 19 | ] 20 | -------------------------------------------------------------------------------- /mutab/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mmdet.datasets.builder import DATASETS 3 | from mmocr.datasets import BaseDataset 4 | 5 | from mutab.metrics import TEDS 6 | from mutab.utils import get_logger 7 | 8 | 9 | @DATASETS.register_module() 10 | class TableDataset(BaseDataset): 11 | def evaluate(self, results, **kwargs): 12 | metric = TEDS(struct_only=False) 13 | scores = [] 14 | logger = get_logger() 15 | for idx, info in enumerate(self.data_infos): 16 | score = metric.evaluate(**results[idx]) 17 | logger.info("%s score: %s", info["filename"], score) 18 | scores.append(score) 19 | 20 | return dict(TEDS=np.mean(scores)) 21 | -------------------------------------------------------------------------------- /mutab/datasets/loader.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 
@PARSERS.register_module()
class TableStrParser:
    """Aligns bounding boxes with the HTML tokens that represent table cells.

    Non-cell tokens keep an all-zero box; boxes are consumed in reading order
    by the cell tokens listed in ``cell_tokens``.
    """

    def __init__(self, cell_tokens):
        assert isinstance(cell_tokens, list)
        assert len(cell_tokens)
        self.cell_tokens = cell_tokens

    def align(self, html, bbox, **info):
        """Return *info* with ``bbox`` expanded to one row per HTML token."""
        supply = iter(bbox)
        boxes = np.zeros((len(html), 4))
        for pos, token in enumerate(html):
            if token in self.cell_tokens:
                boxes[pos] = next(supply)
        return dict(html=html, bbox=boxes, **info)

    def __call__(self, info):
        return self.align(**info)
@PIPELINES.register_module()
class TableResize:
    """Resizes the image so its longer side equals ``size``; rescales boxes."""

    def __init__(self, size: int):
        self.size = size

    def __call__(self, results):
        self.resize_img(results)
        self.resize_box(results)
        return results

    def resize_img(self, results):
        """Resize keeping aspect ratio and record the (y, x) scale factors."""
        src = results["img"]
        old_h, old_w, _ = src.shape
        if old_w < old_h:
            new_h = int(self.size)
            new_w = int(self.size / old_h * old_w)
        else:
            new_w = int(self.size)
            new_h = int(self.size / old_w * old_h)
        scale = (new_h / old_h, new_w / old_w)
        resized = cv2.resize(src, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        results.update(img=resized, img_shape=resized.shape, img_scale=scale)

    def resize_box(self, results):
        """Rescale annotation boxes in place, clamped to the new image bounds."""
        height, width = results["img_shape"][:2]
        info = results.get("img_info")
        # Boxes are only present during the train and val phases.
        if info is None or info.get("bbox") is None:
            return
        boxes = info["bbox"]
        sy, sx = results["img_scale"]
        boxes[..., 0::2] = np.clip(boxes[..., 0::2] * sx, 0, width - 1)
        boxes[..., 1::2] = np.clip(boxes[..., 1::2] * sy, 0, height - 1)
        info.update(bbox=boxes)
@PIPELINES.register_module()
class TableBboxEncode:
    """Converts (x1, y1, x2, y2) boxes to normalized (cx, cy, w, h).

    Also hoists ``html``, ``cell`` and ``bbox`` from ``img_info`` to the top
    level of the results dict.
    """

    def __call__(self, results):
        info = results["img_info"]
        size = results["img"].shape
        bbox = self.xyxy_to_xywh(info["bbox"])
        bbox = self.normalize_bbox(bbox, size)
        # Sanity check: normalized boxes must lie inside the image.
        assert np.all(bbox >= 0)
        assert np.all(bbox <= 1)
        info.update(bbox=bbox)
        self.adjust_key(results)
        return results

    def xyxy_to_xywh(self, bbox):
        """Return boxes as (center x, center y, width, height)."""
        bb = np.empty_like(bbox)
        # xy center
        bb[..., 0] = bbox[..., 0::2].mean(axis=-1)
        bb[..., 1] = bbox[..., 1::2].mean(axis=-1)
        # width and height; np.ptp() function instead of the ndarray method,
        # which was removed in NumPy 2.0 (identical result on older NumPy).
        bb[..., 2] = np.ptp(bbox[..., 0::2], axis=-1)
        bb[..., 3] = np.ptp(bbox[..., 1::2], axis=-1)
        return bb

    def normalize_bbox(self, bbox, size):
        """Scale coordinates in place into [0, 1] by image width/height."""
        bbox[..., 0::2] /= size[1]
        bbox[..., 1::2] /= size[0]
        return bbox

    def adjust_key(self, results):
        """Move annotation entries from img_info to the results top level."""
        results.update(html=results["img_info"].pop("html"))
        results.update(cell=results["img_info"].pop("cell"))
        results.update(bbox=results["img_info"].pop("bbox"))
class TEDS:
    """Tree-Edit-Distance-based Similarity (TEDS) between two HTML tables.

    A score of 1.0 means the predicted and reference tables are identical;
    0.0 means maximally different.
    """

    def __init__(self, ignore_tags=None, struct_only=False):
        # ignore_tags: tag names stripped from both trees before scoring.
        self.ignore_tags = ignore_tags
        # struct_only: when True, cell text is ignored and only the
        # table structure contributes to the score.
        self.struct_only = struct_only

    def tokenize(self, node, tokens):
        """Flatten *node* into a stream of tag tokens and single characters."""
        tokens.append("<%s>" % node.tag)
        if node.text is not None:
            tokens += list(node.text)
        for n in node.getchildren():
            self.tokenize(n, tokens)
        if node.tag != "unk":
            # Closing tag for every element except the "unk" placeholder.
            # (Restored: the literal had been mangled to "" % node.tag,
            # which raises TypeError the moment it executes.)
            tokens.append("</%s>" % node.tag)
        if node.tag != "td" and node.tail is not None:
            tokens += list(node.tail)

    def load_html_tree(self, node, parent=None):
        """Convert an lxml element into a TableTree node for APTED.

        ``td`` elements are leaves carrying colspan/rowspan and (unless
        ``struct_only``) their tokenized text content.
        """
        if node.tag == "td":
            if self.struct_only:
                cell = []
            else:
                tokens = []
                self.tokenize(node, tokens)
                # Drop the enclosing <td> and </td> tokens.
                cell = tokens[1:-1].copy()
            col = int(node.attrib.get("colspan", "1"))
            row = int(node.attrib.get("rowspan", "1"))
            sub = TableTree(node.tag, col, row, cell)
        else:
            sub = TableTree(node.tag)
        if parent is not None:
            parent.children.append(sub)
        if node.tag != "td":
            for n in node.getchildren():
                self.load_html_tree(n, sub)
        return sub

    def evaluate(self, pred, real, **kwargs):
        """Return the TEDS score of *pred* against *real* (HTML strings).

        The first <table> in each document is compared; the edit distance is
        normalized by the larger tree's node count.
        """
        parser = html.HTMLParser(remove_comments=True, encoding="utf-8")
        pred = html.fromstring(pred, parser=parser)
        real = html.fromstring(real, parser=parser)
        pred = next(iter(pred.xpath("//table")), pred)
        real = next(iter(real.xpath("//table")), real)
        assert pred.tag in ("div", "table")
        assert real.tag in ("div", "table")
        if self.ignore_tags:
            etree.strip_tags(pred, *self.ignore_tags)
            etree.strip_tags(real, *self.ignore_tags)
        n_nodes_pred = len(pred.xpath(".//*"))
        n_nodes_real = len(real.xpath(".//*"))
        pred = self.load_html_tree(pred)
        real = self.load_html_tree(real)
        result = APTED(pred, real, Custom()).compute_edit_distance()
        result = 1 - float(result) / max(n_nodes_pred, n_nodes_real)
        return result
WeekDuration (min)Intensity (% HRR)Intensity (RPE)
12050 \u2013 609 \u2013 11
22050 \u2013 609 \u2013 11
3 \u2013 52560 \u2013 7011
6 \u2013 83060 \u2013 7011
9 \u2013 113070 \u2013 8011 \u2013 13
12 \u2013 143570 \u2013 8011 \u2013 13
15 & 164075 \u2013 8513 \u2013 15
\n \n ", "PMC2871264_002_00.png": "\n \n \n \n \n \n \n \n
Name of algorithmNotable features
MACS [23]Uses both a control library and local statistics to minimize bias
SICER [14]Designed for detecting diffusely enriched regions; for example, histone modification
PeakSeq [24]Corrects for reference genome mappability and local statistics
SISSRs [25]High resolution, precise identification of binding-site location
F-seq [26]Uses kernel density estimation
\n \n ", "PMC2915972_003_00.png": "\n \n \n \n \n \n \n \n
No of patients
Gender:
Men24
Women26
Age (years):
30-392
40-498
50-5915
60-6916
70-796
\u2265 803
Tumor site:
Bladder4
Breast10
Colorectal4
Exophageal9
Gynecological7
Lung6
Prostate10
Length of interval between baseline and follow-up interview (median)
< 50 days22
\u2265 50 days28
\n \n ", "PMC3160368_005_00.png": "\n \n \n \n \n \n \n \n
Methods (n-mers used)Average Sensitivity of 5-fold cross validation (%)Average Specificity of 5-fold cross validation (%)
FDAFSA (hexamers)84*86*
PromMachine (tetramers)86+81+
\n \n ", "PMC3568059_003_00.png": "\n \n \n \n \n \n \n \n
Participants during the period;
0 to 3 months3 to 6 months6 to 12 months
Characteristicsn=72n=71n=65
Age, years, median (range)73 (50\u201394)73 (47\u201392)73 (47\u201390)
Patients, n (%)
Female33 (46)27 (38)26 (40)
Male39 (54)44 (62)39 (60)
Stroke classification (TOAST), n (%)
Large vessel disease17 (24)18 (25)17 (26)
Small vessel disease21 (29)21 (30)17 (26)
Cardioembolic stroke15 (21)11 (15)11 (17)
Cryptogenic stroke13 (18)14 (20)12 (19)
Intracerebral haemorrhage6 (8)7 (10)8 (12)
Side of feision, n (%)
Right side lesion35 (49)32 (45)28 (43)
Left side lesion37 (51)39 (53)37 (57)
Hypertension47 (65)44 (62)41 (63)
Diabetes mellitus17 (24)18 (25)17 (26)
Results from clinical scales 1\u20137 days after stroke onset
BBS median (range) (n)35 (0\u201356) (n=71)41 (0\u201356) (n=70)41 (0\u201356) (n=46)
M-MAS UAS-IS median (range)45 (12\u201355) (n=65)47 (12\u201355) (n=65)50 (16\u201355) (n=56)
\n \n ", "PMC3707453_006_00.png": "\n \n \n \n \n \n \n \n
Star Magnitude 1Star Magnitude 6Saturation Charge [%]Capacitanc e Linearity [%]
Noise (g)SN at 10Signal (g)Noise (g)SN at 10 No AD [d]
121200498471882358105000099.2
1439604265016101991327223298.6
1552204185017131471919710998.1
1599504185017591301917201897.8
1624004195017841221915957597.6
164550420501801151914925497.5
\n \n ", "PMC3765162_003_01.png": "\n \n \n \n \n \n \n \n
Men (n = 359)Women (n = 412)
Metabolic syndromeMetabolic syndrome
Baseline characteristicsYes (n = 163)No (n = 196)P-valueYes (n = 96)No (n = 316)P value
Age (years)*61.86 (\u00b10.83)60.32 (\u00b10.77)0.1764.96 (\u00b10.88)58.52 (\u00b10.55)<0.001
Sitting Systolic BP (mmHg)*141.34 (\u00b11.27)132.26 (\u00b11.15)<0.001151.82 (\u00b11.16)137.4( (\u00b10.96)<0.001
Stitting Diastolic BP (mmHg)*85.69 (\u00b10.77)80.79 (\u00b10.73)<0.00189.27 (\u00b10.92)82.67 (\u00b10.51)<0.001
Antitypertensive Therapy (%)50.9%28.4%<0.00160.4%29.4%<0.001
Total Cholesterol (mmol/L)*5.61 (\u00b10.08)5.70 (\u00b10.08)0.566.04 (\u00b10.1)5.99 (\u00b10.06)0.67
LDL cholesterol (mmol/L)*3.44 (\u00b10.06)3.49 (\u00b10.06)0.523.58 (\u00b1 0.06)3.54 (\u00b1 0.04)0.66
HDL cholesterol (mmol/L)*1.03 (\u00b10.63)1.27 (\u00b10.02)<0.0011.20 (\u00b1 0.02)1.48 (\u00b10.016)<0.001
Triglycerides (mmol/L)*2.10 (1.63; 2.64)1.32 (0.98; 1.57)<0.0012.15 (1.78; 2.83)1.24 (0.97; 1.56)<0.001
Diabetes mellitus (%)30.7%6.3%<0.00133.3%2.3%<0.001
BMI (kg/m2)*29.88 (\u00b10.35)26.06 (\u00b10.2)<0.00122.39 (\u00b10.47)26.95 (\u00b10.25)<0.001
ApoA1 Ig/L*1.29 (\u00b10.013)1.40 (\u00b10.017)<0.0011.44 (\u00b10.02)1.55 (\u00b10.001)<0.001
ApoB (g/L)*1.21 (\u00b10.02)1.19 (\u00b10.02)0.481.23 (\u00b10.02)1.18 (\u00b10.014)0.044
Homa index*2.25(1.15; 4.18)0.94(0.51; 1.8)<0.0012.51 (1.67, 3.86)1.14 (0.72; 1.7)<0.001
MITCoffean (mm)*0.79 (\u00b10.15)0.76 (\u00b10.12)0.0840.77 (\u00b10.16)0.69 (\u00b10.13)<0.001
Sum of total plaque area (mm2)*53 (25; 100)42 (10/27)0.00216 (1; 44)8 (1;32)0.01
Sum of plaque area carotids (mm2)*22 (1; 39)12 (1; 27.5)0.0118.75 (1.25.75)1 (1; 19)0.013
Sum of plaque area femoral (mm3)*33(10 6,0)23(1, 49)0.01110 (-17.75)1(1; 6)0.012
\n \n ", "PMC3872294_001_00.png": "\n \n \n \n \n \n \n \n
HC (N = 20)FASD (N = 15)
Age (years)16.3 (2.1)15.3 (2.1)
IQ108 (15)*80 (15)*
Male/female (%male)12/8 (60%)10/5 (67%)
FASD sub diagnosis\u20138 FAS, 7 ARND
\n \n ", "PMC4196076_004_00.png": "\n \n \n \n \n \n \n \n
miRNAChange relative to controlsDirection of regulationChromosomemiRNAChange relative to controlsDirection of regulationChromosome
hsa-miR-11812.13Up19hsa-miR-8742.97Up5
hsa-miR-125a-5p5.04Up19hsa-miR-8902.83UpX
hsa-miR-21-3p2.82Up17hsa-miR-9392.59Up8
hsa-miR-29b-1-pp3.12Up7hsa-miR-1290\u22127.56Down1
hsa-miR-3665-3p2.19Up10hsa-miR-191-3-p\u22122.63Down10
hsa-miR-1327-5p2.01Up2hsa-miR-2861\u22123.31Down9
hsa-miR-3665-3p2.03Up10hsa-miR-3665\u22122.37Down13
hsa-miR-371a-5p3.14Up19hsa-miR-4357\u22123.62Down1
hsa-miR-43272.95Up21hsa-miR-452-5p\u22122.54DownX
hsa-miR-584-5p2.31Up5hsa-miR-513a-5p\u22123.15DownX
hsa-miR-6025.74Up9hsa-miR-572\u22125.80Down4
hsa-miR-629-3p2.71Up15hsa-miR-629-3p\u22123.03Down15
hsa-miR-642b-3p2.10Up19hsa-miR-165\u22127.18Down1
hsa-miR-6513.91UpXhsa-miR-875-5p\u22123.91Down8
hsa-miR-7622.84Up16hsa-miR-940\u22122.31Down16
\n \n ", "PMC4219599_004_00.png": "\n \n \n \n \n \n \n \n
SBE (n = 24)MEA 7n = 24Evele N = 24
Ethnopositive data
Age (yrs)0.1 (0)0.1 (0)43.9 (8)
Male (%)0.3 (0.0)0.1 (0.0)8.1 (10%)
Married0.1 (0.9)0.9 (0%)8.9 (11)
Married29.6 (4.3)27.0 (0.0)27.9 (161)
Preventions Fathers
1 + 11.0 (1%)5 (21%)5.2 (2.8)
1 + 15 (5.9%)1 (1.9%)8 (18%)
4 + 15 (5.9%)11 (5%)21 (69%)
4 + 13 (33%)1 (4%)3 (19%)
41 + 10 (0%)1 (4%)1 (1%)
Others increase stage
CT14 (6.4%)11 (54%)11 (52%)
-715 (5%)0 (0%)0 (0%)
CT25 (5%)0 (0%)0 (0%)
Private wound with schools0 (0%)0 (0%)0 (0%)
Non-sensitive factors40.2 (11.4)41.2 (13.3)45.0 (12.0)
Non-sensitive factors
None1 (11%)1 (13%)6 (18%)
None2 (2.9%)2 (9%)4 (1.7%)
None2 (2.9%)0 (0%)4 (1.9%)
Total survivor0 (0%)0 (0%)0 (0%)
Primary experience8 (9%)23 (80%)*0.0 (0%)
Postoperative followsors
1 + 01 (11%)1 (13%)4 (12%)
1 + 06 (6.7%)15 (57%)21 (61%)
4 + 12 (2%)5 (37%)2 (2%)
4 + 18 (29%)8 (29%)0 (0%)
Pathological survour stage
PT38 (37%)16 (39%)24 (17%)
PT38 (37%)6 (3%)5 (17%)
PT30 (0%)6 (3%)4 (3%)
Positive17 (14%)6 (3%)1 (0.1%)
Positive nempl nodes17 (14%)0.9 (0%)1.0 (1%)
Positive reference in complete hospital stay (n)2.0 (0.4)2.0 (0.2)2.2 (0.3)
Position of pressoreation compression (%)10.5 (10)4.4 (14)8.9 (2.2)
Duration of pressoreation collectivation (%)10.5 (10)8.4 (14)8.9 (9.2)
\n \n ", "PMC4297392_007_00.png": "\n \n \n \n \n \n \n \n
Treatment phaseAdverse eventNo. of patients
T1Swelling1
Itching1
Fever4
Throat infection1
Chest Congestion2
Total9
T2Diarrhea1
Body Pain1
Total2
T3Diarrhea1
Total1
T4Nil-
\n \n ", "PMC4311460_007_00.png": "\n \n \n \n \n \n \n \n
Number PatientsPatients
CategoryType CHP%(N = 4,560)%
IInflammation 6,98711.33,53777.6
IIInfection 3,6295.92,45153.8
IIIInjury 5,5569.03,40174.6
IVSpecific conditions 32,01651.9n.c.
VNeoplasms 3,5925.82,461#54
Maligne 1,219 (27%)
O,ther-benign2,148 1,758 (39%)
VICongenital 4900.8n.c.
VIIOtherwise 9,38315.2n.c.
TotalALL-types 100
\n \n ", "PMC4357206_002_00.png": "\n \n \n \n \n \n \n \n
N = 121
Demographics
Age (yr) - median (IQR)62 (56-73)
Female sex (%)46 (38)
White race (%)112 (93)
Comorbidities (%)
Hypertension64 (53)
Chronic lung disease37 (31)
Active malignancy34 (28)
Diabetes mellitus29 (24)
Chronic kidney disease7 (6)
Congestive heart failure4 (3)
Chronic liver disease2 (2)
Severity of illness
APACHE II score - median (IQR)*14 (10-16)
Chanlson Comorbidity Index - median (IQR)\u20202 (1-4)
ICU type
Surgical102 (84)
SICU66 (54)
TICU36 (30)
Nonsurgical19 (16)
CCU11 (9)
MICU8 (7)
Status of procedure (for surgical patients) (%)
Elective41 (34)
Urgent57 (47)
Dops in hospital prior to enrollment \u2013 median (IQR)1 (1-3)
\n \n ", "PMC4445578_009_01.png": "\n \n \n \n \n \n \n \n
Reactive astrogliossChanges in astrocytes morphologyChanges in molecules expression
Upregulated moleculesUpregulated or downregulated molecules
Mild to moderate astroglosis\u2022 Hypertrophy of cell body\u2022 Structural elements GFAP, nestin, virenetin\u2022 Inflammatory cell regulators, cytokines, growth factors, glutathione
\u2022 Astrocytes processes are are numeroca and thicker\u2022 Transcriptional regulators STAT3, NFASI (Pechem 1076, cAnP6 Chiga, SOX9 [61-65].Trassopteres and purprs; AQP4 and No YK+ transporters [26,64-69]
\u2022 Glutamate transporter [76-73]
\u2022 The non-overlapping domains of individual astrocytes are preserved\u2022 Vascular regulators: PGE, NO [74,75]
\u2022 Energy provision: lactate [76]
\u2022 Molecules implicated in synapse formation and
\u2022 Remodeling thrombospondin and Complement C1q [77,78]
- Significant extension of processes\u2022 Molecules implicated in ovidative stress, and providing protection from oxidative stress: NO, NOS, SOX, Glutathione [67,68,79]
\u2022 Proliferation
\u2022 Overlapping of individual domains
\u2022 Substantial reorganization of tissue activitecute [50]
\n \n ", "PMC4969833_016_01.png": "\n \n \n \n \n \n \n \n
HorizontalNormalVerticalTotal Object
Horizontal383546 (83%)
Normal154762 (87%)
Vertical22111401163 (98%)
\n \n ", "PMC5303243_003_00.png": "\n \n \n \n \n \n \n \n
CharacteristicsTotal (N = 613)MSSA (N = 508)MRSA (N = 105)OR (95%CI)P-value
Age (years) (median, quartiles)72 (66,79)75 (6731)72 (67,78)N/A0.0048
Gender322 (100.0)214 (82.3)57 (17.7)1.4 (0.93\u20132.16)0.5909
Male291 (100.0)255 (83.5)48 (16.5)
Step aging n (%)0,0849
Young Old311 (100.0)267 (85.9)44 (14.1)1.5 (1.00\u20132.35)
O6: O&272 (100.0)219 (80.5)53 (19.5)0.7 (0.49\u20131.13)
Longevity30 (100.0)22 (73.3)8 (26.7)0.6 (0.24\u20131.27)
Disease n (%)<0.0001
PNU47 (100.0)28 (59.6)19 (40.4)0.3 (0.14\u20130.49)
BSI37 (100.0)27 (73.0)10 (27.0)0.5 (0.25\u20131.14)
SSTI416 (100.0)350 (84.1)66 (15.9)1.3 (0.85\u20132.03)
EI62 (100.0)56 (90.3)6 (9.7)1.7 (0.72\u20134.06)
Others51 (100.0)47 (92.2)4 (7.8)2.6 (0.91\u20137.31)
Place of the treatment infections n (%)0.0033
INPATBENTS430 (100.0)352 (81.4)78 (18.1)0.8 (0.49\u20131.26)
LTCF16 (100.0)9 (56.3)7 (43.8)0.3 (0.09\u20130.69)
OUTPATIENTS167 (100.0)147 (88.0)20 (12.0)1.7 (1.03\u20132.92)
Infections treated in hospitals (NPATIENTS N = 430, n (%))
ICU19 (100.0)12 (63.2)7 (36.8)2.8 (1.06\u20137.34)0.014
non-ICU411 (100.0)340 (82.7)71 (17.3)
\n \n ", "PMC5451934_004_00.png": "\n \n \n \n \n \n \n \n
ConditionPre Well-BeingPost Well-BeingPre-Post-Change
TP (handler & dog interaction)46.33 \u00b1 7.41 148.69 \u00b1 7.22+2.36
DO (dog only interaction)49.78 \u00b1 7.9151.56 \u00b1 6.99+1.78 **
HO (handler only interaction)47.37 \u00b1 7.5746.43 \u00b1 8.03\u22120.94 **
\n \n ", "PMC5755158_010_01.png": "\n \n \n \n \n \n \n \n
WeaningWeek 15Off-test
Weaning\u2013\u2013\u2013
Week 15\u20130.17 \u00b1 0.080.16 \u00b1 0.03
Off-test\u20130.80 \u00b1 0.240.19 \u00b1 0.09
\n \n ", "PMC5849724_006_00.png": "\n \n \n \n \n \n \n \n
AnalytesGC-HRMSGC-MS/MSGC-MS
LOQ (ng/CIPP)Estimated LOQ, (ng/cig)LOQ, (ng/CPP)Estimated LOQ, (ng/cig)LOQ (ng/CIPP)Estimated LOQ, (ng/cig)
Naphthalene0.510.0261178.7158.94108.175.41
Benzolylphenamthene0.040.002NDND66.803.34
Benzolylanthracene0.030.00238.571.9338.111.91
Chrysene0.040.00250.132.5149.612.48
Cyclopentid,culysyner0.020.00148.842.4460.043.00
S-Methylchrysene0.040.002NDND2.480.12
Benzo[p]Iluonarthene0.040.00211.440.575.080.25
Benzol[Illicuranthene0.050.00312.410.625.070.25
Benzo[[aceanthrylene]0.090.005NDNDNDND
Benzoliglyreene0.040.0025.010.253.030.15
Indeno(1,2,1-cultypnee0.020.0015.460.271.540.08
Dibenodju/lipinthe cere0.070.0040.830.041.480.07
Dibenzolip/lyprene0.050.003NDNDNDND
Dibenzolyadyprene0.040.0020.800.040.280.01
Dibenzolyuloyene0.060.0031.330.07NDND
Dibenzolya/hyperene0.070.0042.990.15NDND
\n \n ", "PMC6022086_007_00.png": "\n \n \n \n \n \n \n \n
MethodData TypeMean (m)RMSE (m)P90% (m)PGSD (%)
Improved FCMGaofen-35.775.8910.0794.37
Sentinel-16.305.8314.0380.00
Original FCMGaofen-36.977.6613.8790.70
Sentinel-18.534.8113.1490.00
\n \n "} -------------------------------------------------------------------------------- /mutab/metrics/sample_real.json: -------------------------------------------------------------------------------- 1 | {"PMC5755158_010_01.png": {"html": "
WeaningWeek 15Off-test
Weaning\u2013\u2013\u2013
Week 15\u20130.17 \u00b1 0.080.16 \u00b1 0.03
Off-test\u20130.80 \u00b1 0.240.19 \u00b1 0.09
", "tag_len": 44, "cell_len_max": 11, "width": 238, "height": 59, "type": "simple"}, "PMC4445578_009_01.png": {"html": "
Reactive astroglioisChanges in astrocytes morphologyChanges in molecules expression
Upregulated moleculesUpregulated or downregulated molecules
Mild to moderate astrogliosis\u2022 Hypertrophy of cell body\u2022 Structural elements: GFAP, nestin, vimentin\u2022 Inflammatory cell regulators: cytokines, growth factors, glutathione
\u2022 Astrocytes processes are are numerous and thicker\u2022 Transcriptional regulators: STAT3, NF\u03baB, Rheb-m TOR, cAMP, Olig2, SOX9 [61\u201365].\u2022 Transporters and pumps: AQP4 and Na+/K+ transporters [61, 66\u201369]
\u2022 Glutamate transporter [70\u201373]
\u2022 The non-overlapping domains of individual astrocytes are preserved\u2022 Vascular regulators: PGE, NO [74, 75]
\u2022 Energy provision: lactate [76]
\u2022 Molecules implicated in synapse formation and
Severe astrogliosis and glial scar\u2022 Intense hypertrophy of cell body\u2022 Remodeling: thrombospondin and Complement C1q [77, 78]
\u2022 Significant extension of processes\u2022 Molecules implicated in oxidative stress and providing protection from oxidative stress: NO, NOS, SOD, Glutathione [67, 68, 79]
\u2022 Proliferation
\u2022 Overlapping of individual domains
\u2022 Substantial reorganization of tissue architecture [60]
", "tag_len": 116, "cell_len_max": 129, "width": 486, "height": 248, "type": "complex"}, "PMC2871264_002_00.png": {"html": "
Name of algorithmNotable features
MACS [23]Uses both a control library and local statistics to minimize bias
SICER [14]Designed for detecting diffusely enriched regions; for example, histone modification
PeakSeq [24]Corrects for reference genome mappability and local statistics
SISSRs [25]High resolution, precise identification of binding-site location
F-seq [26]Uses kernel density estimation
", "tag_len": 40, "cell_len_max": 84, "width": 238, "height": 124, "type": "simple"}, "PMC3872294_001_00.png": {"html": "
HC (N = 20)FASD (N = 15)
Age (years)16.3 (2.1)15.3 (2.1)
IQ108 (15)*80 (15)*
Male/female (%male)12/8 (60%)10/5 (67%)
FASD sub diagnosis\u20138 FAS, 7 ARND
", "tag_len": 44, "cell_len_max": 19, "width": 251, "height": 88, "type": "simple"}, "PMC2915972_003_00.png": {"html": "
No of patients
Gender:
Men24
Women26
Age (years):
30-392
40-498
50-5915
60-6916
70-796
\u2265 803
Tumor site:
Bladder4
Breast10
Colorectal4
Esophageal9
Gynecological7
Lung6
Prostate10
Length of interval between baseline and follow-up interview
(median)
< 50 days22
\u2265 50 days28
", "tag_len": 142, "cell_len_max": 59, "width": 238, "height": 287, "type": "complex"}, "PMC4196076_004_00.png": {"html": "
miRNAChange relative to controlsDirection of regulationChromosomemiRNAChange relative to controlsDirection of regulationChromosome
hsa-miR-11812.13Up19hsa-miR-8742.97Up5
hsa-miR-125a-5p5.04Up19hsa-miR-8902.83UpX
hsa-miR-21-3p2.82Up17hsa-miR-9392.59Up8
hsa-miR-29b-1-5p3.12Up7hsa-miR-1290\u22127.56Down1
hsa-miR-3663-3p2.19Up10hsa-miR-1915-3p\u22122.63Down10
hsa-miR-3127-5p2.01Up2hsa-miR-2861\u22123.31Down9
hsa-miR-3663-3p2.03Up10hsa-miR-3665\u22122.37Down13
hsa-miR-371a-5p3.14Up19hsa-miR-4257\u22123.62Down1
hsa-miR-43272.95Up21hsa-miR-452-5p\u22122.54DownX
hsa-miR-584-5p2.31Up5hsa-miR-513a-5p\u22123.15DownX
hsa-miR-6025.74Up9hsa-miR-572\u22125.80Down4
hsa-miR-629-3p2.71Up15hsa-miR-629-3p\u22123.03Down15
hsa-miR-642b-3p2.10Up19hsa-miR-765\u22127.18Down1
hsa-miR-6513.91UpXhsa-miR-875-5p\u22123.91Down8
hsa-miR-7622.84Up16hsa-miR-940\u22122.31Down16
", "tag_len": 292, "cell_len_max": 29, "width": 486, "height": 236, "type": "simple"}, "PMC3160368_005_00.png": {"html": "
Methods (n-mers used)Average Sensitivity of 5-fold cross validation (%)Average Specificity of 5-fold cross validation (%)
FDAFSA(hexamers)84*86*
PromMachine(tetramers)86+81+
", "tag_len": 28, "cell_len_max": 52, "width": 238, "height": 71, "type": "simple"}, "PMC3707453_006_00.png": {"html": "
TFC Layer Thickness [\u03bcm]Star Magnitude 1Star Magnitude 6Saturation Charge [e-]Capacitance Linearity [%]
Signal @ 0.1s integr. [e-]Noise @ 0.1s integr. [e-]S/N at 10 bit A/D [dB]Signal @ 0.1s integr. [e-]Noise @ 0.1s integr. [e-]S/N at 10 bit A/D [dB]
0.51212004984718823581050000099.2
1.01439604265016101991327223298.6
1.51552204185017131471919710998.1
1.81599504185017591301917201897.8
2.01624004195017841221915957597.6
2.21645504205018071151914925497.5
", "tag_len": 160, "cell_len_max": 30, "width": 446, "height": 184, "type": "complex"}, "PMC4311460_007_00.png": {"html": "
NumberPatients
CategoryTypeCHP%(N = 4,560)%
IInflammation6,98711.33,53777.6
IIInfection3,6295.92,45153.8
IIIInjury5,5569.03,40174.6
IVSpecific conditions32,01651.9n.c.
VNeoplasms3,5925.82,461#54
Maligne1,4441,219 (27%)
Other-benign2,1481,758 (39%)
VICongenital4900.8n.c.
VIIOtherwise9,38315.2n.c.
TotalALL-types61,653100
", "tag_len": 220, "cell_len_max": 19, "width": 486, "height": 170, "type": "complex"}, "PMC5451934_004_00.png": {"html": "
ConditionPre Well-BeingPost Well-BeingPre-Post Change
TP (handler & dog interaction)46.33 \u00b1 7.41 148.69 \u00b1 7.22+2.36
DO (dog only interaction)49.78 \u00b1 7.9151.56 \u00b1 6.99+1.78 **
HO (handler only interaction)47.37 \u00b1 7.5746.43 \u00b1 8.03\u22120.94 **
", "tag_len": 44, "cell_len_max": 30, "width": 389, "height": 56, "type": "simple"}, "PMC5849724_006_00.png": {"html": "
AnalytesGC\u2013HRMSGC\u2013MS/MSGC\u2013MS
LOQ, (ng/CFPa)Estimated LOQ, (ng/cig)LOQ, (ng/CFPa)Estimated LOQ, (ng/cig)LOQ, (ng/CFPa)Estimated LOQ, (ng/cig)
Naphthalene0.510.0261178.7158.94108.175.41
Benzo[c]phenanthrene0.040.002NDND66.803.34
Benzo[a]anthracene0.030.00238.571.9338.111.91
Chrysene0.040.00250.132.5149.612.48
Cyclopenta[c,d]pyrene0.020.00148.842.4460.043.00
5-Methylchrysene0.040.002NDND2.480.12
Benzo[b]fluoranthene0.040.00211.440.575.080.25
Benzo[k]fluoranthene0.050.00312.410.625.070.25
Benzo[j]aceanthrylene0.090.005NDNDNDND
Benzo[a]pyrene0.040.0025.010.253.030.15
Indeno[1,2,3-c,d]pyrene0.020.0015.460.271.540.08
Dibenzo[a,h]anthracene0.070.0040.830.041.480.07
Dibenzo[a,l]pyrene0.050.003NDNDNDND
Dibenzo[a,e]pyrene0.040.0020.800.040.280.01
Dibenzo[a,i]pyrene0.060.0031.330.07NDND
Dibenzo[a,h]pyrene0.070.0042.990.15NDND
", "tag_len": 292, "cell_len_max": 27, "width": 486, "height": 253, "type": "complex"}, "PMC6022086_007_00.png": {"html": "
MethodData TypeMean (m)RMSE (m)P90% (m)PGSD (%)
Improved FCMGaofen-35.775.8910.0794.37
Sentinel-16.305.8314.0380.00
Original FCMGaofen-36.977.6613.8790.70
Sentinel-18.534.8113.1490.00
", "tag_len": 74, "cell_len_max": 12, "width": 409, "height": 77, "type": "complex"}, "PMC4297392_007_00.png": {"html": "
Treatment phaseAdverse eventNo. of patients
T1Swelling1
Itching1
Fever4
Throat infection1
Chest Congestion2
Total9
T2Diarrhea1
Body Pain1
Total2
T3Diarrhea1
Total1
T4Nil-
", "tag_len": 98, "cell_len_max": 17, "width": 238, "height": 185, "type": "complex"}, "PMC2094709_004_00.png": {"html": "
WeekDuration (min)Intensity (% HRR)Intensity (RPE)
12050 \u2013 609 \u2013 11
22050 \u2013 609 \u2013 11
3 \u2013 52560 \u2013 7011
6 \u2013 83060 \u2013 7011
9 \u2013 113070 \u2013 8011 \u2013 13
12 \u2013 143570 \u2013 8011 \u2013 13
15 & 164075 \u2013 8513 \u2013 15
", "tag_len": 84, "cell_len_max": 19, "width": 503, "height": 107, "type": "simple"}, "PMC3568059_003_00.png": {"html": "
Participants during the period;
0 to 3 months3 to 6 months6 to 12 months
Characteristicsn=72n=71n=65
Age, years, median (range)73 (50\u201394)73 (47\u201392)73 (47\u201390)
Patients, n (%)
Female33 (46)27 (38)26 (40)
Male39 (54)44 (62)39 (60)
Stroke classification (TOAST), n (%)
Large vessel disease17 (24)18 (25)17 (26)
Small vessel disease21 (29)21 (30)17 (26)
Cardioembolic stroke15 (21)11 (15)11 (17)
Cryptogenic stroke13 (18)14 (20)12 (19)
Intracerebral haemorrhage6 (8)7 (10)8 (12)
Side of lesion, n (%)
Right side lesion35 (49)32 (45)28 (43)
Left side lesion37 (51)39 (55)37 (57)
Hypertension47 (65)44 (62)41 (63)
Diabetes mellitus17 (24)18 (25)17 (26)
Results from clinical scales 1\u20137 days after stroke onset
BBS median (range) (n)35 (0\u201356) (n=71)41 (0\u201356) (n=70)41 (0\u201356) (n=64)
M-MAS UAS-95 median (range)45 (12\u201355) (n=65)47 (12\u201355) (n=65)50 (16\u201355) (n=59)
", "tag_len": 208, "cell_len_max": 56, "width": 486, "height": 296, "type": "complex"}, "PMC4357206_002_00.png": {"html": "
N = 121
Demographics
Age (yr) - median (IQR)62 (56-73)
Female sex (%)46 (38)
White race (%)112 (93)
Comorbidities (%)
Hypertension64 (53)
Chronic lung disease37 (31)
Active malignancy34 (28)
Diabetes mellitus29 (24)
Chronic kidney disease7 (6)
Congestive heart failure4 (3)
Chronic liver disease2 (2)
Severity of illness
APACHE II score - median (IQR)*14 (10-16)
Charlson Comorbidity Index - median (IQR)\u20202 (1-4)
ICU type
Surgical102 (84)
SICU66 (54)
TICU36 (30)
Nonsurgical19 (16)
CCU11 (9)
MICU8 (7)
Status of procedure (for surgical patients) (%)
Elective41 (34)
Urgent57 (47)
Days in hospital prior to enrollment \u2013 median (IQR)1 (1-3)
", "tag_len": 166, "cell_len_max": 51, "width": 238, "height": 381, "type": "simple"}, "PMC4219599_004_00.png": {"html": "
ORP (n = 9)RALP (n = 24)Total (n = 33)
Anthropometric data
Age (yr)60 (7)63 (6)62 (6)
Height (m)1.76 (0.07)1.75 (0.05)1.75 (0.06)
Weight (kg)92 (12)83 (10)86 (11)
BMI (kg.m-2)29.6 (4.5)27.3 (3.0)27.9 (3.6)
Preoperative factors
PSA (ng/mL)5.8 (4.2)5.0 (2.1)5.2 (2.8)
Preoperative Gleason score
3 + 31 (11%)5 (21%)6 (18%)
3 + 45 (56%)16 (67%)21 (64%)
4 + 33 (33%)2 (9%)5 (15%)
4 + 40 (0%)1 (4%)1 (3%)
Clinical tumour stage
cT14 (44%)13 (54%)17 (52%)
cT25 (56%)11 (46%)16 (48%)
cT30 (0%)0 (0%)0 (0%)
cT40 (0%)0 (0%)0 (0%)
Prostate volume (cc)40.2 (13.4)41.2 (12.5)40.9 (12.6)
Intraoperative factors
Nerve sparing
None3 (33%)3 (13%)6 (18%)
One bundle2 (22%)2 (9%)4 (12%)
Two bundles4 (44%)19 (79%)23 (70%)
Pelvic lymph node dissection7 (78%)2 (9%)a9 (27%)
Bladder neck preservation0 (0%)23 (96%)a23 (70%)
Postoperative factors
Postoperative Gleason score
3 + 31 (11%)3 (13%)4 (12%)
3 + 46 (67%)16 (67%)22 (67%)
4 + 32 (22%)5 (21%)7 (21%)
4 + 40 (0%)0 (0%)0 (0%)
Pathological tumour stage
pT26 (67%)18 (75%)24 (73%)
pT33 (33%)6 (25%)9 (27%)
pT40 (0%)0 (0%)0 (0%)
Positive lymph nodes1/7 (14%)0/2 (0%)1/9 (11%)
Positive margins2 (22%)2 (9%)4 (12%)
Duration of postoperative hospital stay (d)2.9 (0.3)2.0 (0.2)a2.3 (0.5)
Duration of postoperative catheterization (d)10.2 (3.0)8.4 (1.6)8.9 (2.2)
Anastomic structure0 (0%)1 (4%)1 (3%)
", "tag_len": 414, "cell_len_max": 45, "width": 486, "height": 577, "type": "simple"}, "PMC3765162_003_01.png": {"html": "
Men (n = 359)Women (n = 412)
Metabolic syndromeMetabolic syndrome
Baseline characteristicsYes (n = 163)No (n = 196)P valueYes (n = 96)No (n = 316)P value
Age (years)*61.86 (\u00b10.83)60.32 (\u00b10.77)0.1764.96 (\u00b10.88)58.52 (\u00b10.55)<0.001
Sitting Systolic BP (mmHg)*141.34 (\u00b11.27)132.26 (\u00b11.15)<0.001151.82 (\u00b11.16)137.49 (\u00b10.96)<0.001
Sitting Diastolic BP (mmHg)*85.69 (\u00b10.77)80.79 (\u00b10.73)<0.00189.27 (\u00b10.92)82.67 (\u00b10.51)<0.001
Antihypertensive Therapy (%)50.9%28.4%<0.00160.4%29.4%<0.001
Total Cholesterol (mmol/L)*5.61 (\u00b10.08)5.70 (\u00b10.08)0.566.04 (\u00b10.1)5.99 (\u00b10.06)0.67
LDL cholesterol (mmol/L)*3.44 (\u00b10.06)3.49 (\u00b10.06)0.523.58 (\u00b1 0.06)3.54 (\u00b1 0.04)0.66
HDL cholesterol (mmol/L)*1.03 (\u00b10.63)1.27 (\u00b10.02)<0.0011.20 (\u00b1 0.02)1.48 (\u00b10.016)<0.001
Triglycerides (mmol/L)\u20202.10 (1.63; 2.64)1.32 (0.98; 1.57)<0.0012.15 (1.78; 2.83)1.24 (0.97; 1.56)<0.001
Diabetes mellitus (%)30.7%6.3%<0.00133.3%2.3%<0.001
BMI (kg/m2)*29.88 (\u00b10.35)26.06 (\u00b10.2)<0.00132.39 (\u00b10.47)26.95 (\u00b10.25)<0.001
ApoA1 (g/L)*1.29 (\u00b10.013)1.40 (\u00b10.017)<0.0011.44 (\u00b10.02)1.55 (\u00b10.001)<0.001
ApoB (g/L)*1.21 (\u00b10.02)1.19 (\u00b10.02)0.481.23 (\u00b10.02)1.18 (\u00b10.014)0.044
Homa index\u20202.25(1.15; 4.18)0.94(0.51; 1.8)<0.0012.51 (1.67; 3.86)1.14 (0.72; 1.7)<0.001
IMTccMean (mm)*0.79 (\u00b10.15)0.76 (\u00b10.12)0.0840.77 (\u00b10.16)0.69 (\u00b10.13)<0.001
Sum of total plaque area (mm2)\u202053 (25; 103)42 (10;72)0.00216 (1; 44)8 (1;32)0.01
Sum of plaque area carotids (mm2)\u202022 (1; 39)12 (1; 27.5)0.0118.75 (1;25.75)1 (1; 19)0.013
Sum of plaque area femoral (mm2)\u202033(10; 62)23(1; 49)0.0111(1; 17.75)1(1; 6)0.012
", "tag_len": 316, "cell_len_max": 42, "width": 486, "height": 282, "type": "complex"}, "PMC5303243_003_00.png": {"html": "
CharacteristicsTotal (N = 613)MSSA(N = 508)MRSA (N = 105)OR (95%CI)P-value
Age (years)(median, quartiles)72 (66;79)75 (67;81)72 (65;78)N/A0.0048
Gender:Female322 (100.0)214 (82.3)57 (17.7)1.4 (0.93\u20132.16)0.5909
Male291 (100.0)255 (83.5)48 (16.5)
Step aging n (%)0,0849
Young Old311 (100.0)267 (85.9)44 (14.1)1.5 (1.00\u20132.35)
Old Old272 (100.0)219 (80.5)53 (19.5)0.7 (0.49\u20131.13)
Longevity30 (100.0)22 (73.3)8 (26.7)0.6 (0.24\u20131.27)
Disease n (%)<0.0001
PNU47 (100.0)28 (59.6)19 (40.4)0.3 (0.14\u20130.49)
BSI37 (100.0)27 (73.0)10 (27.0)0.5 (0.25\u20131.14)
SSTI416 (100.0)350 (84.1)66 (15.9)1.3 (0.85\u20132.03)
EI62 (100.0)56 (90.3)6 (9.7)1.7 (0.72\u20134.06)
Others51 (100.0)47 (92.2)4 (7.8)2.6 (0.91\u20137.31)
Place of the treatment infections n (%)0.0033
INPATIENTS430 (100.0)352 (81.4)78 (18.1)0.8 (0.49\u20131.26)
LTCF16 (100.0)9 (56.3)7 (43.8)0.3 (0.09\u20130.69)
OUTPATIENTS167 (100.0)147 (88.0)20 (12.0)1.7 (1.03\u20132.92)
Infections treated in hospitals (INPATIENTS N = 430, n (%))
ICU19 (100.0)12 (63.2)7 (36.8)2.8 (1.06\u20137.34)0.014
non-ICU411 (100.0)340 (82.7)71 (17.3)
", "tag_len": 290, "cell_len_max": 63, "width": 486, "height": 316, "type": "complex"}, "PMC4969833_016_01.png": {"html": "
HorizontalNormalVerticalTotal Object
Horizontal383546 (83%)
Normal154762 (87%)
Vertical22111401163 (98%)
", "tag_len": 52, "cell_len_max": 14, "width": 264, "height": 58, "type": "simple"}} -------------------------------------------------------------------------------- /mutab/metrics/sample_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "PMC2094709_004_00.png": 1.0, 3 | "PMC2871264_002_00.png": 1.0, 4 | "PMC2915972_003_00.png": 0.9298260149130074, 5 | "PMC3160368_005_00.png": 0.994615695248351, 6 | "PMC3568059_003_00.png": 0.9609420535891124, 7 | "PMC3707453_006_00.png": 0.8538903625110521, 8 | "PMC3765162_003_01.png": 0.9867342100509474, 9 | "PMC3872294_001_00.png": 0.9863636363636363, 10 | "PMC4196076_004_00.png": 0.9958653089334908, 11 | "PMC4219599_004_00.png": 0.6029978075326913, 12 | "PMC4297392_007_00.png": 0.8070175438596492, 13 | "PMC4311460_007_00.png": 0.6576923076923077, 14 | "PMC4357206_002_00.png": 0.9295181638546892, 15 | "PMC4445578_009_01.png": 0.6754965084868096, 16 | "PMC4969833_016_01.png": 1.0, 17 | "PMC5303243_003_00.png": 0.6494374120956399, 18 | "PMC5451934_004_00.png": 0.9978213507625272, 19 | "PMC5755158_010_01.png": 1.0, 20 | "PMC5849724_006_00.png": 0.9653439200120101, 21 | "PMC6022086_007_00.png": 1.0 22 | } 23 | -------------------------------------------------------------------------------- /mutab/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import TableResNet 2 | from .decoder import TableDecoder 3 | from .encoder import PositionalEncoding2D 4 | from .factory import build_detector 5 | from .handler import TableHandler 6 | from .loss import BBLoss, CELoss, KLLoss 7 | from .scanner import TableScanner 8 | 9 | __all__ = [ 10 | "BBLoss", 11 | "CELoss", 12 | "KLLoss", 13 | "PositionalEncoding2D", 14 | "TableDecoder", 15 | "TableHandler", 16 | "TableResNet", 17 | "TableScanner", 18 | "build_detector", 19 | ] 20 | -------------------------------------------------------------------------------- 
/mutab/models/backbone.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Mapping 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from mutab.models.factory import BACKBONES, GC_MODULES 8 | from mutab.models.factory import build_gc_module as build 9 | 10 | 11 | class BN(nn.BatchNorm2d): 12 | def __init__(self, d: int, mom=0.1): 13 | super().__init__(d, momentum=mom) 14 | 15 | 16 | class Conv(nn.Conv2d): 17 | def __init__(self, d: int, h: int, k: int): 18 | super().__init__(d, h, k, padding=k // 2, bias=False) 19 | 20 | 21 | class ConvBn(nn.Sequential): 22 | def __init__(self, d: int, h: int, k: int, mom=0.1): 23 | super().__init__(Conv(d, h, k), BN(h, mom=mom)) 24 | 25 | 26 | class ConvBnReLU(nn.Sequential): 27 | def __init__(self, d: int, h: int, k: int, mom=0.1): 28 | super().__init__(ConvBn(d, h, k, mom=mom), nn.ReLU()) 29 | 30 | 31 | @GC_MODULES.register_module() 32 | class GCA(nn.Module): 33 | def __init__(self, d: int, ratio: float, heads: int): 34 | super().__init__() 35 | neck = int(ratio * d) 36 | assert d % heads == 0 37 | self.size = d // heads 38 | self.prob = nn.Softmax(dim=2) 39 | self.mask = nn.Conv2d(self.size, 1, 1) 40 | self.norm = nn.LayerNorm([neck, 1, 1]) 41 | self.c1 = nn.Conv2d(d, neck, 1) 42 | self.c2 = nn.Conv2d(neck, d, 1) 43 | 44 | def forward(self, x): 45 | n, c, h, w = x.size() 46 | mask = self.mask(x.reshape(-1, self.size, h, w)) 47 | mask = self.prob(mask.flatten(-2).unsqueeze(-1)) 48 | y = x.reshape(-1, self.size, h * w).unsqueeze(1) 49 | y = torch.matmul(y, mask).reshape(n, c, 1, 1) 50 | return self.c2(F.relu(self.norm(self.c1(y)))).add(x) 51 | 52 | 53 | class ResidualBlock(nn.Module): 54 | def __init__(self, d: int, h: int, gca: List[str] = [], **gcb): 55 | super().__init__() 56 | self.cv1 = nn.Sequential() 57 | self.cv1.append(ConvBn(d, h, 3, mom=0.9)) 58 | self.cv1.append(nn.ReLU()) 59 | self.cv1.append(ConvBn(h, h, 3, 
mom=0.9)) 60 | self.cv1.extend(build(gcb, type=gc, d=h) for gc in gca) 61 | self.cv2 = ConvBn(d, h, 1) if d != h else nn.Identity() 62 | 63 | def forward(self, x): 64 | return F.relu(self.cv2(x).add(self.cv1(x))) 65 | 66 | 67 | class ResidualGroup(nn.Sequential): 68 | def __init__(self, d: int, h: int, depth: int, **gcb): 69 | super().__init__() 70 | self.append(ResidualBlock(d, h, **gcb)) 71 | self.extend(ResidualBlock(h, h) for _ in range(1, depth)) 72 | 73 | 74 | @BACKBONES.register_module() 75 | class TableResNet(nn.Sequential): 76 | def __init__( 77 | self, 78 | dim: int, 79 | out: int, 80 | gcb1: Mapping[str, Any], 81 | gcb2: Mapping[str, Any], 82 | gcb3: Mapping[str, Any], 83 | gcb4: Mapping[str, Any], 84 | ): 85 | super().__init__() 86 | 87 | ch1 = out // 8 88 | ch2 = out // 4 89 | ch3 = out // 2 90 | 91 | # group1 92 | self.append(ConvBnReLU(dim, ch1, 3)) 93 | self.append(ConvBnReLU(ch1, ch2, 3)) 94 | 95 | # group2 96 | self.append(nn.MaxPool2d(2, ceil_mode=True)) 97 | self.append(ResidualGroup(ch2, ch3, **gcb1)) 98 | 99 | # group3 100 | self.append(nn.MaxPool2d(2, ceil_mode=True)) 101 | self.append(ResidualGroup(ch3, ch3, **gcb2)) 102 | 103 | # group4 104 | self.append(nn.MaxPool2d(2, ceil_mode=True)) 105 | self.append(ResidualGroup(ch3, out, **gcb3)) 106 | self.append(ResidualGroup(out, out, **gcb4)) 107 | -------------------------------------------------------------------------------- /mutab/models/decoder.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import math 3 | from functools import partial 4 | from typing import List 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from positional_encodings import torch_encodings as pos 10 | from rotary_embedding_torch import RotaryEmbedding 11 | 12 | from mutab.models.factory import ATTENTIONS, DECODERS, build_attention 13 | 14 | 15 | class PositionalEncodingAdd(pos.PositionalEncoding1D): 16 | def forward(self, x): 17 
| return super().forward(x).add(x) 18 | 19 | 20 | class Mask(nn.Module): 21 | def forward(self, x, mask): 22 | return x.where(mask, torch.finfo(x.dtype).min) 23 | 24 | 25 | class Linear(nn.Sequential): 26 | def __init__(self, d: int, h: int, *, act=nn.Identity): 27 | super().__init__(nn.LayerNorm(d), nn.Linear(d, h), act()) 28 | 29 | 30 | class Attention(nn.Module, abc.ABC): 31 | def __init__(self, heads: int, d_model: int, **kwargs): 32 | super().__init__() 33 | assert d_model % heads == 0 34 | self.dim = int(d_model // heads) 35 | self.lhd = (-1, heads, self.dim) 36 | self.q = Linear(d_model, d_model) 37 | self.k = Linear(d_model, d_model) 38 | self.v = Linear(d_model, d_model) 39 | self.w = Linear(d_model, d_model) 40 | 41 | def forward(self, q, k, v, **kwargs): 42 | q = self.q(q).view(len(q), *self.lhd).transpose(1, 2) 43 | k = self.k(k).view(len(k), *self.lhd).transpose(1, 2) 44 | v = self.v(v).view(len(v), *self.lhd).transpose(1, 2) 45 | x = self.attention(q, k, v, **kwargs).transpose(1, 2) 46 | return self.w(x.contiguous().flatten(start_dim=2)) 47 | 48 | @property 49 | @abc.abstractmethod 50 | def causal(self) -> bool: 51 | raise NotImplementedError 52 | 53 | @abc.abstractmethod 54 | def attention(self, q, k, v, **kwargs): 55 | raise NotImplementedError 56 | 57 | 58 | @ATTENTIONS.register_module() 59 | class GlobalAttention(Attention): 60 | def __init__(self, dropout: float, **kwargs): 61 | super().__init__(**kwargs) 62 | self.drop = nn.Dropout(dropout) 63 | self.mask = Mask() 64 | 65 | @property 66 | def causal(self): 67 | return False 68 | 69 | def attention(self, q, k, v, mask=None, **kwargs): 70 | p = q.matmul(k.mT.div(math.sqrt(v.size(-1)))) 71 | p = p if mask is None else self.mask(p, mask) 72 | return self.drop(p.softmax(dim=-1)).matmul(v) 73 | 74 | 75 | @ATTENTIONS.register_module() 76 | class WindowAttention(GlobalAttention): 77 | def __init__(self, window: int, **kwargs): 78 | super().__init__(**kwargs) 79 | self.rotary = RotaryEmbedding(self.dim) 
80 | self.window = window 81 | 82 | @property 83 | def causal(self): 84 | return True 85 | 86 | def attention(self, q, k, v, **kwargs): 87 | # buckets 88 | bq = self.bucket(q) 89 | bk = self.unfold(self.bucket(k)) 90 | bv = self.unfold(self.bucket(v)) 91 | 92 | # indices 93 | n = int(bq.shape[-3:-1].numel()) 94 | i = torch.arange(n).to(q.device) 95 | i = self.bucket(i.unsqueeze(-1)) 96 | j = self.unfold(i).mT 97 | 98 | # masking 99 | mask = i.ge(j).logical_and(j.ne(-1)) 100 | 101 | # rotary embedding 102 | bq = self.rotary.rotate_queries_or_keys(bq) 103 | bk = self.rotary.rotate_queries_or_keys(bk) 104 | 105 | # global attention 106 | out = super().attention(q=bq, k=bk, v=bv, mask=mask) 107 | return out.flatten(-3, -2).narrow(-2, 0, q.size(-2)) 108 | 109 | def bucket(self, x): 110 | n = self.window * math.ceil(x.size(-2) / self.window) 111 | x = F.pad(x, pad=(0, 0, 0, n - x.size(-2)), value=-1) 112 | x = torch.stack(x.split(self.window, dim=-2), dim=-3) 113 | return x 114 | 115 | def unfold(self, x): 116 | pad = F.pad(x, pad=(0, 0, 0, 0, 1, 0), value=-1) 117 | pad = pad.narrow(-3, start=0, length=x.size(-3)) 118 | return torch.cat([pad, x], dim=-2) 119 | 120 | 121 | @ATTENTIONS.register_module() 122 | class AbsentAttention(nn.Module): 123 | def __init__(self, **kwargs): 124 | super().__init__() 125 | 126 | def forward(self, q, k, v, **kwargs): 127 | return torch.zeros_like(q) 128 | 129 | 130 | class FeedForward(nn.Sequential): 131 | def __init__(self, d_model: int, **kwargs): 132 | super().__init__() 133 | self.append(Linear(d_model, d_model, act=nn.ReLU)) 134 | self.append(Linear(d_model, d_model, act=nn.Identity)) 135 | 136 | 137 | class Block(nn.Module): 138 | def __init__(self, att1, att2, **kwargs): 139 | super().__init__() 140 | self.att1 = build_attention(att1, **kwargs) 141 | self.att2 = build_attention(att2, **kwargs) 142 | self.feed = FeedForward(**kwargs) 143 | 144 | def forward(self, kwargs): 145 | kwargs.update(**self.perform(**kwargs)) 146 | return 
kwargs 147 | 148 | def perform(self, x, y, mask=None, **kwargs): 149 | x = x.add(self.att1(x, x, x, mask=mask)) 150 | x = x.add(self.att2(x, y, y, mask=None)) 151 | x = x.add(self.feed(x)) 152 | return dict(x=x) 153 | 154 | 155 | class Blocks(nn.Sequential): 156 | def __init__(self, blocks, **kwargs): 157 | block = lambda args: Block(**args, **kwargs) 158 | super().__init__(*tuple(map(block, blocks))) 159 | 160 | def forward(self, **kwargs): 161 | return super().forward(kwargs).get("x") 162 | 163 | 164 | class Fetcher(nn.Module): 165 | def __init__(self, SOC: int, EOS: int, **kwargs): 166 | super().__init__() 167 | 168 | # special tokens 169 | self.register_buffer("SOC", torch.tensor(SOC)) 170 | self.register_buffer("EOS", torch.tensor(EOS)) 171 | 172 | def extract(self, x, mask, size): 173 | return F.pad(x[mask], pad=(0, 0, 0, size - sum(mask))) 174 | 175 | def forward(self, img, hid, seq): 176 | assert hid.ndim == 3 177 | assert seq.ndim == 2 178 | 179 | # masking 180 | soc = torch.isin(seq, self.SOC).unsqueeze(2) 181 | eos = torch.isin(seq, self.EOS).unsqueeze(2) 182 | 183 | # padding 184 | soc = soc.logical_and(eos.cumsum(dim=1).logical_not()) 185 | pad = partial(self.extract, size=soc.sum(dim=1).max()) 186 | 187 | # extract 188 | ext = torch.stack(list(map(pad, hid, soc.squeeze(2)))) 189 | 190 | return hid, ext 191 | 192 | 193 | class Decoder(nn.Module): 194 | def __init__( 195 | self, 196 | d_input: int, 197 | d_model: int, 198 | num_emb: int, 199 | max_len: int, 200 | SOS: int, 201 | EOS: int, 202 | SEP: int, 203 | **kwargs, 204 | ): 205 | super().__init__() 206 | 207 | # special tokens 208 | self.register_buffer("SOS", torch.tensor(SOS)) 209 | self.register_buffer("EOS", torch.tensor(EOS)) 210 | self.register_buffer("SEP", torch.tensor(SEP)) 211 | 212 | # embedding 213 | self.emb = nn.Embedding(num_emb, d_model) 214 | self.pos = PositionalEncodingAdd(d_model) 215 | 216 | # blocks 217 | self.dec = Blocks(d_model=d_model, **kwargs) 218 | self.cat = 
Linear(d_input, d_model) 219 | self.out = Linear(d_model, num_emb) 220 | 221 | # prediction length 222 | self.max_len = max_len 223 | 224 | def predict(self, img, aux): 225 | seq = self.SOS.expand(len(img), 1) 226 | eos = self.EOS.expand(len(img), 1) 227 | for _ in range(self.max_len + 1): 228 | h, out = self(img, seq, aux, argmax=True) 229 | seq = torch.cat([seq[:, :1], out], dim=1) 230 | end = seq.eq(eos).sum(dim=1).bool().sum() 231 | if end.item() == len(img): 232 | break 233 | 234 | return h, out 235 | 236 | def forward(self, img, seq, aux, argmax=False): 237 | # alignment 238 | idx = torch.eq(seq, self.SEP).cumsum(dim=1).unsqueeze(-1) 239 | mat = torch.zeros(*seq.shape, aux.size(1)).to(aux.device) 240 | mat = mat.scatter_(-1, idx.clip_(max=aux.size(1) - 1), 1) 241 | mix = torch.cat([self.emb(seq), mat.matmul(aux)], dim=-1) 242 | 243 | # prediction 244 | hid = self.dec(x=self.pos(self.cat(mix)), y=img, mask=None) 245 | out = self.out(hid).argmax(-1) if argmax else self.out(hid) 246 | 247 | return hid, out 248 | 249 | 250 | @DECODERS.register_module() 251 | class TableDecoder(nn.Module): 252 | def __init__( 253 | self, 254 | d_model: int, 255 | html_decoder, 256 | cell_decoder, 257 | html_fetcher, 258 | num_emb_html: int, 259 | num_emb_cell: int, 260 | max_len_html: int, 261 | max_len_cell: int, 262 | SOC_HTML: List[int], 263 | SOS_HTML: int, 264 | EOS_HTML: int, 265 | SOS_CELL: int, 266 | EOS_CELL: int, 267 | SEP_CELL: int, 268 | **kwargs, 269 | ): 270 | super().__init__() 271 | 272 | # parameters 273 | html_decoder.update(d_model=d_model) 274 | cell_decoder.update(d_model=d_model) 275 | 276 | # alphabet 277 | html_decoder.update(num_emb=num_emb_html) 278 | cell_decoder.update(num_emb=num_emb_cell) 279 | 280 | # capacity 281 | html_decoder.update(max_len=max_len_html) 282 | cell_decoder.update(max_len=max_len_cell) 283 | 284 | # special tokens 285 | html_decoder.update(SOS=SOS_HTML) 286 | html_decoder.update(EOS=EOS_HTML) 287 | html_decoder.update(SEP=EOS_HTML) 
288 | 289 | cell_decoder.update(SOS=SOS_CELL) 290 | cell_decoder.update(EOS=EOS_CELL) 291 | cell_decoder.update(SEP=SEP_CELL) 292 | 293 | html_fetcher.update(SOC=SOC_HTML) 294 | html_fetcher.update(EOS=EOS_HTML) 295 | 296 | # input channels 297 | html_decoder.update(d_input=d_model + 2) 298 | cell_decoder.update(d_input=d_model * 2) 299 | 300 | # other parameters 301 | html_decoder.update(**kwargs) 302 | cell_decoder.update(**kwargs) 303 | 304 | # en/decoders 305 | self.html = Decoder(**html_decoder) 306 | self.cell = Decoder(**cell_decoder) 307 | self.grid = Fetcher(**html_fetcher) 308 | 309 | # bbox 310 | self.bbox = Linear(d_model, 4, act=nn.Sigmoid) 311 | 312 | # LtoR or RtoL 313 | self.register_buffer("LtoR", torch.eye(2)[0]) 314 | self.register_buffer("RtoL", torch.eye(2)[1]) 315 | 316 | def forward(self, img, html, back, cell, **kwargs): 317 | # ground truth 318 | html = html.to(img.device) 319 | back = back.to(img.device) 320 | cell = cell.to(img.device) 321 | 322 | # remove [EOS] 323 | s_html = html[:, :-1] 324 | e_back = back[:, :-1] 325 | s_cell = cell[:, :-1] 326 | 327 | # remove [SOS] 328 | e_html = html[:, 1::] 329 | 330 | # LtoR or RtoL 331 | h_LtoR = self.LtoR.expand(len(img), 1, 2) 332 | h_RtoL = self.RtoL.expand(len(img), 1, 2) 333 | 334 | # structure prediction 335 | h_html, o_html = self.html(img, s_html, h_LtoR) 336 | h_back, o_back = self.html(img, e_back, h_RtoL) 337 | 338 | # character prediction 339 | h_html, h_grid = self.grid(img, h_html, e_html) 340 | h_cell, o_cell = self.cell(img, s_cell, h_grid) 341 | 342 | return dict( 343 | html=o_html, 344 | back=o_back, 345 | cell=o_cell, 346 | bbox=self.bbox(h_html), 347 | ) 348 | 349 | def predict(self, img): 350 | # LtoR 351 | h_LtoR = self.LtoR.expand(len(img), 1, 2) 352 | 353 | # structure prediction 354 | h_html, o_html = self.html.predict(img, h_LtoR) 355 | 356 | # character prediction 357 | h_html, h_grid = self.grid(img, h_html, o_html) 358 | h_cell, o_cell = self.cell.predict(img, 
h_grid) 359 | 360 | return dict(html=o_html, cell=o_cell, bbox=self.bbox(h_html)) 361 | -------------------------------------------------------------------------------- /mutab/models/encoder.py: -------------------------------------------------------------------------------- 1 | from positional_encodings import torch_encodings as pos 2 | 3 | from mutab.models.factory import ENCODERS 4 | 5 | 6 | @ENCODERS.register_module() 7 | class PositionalEncoding2D(pos.PositionalEncodingPermute2D): 8 | def forward(self, img): 9 | return super().forward(img).add(img).flatten(2).mT 10 | -------------------------------------------------------------------------------- /mutab/models/factory.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | from mmdet.models.builder import BACKBONES, DETECTORS, LOSSES 3 | 4 | HANDLERS = Registry("handler") 5 | ENCODERS = Registry("encoder") 6 | DECODERS = Registry("decoder") 7 | ATTENTIONS = Registry("attentions") 8 | GC_MODULES = Registry("gc-modules") 9 | 10 | 11 | def build_from_dict(cfg, registry, **kwargs): 12 | return build_from_cfg(dict(**cfg, **kwargs), registry) 13 | 14 | 15 | def build_detector(cfg, **kwargs): 16 | return build_from_dict(cfg, DETECTORS, **kwargs) 17 | 18 | 19 | def build_backbone(cfg, **kwargs): 20 | return build_from_dict(cfg, BACKBONES, **kwargs) 21 | 22 | 23 | def build_encoder(cfg, **kwargs): 24 | return build_from_dict(cfg, ENCODERS, **kwargs) 25 | 26 | 27 | def build_decoder(cfg, **kwargs): 28 | return build_from_dict(cfg, DECODERS, **kwargs) 29 | 30 | 31 | def build_handler(cfg, **kwargs): 32 | return build_from_dict(cfg, HANDLERS, **kwargs) 33 | 34 | 35 | def build_loss(cfg, **kwargs): 36 | return build_from_dict(cfg, LOSSES, **kwargs) 37 | 38 | 39 | def build_gc_module(cfg, **kwargs): 40 | return build_from_dict(cfg, GC_MODULES, **kwargs) 41 | 42 | 43 | def build_attention(cfg, **kwargs): 44 | return build_from_dict(cfg, 
ATTENTIONS, **kwargs) 45 | -------------------------------------------------------------------------------- /mutab/models/handler.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from functools import cached_property 3 | from itertools import product 4 | from typing import Dict, List 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from more_itertools import flatten, split_at 10 | 11 | from mutab.models.factory import HANDLERS 12 | from mutab.models.revisor import Revisor 13 | 14 | 15 | @HANDLERS.register_module() 16 | class TableHandler(nn.Module): 17 | def __init__( 18 | self, 19 | html_dict_file: str, 20 | cell_dict_file: str, 21 | SOC: List[str], 22 | EOC: List[str], 23 | revisor: Dict[str, str], 24 | ): 25 | super().__init__() 26 | 27 | assert isinstance(html_dict_file, str) 28 | assert isinstance(cell_dict_file, str) 29 | 30 | assert isinstance(SOC, list) 31 | assert isinstance(EOC, list) 32 | 33 | self.SOC = SOC 34 | 35 | self.char2idx_html, self.idx2char_html = self.load(html_dict_file) 36 | self.char2idx_cell, self.idx2char_cell = self.load(cell_dict_file) 37 | 38 | self.SOS_HTML = self.add(self.char2idx_html, self.idx2char_html, "") 39 | self.EOS_HTML = self.add(self.char2idx_html, self.idx2char_html, "") 40 | self.PAD_HTML = self.add(self.char2idx_html, self.idx2char_html, "") 41 | self.UKN_HTML = self.add(self.char2idx_html, self.idx2char_html, "") 42 | 43 | self.SOS_CELL = self.add(self.char2idx_cell, self.idx2char_cell, "") 44 | self.EOS_CELL = self.add(self.char2idx_cell, self.idx2char_cell, "") 45 | self.PAD_CELL = self.add(self.char2idx_cell, self.idx2char_cell, "") 46 | self.SEP_CELL = self.add(self.char2idx_cell, self.idx2char_cell, "") 47 | self.UKN_CELL = self.add(self.char2idx_cell, self.idx2char_cell, "") 48 | 49 | assert len(self.char2idx_html) == len(self.idx2char_html) 50 | assert len(self.char2idx_cell) == 
len(self.idx2char_cell) 51 | 52 | self.char2idx_html = defaultdict(lambda: self.UKN_HTML, self.char2idx_html) 53 | self.char2idx_cell = defaultdict(lambda: self.UKN_CELL, self.char2idx_cell) 54 | 55 | self.revisor = Revisor(**revisor, SOC=SOC, EOC=EOC) 56 | 57 | def load(self, dict_file: str, enc="utf-8"): 58 | with open(dict_file, encoding=enc) as f: 59 | idx2char = list(filter(None, f.read().splitlines())) 60 | char2idx = dict(zip(idx2char, range(len(idx2char)))) 61 | return char2idx, idx2char 62 | 63 | def add(self, char2idx, idx2char, token: str): 64 | idx = len(idx2char) 65 | idx2char.append(token) 66 | char2idx[token] = idx 67 | return idx 68 | 69 | @property 70 | def num_class_html(self): 71 | return len(self.idx2char_html) 72 | 73 | @property 74 | def num_class_cell(self): 75 | return len(self.idx2char_cell) 76 | 77 | @cached_property 78 | def SOC_HTML(self): 79 | return list(self.char2idx_html[v] for v in self.SOC) 80 | 81 | def str2idx(self, strings, char2idx): 82 | return list([char2idx[v] for v in string] for string in strings) 83 | 84 | def idx2str(self, indices, idx2char, join=lambda tokens: tokens): 85 | return list(join([idx2char[i] for i in idx]) for idx in indices) 86 | 87 | def pad_tensor(self, batch, value): 88 | pad = lambda seq, size: F.pad(seq, (0, size - len(seq)), value=value) 89 | return torch.stack([pad(seq, max(map(len, batch))) for seq in batch]) 90 | 91 | def encode_html(self, batch): 92 | samples = [] 93 | for idx in self.str2idx(batch, self.char2idx_html): 94 | idx = (self.SOS_HTML, *idx, self.EOS_HTML) 95 | samples.append(torch.tensor(idx)) 96 | return self.pad_tensor(samples, self.PAD_HTML) 97 | 98 | def encode_cell(self, batch): 99 | samples = [] 100 | sos = self.SOS_CELL 101 | eos = self.EOS_CELL 102 | sep = self.SEP_CELL 103 | for sample in batch: 104 | item = self.str2idx(sample, self.char2idx_cell) 105 | item = flatten(flatten(product(item, [[sep]]))) 106 | samples.append(torch.tensor([sos, *item, eos])) 107 | return 
self.pad_tensor(samples, self.PAD_CELL) 108 | 109 | def decode_html(self, batch): 110 | strip = lambda it: next(split_at(it, lambda n: n == self.EOS_HTML)) 111 | return self.idx2str(map(strip, batch.tolist()), self.idx2char_html) 112 | 113 | def decode_cell(self, batch): 114 | strings = [] 115 | for idx in batch.tolist(): 116 | idx = next(split_at(idx, lambda n: n == self.EOS_CELL)) 117 | idx = list(split_at(idx, lambda n: n == self.SEP_CELL)) 118 | strings.append(self.idx2str(idx, self.idx2char_cell, "".join)) 119 | return strings 120 | 121 | def encode_bbox(self, batch): 122 | pad = lambda bb, k: F.pad(torch.from_numpy(bb), (0, 0, 1, k - len(bb))) 123 | return torch.stack([pad(bb, 1 + max(map(len, batch))) for bb in batch]) 124 | 125 | def decode_bbox(self, batch, mask, img_metas): 126 | results = [] 127 | for bbox, mask, meta in zip(batch, mask, img_metas): 128 | bbox = bbox.cpu().numpy() 129 | mask = mask.cpu().numpy() 130 | scale = meta["img_scale"] 131 | shape = meta["pad_shape"] 132 | bbox[:, 0::2] *= shape[1] 133 | bbox[:, 1::2] *= shape[0] 134 | bbox[:, 0::2] /= scale[1] 135 | bbox[:, 1::2] /= scale[0] 136 | results.append(bbox[mask]) 137 | return results 138 | 139 | def item(self, html, cell, bbox, img_meta): 140 | results = dict(real=self.revisor(**img_meta) if "html" in img_meta else None) 141 | results.update(html=html, cell=cell, bbox=bbox, pred=self.revisor(html, cell)) 142 | return results 143 | 144 | def forward(self, img_metas): 145 | html = self.encode_html([m["html"] for m in img_metas]) 146 | cell = self.encode_cell([m["cell"] for m in img_metas]) 147 | bbox = self.encode_bbox([m["bbox"] for m in img_metas]) 148 | return dict(html=html, back=html.fliplr(), cell=cell, bbox=bbox) 149 | 150 | def reverse(self, html, cell, bbox, img_metas, **kwargs): 151 | mask = torch.isin(html, torch.tensor(self.SOC_HTML).to(html)) 152 | bbox = self.decode_bbox(bbox, mask=mask, img_metas=img_metas) 153 | html = self.decode_html(html) 154 | cell = 
self.decode_cell(cell) 155 | return tuple(map(self.item, html, cell, bbox, img_metas)) 156 | -------------------------------------------------------------------------------- /mutab/models/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from mutab.models.factory import LOSSES 5 | 6 | 7 | @LOSSES.register_module() 8 | class CELoss(nn.Module): 9 | def __init__(self, key: str, ignore_index: int): 10 | super().__init__() 11 | 12 | # keys 13 | self.key = key 14 | self.label = "loss_ce_{}".format(key) 15 | 16 | # loss 17 | self.loss = self.build_loss(ignore_index) 18 | 19 | def build_loss(self, ignore_index): 20 | return nn.CrossEntropyLoss(ignore_index=ignore_index) 21 | 22 | def format(self, outputs, targets): 23 | # outputs [N, C, L] 24 | # targets [N, L] 25 | logit = outputs[self.key].mT 26 | label = targets[self.key][:, 1:] 27 | return logit, label.to(logit.device) 28 | 29 | def forward(self, outputs, targets, img_metas=None): 30 | logit, label = self.format(outputs, targets) 31 | return {self.label: self.loss(logit, label)} 32 | 33 | 34 | @LOSSES.register_module() 35 | class KLLoss(nn.Module): 36 | def __init__(self, key: str, rev: str, ignore_index: int): 37 | super().__init__() 38 | 39 | # keys 40 | self.key = key 41 | self.rev = rev 42 | 43 | # labels 44 | self.loss_key = f"loss_kl_{key}" 45 | self.loss_rev = f"loss_kl_{rev}" 46 | 47 | # prob 48 | self.sm_p = nn.Softmax(dim=2) 49 | self.sm_q = nn.LogSoftmax(dim=2) 50 | 51 | # loss 52 | self.loss = self.build_loss("sum") 53 | 54 | # 55 | pad = torch.tensor(ignore_index) 56 | self.register_buffer("PAD", pad) 57 | 58 | def build_loss(self, reduction): 59 | return nn.KLDivLoss(reduction=reduction) 60 | 61 | def format(self, outputs, targets): 62 | # outputs [N, L, C] 63 | logit_f = outputs[self.key][:, :-1] 64 | logit_b = outputs[self.rev][:, :-1].fliplr() 65 | 66 | # detect 67 | text = targets[self.key][:, 
1:-1].unsqueeze(-1) 68 | mask = ~torch.isin(text.to(self.PAD), self.PAD) 69 | 70 | # P: target 71 | p_f = self.sm_p(logit_b.mul(mask)).detach() 72 | p_b = self.sm_p(logit_f.mul(mask)).detach() 73 | 74 | # Q: output 75 | q_f = self.sm_q(logit_f.mul(mask)) 76 | q_b = self.sm_q(logit_b.mul(mask)) 77 | 78 | return (q_f, p_f), (q_b, p_b), mask 79 | 80 | def forward(self, outputs, targets, img_metas=None): 81 | qp_f, qp_b, mask = self.format(outputs, targets) 82 | kl_f = self.loss(*qp_f).div(mask.sum().clamp(1)) 83 | kl_b = self.loss(*qp_b).div(mask.sum().clamp(1)) 84 | return {self.loss_key: kl_f, self.loss_rev: kl_b} 85 | 86 | 87 | @LOSSES.register_module() 88 | class BBLoss(nn.Module): 89 | def __init__(self, ignore_index: str): 90 | super().__init__() 91 | 92 | # loss 93 | self.loss = self.build_loss("sum") 94 | 95 | # 96 | pad = torch.tensor(ignore_index) 97 | self.register_buffer("PAD", pad) 98 | 99 | def build_loss(self, reduction): 100 | return nn.L1Loss(reduction=reduction) 101 | 102 | def format(self, outputs, targets): 103 | # outputs [N, L, 4] 104 | pred = outputs["bbox"] 105 | 106 | # targets [N, L, 4] 107 | bbox = targets["bbox"][:, 1:].to(pred.device) 108 | 109 | # structural tokens 110 | html = targets["html"][:, 1:].to(pred.device) 111 | 112 | # detect 113 | mask = ~torch.eq(html, self.PAD).unsqueeze(-1) 114 | 115 | # remove 116 | pred = pred.masked_select(mask) 117 | bbox = bbox.masked_select(mask) 118 | 119 | assert pred.dim() == 1 120 | assert bbox.dim() == 1 121 | 122 | # samples 123 | pair_h = pred[0::2], bbox[0::2] 124 | pair_v = pred[1::2], bbox[1::2] 125 | 126 | return pair_h, pair_v, mask 127 | 128 | def forward(self, outputs, targets, img_metas=None): 129 | pair_h, pair_v, mask = self.format(outputs, targets) 130 | loss_h = self.loss(*pair_h).div(mask.sum().clamp(1)) 131 | loss_v = self.loss(*pair_v).div(mask.sum().clamp(1)) 132 | return dict(loss_h=loss_h, loss_v=loss_v) 133 | 
-------------------------------------------------------------------------------- /mutab/models/revisor.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, List 3 | 4 | 5 | class Revisor: 6 | def __init__( 7 | self, 8 | SOC: List[str], 9 | EOC: List[str], 10 | template: str, 11 | patterns: Dict[str, Dict[str, str]], 12 | ): 13 | assert isinstance(SOC, list) 14 | assert isinstance(EOC, list) 15 | 16 | self.SOC = SOC 17 | self.EOC = EOC 18 | 19 | assert isinstance(template, str) 20 | assert isinstance(patterns, dict) 21 | 22 | self.template = template 23 | self.patterns = patterns 24 | 25 | def merge(self, html, cell): 26 | contents = iter(cell) 27 | internal = False 28 | restored = [] 29 | for idx, el in enumerate(html): 30 | if el in self.SOC: 31 | internal = True 32 | if internal and el in self.EOC: 33 | ch = "".join(next(contents, "")) 34 | el = el.replace(" 1: 79 | nn.init.xavier_uniform_(p) 80 | 81 | @auto_fp16(apply_to=["img"]) 82 | def forward(self, img, img_metas, return_loss=True, **kwargs): 83 | if return_loss: 84 | return self.forward_train(img, img_metas) 85 | elif isinstance(img_metas[0], list): 86 | return self.forward_test(img, img_metas[0]) 87 | else: 88 | return self.forward_test(img, img_metas) 89 | 90 | def train_step(self, data, optimizer): 91 | loss = self.parse_losses(self(**data)) 92 | loss.update(num_samples=len(data["img_metas"])) 93 | return loss 94 | 95 | def val_step(self, data, optimizer): 96 | loss = self.parse_losses(self(**data)) 97 | loss.update(num_samples=len(data["img_metas"])) 98 | return loss 99 | 100 | def parse_losses(self, losses): 101 | logs = dict({k: v.mean() for k, v in losses.items()}) 102 | loss = sum(v for k, v in logs.items() if "loss" in k) 103 | logs.update(loss=loss) 104 | for key, value in logs.items(): 105 | # reduce loss when distributed training 106 | if dist.is_available() and dist.is_initialized(): 107 | value = value.data.clone() 108 
| world = int(dist.get_world_size()) 109 | dist.all_reduce(value.div_(world)) 110 | logs[key] = value.item() 111 | return dict(loss=loss, log_vars=logs) 112 | 113 | def forward_train(self, image, img_metas): 114 | targets = self.handler.forward(img_metas) 115 | outputs = self.decoder(self.encoder(self.backbone(image)), **targets) 116 | return ChainMap(*[f(outputs, targets, img_metas) for f in self.loss]) 117 | 118 | def forward_test(self, images, img_metas): 119 | return self.simple_test(images, img_metas) 120 | 121 | def simple_test(self, image, img_metas): 122 | outputs = self.decoder.predict(self.encoder(self.backbone(image))) 123 | return self.handler.reverse(**outputs, img_metas=tuple(img_metas)) 124 | 125 | def predict(self, path: str): 126 | return dict(path=path, **model_inference(self, imread(path))) 127 | -------------------------------------------------------------------------------- /mutab/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | from mutab.optimizer.factory import OPTIMIZERS 2 | 3 | __all__ = ["OPTIMIZERS"] 4 | -------------------------------------------------------------------------------- /mutab/optimizer/factory.py: -------------------------------------------------------------------------------- 1 | from inspect import isclass 2 | 3 | from mmcv.runner.optimizer.builder import OPTIMIZERS 4 | from ranger.ranger2020 import Ranger 5 | from torch import optim 6 | from torch.optim import Optimizer 7 | 8 | 9 | def register_torch_optimizers(): 10 | for name in dir(optim): 11 | if name.startswith("__"): 12 | continue 13 | _optim = getattr(optim, name) 14 | if isclass(_optim) and issubclass(_optim, Optimizer): 15 | if name not in OPTIMIZERS.module_dict.keys(): 16 | OPTIMIZERS.register_module()(_optim) 17 | 18 | if isclass(Ranger) and issubclass(Ranger, Optimizer): 19 | OPTIMIZERS.register_module()(Ranger) 20 | 21 | 22 | register_torch_optimizers() 23 | 
-------------------------------------------------------------------------------- /mutab/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import collect_env, get_logger, pretty_env, visualize_bbox 2 | 3 | __all__ = ["collect_env", "get_logger", "pretty_env", "visualize_bbox"] 4 | -------------------------------------------------------------------------------- /mutab/utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import mmcv.utils as utils 5 | 6 | 7 | def get_logger(**kwargs): 8 | return utils.get_logger("mmdet", **kwargs) 9 | 10 | 11 | def collect_env(): 12 | return dict(**utils.collect_env(), commit=utils.get_git_hash()) 13 | 14 | 15 | def pretty_env(bar: str): 16 | contents = list(f"{k}: {v}" for k, v in collect_env().items()) 17 | return "\n".join(["", bar] + contents + [bar, ""]) 18 | 19 | 20 | def visualize_bbox(bbox, path, save, **kwargs): 21 | img = cv2.imread(path) 22 | for x, y, w, h in bbox: 23 | a = int(x - w / 2), int(y - h / 2) 24 | b = int(x + w / 2), int(y + h / 2) 25 | img = cv2.rectangle(img, a, b, (0, 0, 255), thickness=1) 26 | cv2.imwrite(os.path.join(save, os.path.basename(path)), img) 27 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "mutab" 3 | version = "0.1.0" 4 | dependencies = [ 5 | "apted", 6 | "distance", 7 | "lxml", 8 | "mmcv-full<2", 9 | "mmdet<3", 10 | "mmocr<1", 11 | "more-itertools", 12 | "numpy", 13 | "positional-encodings[pytorch]", 14 | "ranger@git+https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer", 15 | "rotary-embedding-torch", 16 | "torch>=2,<2.1", 17 | "tqdm", 18 | "urllib3<2", 19 | "yapf==0.40.1", 20 | ] 21 | 22 | [tool.setuptools.packages.find] 23 | include = ["mutab"] 24 | 
-------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import lzma 4 | import os 5 | import pickle 6 | import time 7 | from datetime import timedelta as td 8 | from glob import glob 9 | from pathlib import Path 10 | 11 | import numpy as np 12 | from more_itertools import divide 13 | from torch.multiprocessing import set_start_method 14 | from tqdm import tqdm 15 | 16 | from mutab.apis import evaluate 17 | from mutab.utils import visualize_bbox 18 | 19 | EASY = "simple" 20 | HARD = "complex" 21 | 22 | 23 | def main(): 24 | args = argparse.ArgumentParser() 25 | args.add_argument("--gpus", type=int, default=4) 26 | args.add_argument("--show", action="store_true") 27 | args.add_argument("--ckpt", type=str, default="latest.pth") 28 | args.add_argument("--save", type=str, default="results.xz") 29 | args.add_argument("--json", type=str, required=True) 30 | args.add_argument("--conf", type=str, required=True) 31 | args.add_argument("--path", type=str, required=True) 32 | args = args.parse_args() 33 | 34 | root = Path(args.ckpt).parent.expanduser() 35 | 36 | with open(args.json) as f: 37 | jsonl_ground_truth = json.load(f) 38 | 39 | set_start_method("spawn") 40 | count = time.perf_counter() 41 | paths = divide(args.gpus, glob(os.path.join(args.path, "*.png"))) 42 | items = evaluate(paths, args.conf, args.ckpt, jsonl_ground_truth) 43 | count = td(seconds=time.perf_counter() - count) / td(hours=1) 44 | 45 | easy = list(v for v in items.values() if v["type"] == EASY) 46 | hard = list(v for v in items.values() if v["type"] == HARD) 47 | 48 | summary = {} 49 | summary.update(html=np.mean([v["TEDS"]["html"] for v in items.values()])) 50 | summary.update(full=np.mean([v["TEDS"]["full"] for v in items.values()])) 51 | summary.update(easy=np.mean([v["TEDS"]["full"] for v in easy])) 52 | 
summary.update(hard=np.mean([v["TEDS"]["full"] for v in hard])) 53 | 54 | with open(root.joinpath("{}.log".format(args.save)), "w") as f: 55 | print(f"{len(items)} samples in {count:.2f} hours:", file=f) 56 | print(f"AVG TEDS html score: {summary['html']:.4f}", file=f) 57 | print(f"AVG TEDS full score: {summary['full']:.4f}", file=f) 58 | print(f"AVG TEDS easy score: {summary['easy']:.4f}", file=f) 59 | print(f"AVG TEDS hard score: {summary['hard']:.4f}", file=f) 60 | 61 | with lzma.open(root.joinpath(args.save), "wb") as f: 62 | pickle.dump(dict(results=items, summary=summary, **vars(args)), f) 63 | 64 | if args.show: 65 | for name, item in tqdm(list(items.items())): 66 | visualize_bbox(**item, save=root) 67 | 68 | 69 | if __name__ == "__main__": 70 | main() 71 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import argparse 3 | 4 | from mmcv import Config 5 | from mmcv.runner import init_dist 6 | from torch.multiprocessing import set_start_method 7 | 8 | from mutab.apis import train 9 | 10 | 11 | def main(): 12 | args = argparse.ArgumentParser() 13 | args.add_argument("config") 14 | args.add_argument("--work-dir", required=True) 15 | args.add_argument("--launcher", required=False) 16 | args, _ = args.parse_known_args() 17 | 18 | cfg = Config.fromfile(args.config) 19 | cfg.update(**vars(args)) 20 | set_start_method("fork") 21 | 22 | if args.launcher is not None: 23 | init_dist(args.launcher, **cfg.dist_params) 24 | 25 | train(cfg, args.config) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ $# -lt 3 ] 4 | then 5 | echo "Usage: bash $0 CONFIG WORK_DIR GPUS" 6 | exit 7 | fi 8 | 9 | 
BIN=${BIN:-python3} 10 | CONFIG=$1 11 | WORK_DIR=$2 12 | GPUS=$3 13 | 14 | PORT=${PORT:-29500} 15 | SCRIPT=$(dirname $0)/train.py 16 | 17 | if [ ${GPUS} == 1 ]; then 18 | $BIN $SCRIPT $CONFIG --work-dir=${WORK_DIR} 19 | else 20 | $BIN -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT $SCRIPT $CONFIG --work-dir=${WORK_DIR} --launcher pytorch 21 | fi 22 | --------------------------------------------------------------------------------