├── tests ├── __init__.py ├── unit │ ├── __init__.py │ ├── formats │ │ ├── __init__.py │ │ ├── test_labelbox.py │ │ ├── test_kitti.py │ │ ├── test_pascalvoc.py │ │ └── semantic_segmentation │ │ │ └── test_pascalvoc.py │ ├── test_bounding_box.py │ ├── model │ │ ├── test_object_detection.py │ │ └── test_binary_mask_segmentation.py │ └── test_utils.py ├── integration │ ├── __init__.py │ ├── object_detection │ │ ├── __init__.py │ │ ├── test_object_detection.py │ │ ├── test_to_yolov8.py │ │ └── test_inverse.py │ ├── instance_segmentation │ │ ├── __init__.py │ │ ├── test_instance_segmentation.py │ │ ├── test_inverse.py │ │ └── test_to_yolov8.py │ ├── integration_utils.py │ └── test_maskpair_cli.py ├── fixtures │ ├── image_file_loading │ │ └── 0001.png │ ├── instance_segmentation │ │ └── YOLOv8 │ │ │ ├── images │ │ │ ├── 000000036086.jpg │ │ │ └── 000000109005.jpg │ │ │ ├── dataset.yaml │ │ │ └── labels │ │ │ ├── 000000109005.txt │ │ │ └── 000000036086.txt │ ├── object_detection │ │ ├── COCO │ │ │ └── data │ │ │ │ ├── a-weird Filename.asdf.jpg │ │ │ │ └── aNother-weird__ filename.with.many.characters.jpg │ │ ├── lightly │ │ │ ├── images │ │ │ │ ├── a-weird Filename.asdf.jpg │ │ │ │ └── aNother-weird__ filename.with.many.characters.jpg │ │ │ └── detection-task-name │ │ │ │ ├── a-weird Filename.asdf.json │ │ │ │ ├── aNother-weird__ filename.with.many.characters.json │ │ │ │ └── schema.json │ │ ├── KITTI │ │ │ ├── images │ │ │ │ └── a-difficult subfolder │ │ │ │ │ ├── a-weird Filename.asdf.jpg │ │ │ │ │ └── aNother-weird__ filename.with.many.characters.jpg │ │ │ └── labels │ │ │ │ ├── a-weird Filename.asdf.txt │ │ │ │ └── aNother-weird__ filename.with.many.characters.txt │ │ ├── YOLOv8 │ │ │ ├── images │ │ │ │ └── a-difficult subfolder │ │ │ │ │ ├── a-weird Filename.asdf.jpg │ │ │ │ │ └── aNother-weird__ filename.with.many.characters.jpg │ │ │ ├── labels │ │ │ │ └── a-difficult subfolder │ │ │ │ │ ├── a-weird Filename.asdf.txt │ │ │ │ │ └── aNother-weird__ filename.with.many.characters.txt │ │ │ └── example.yaml │ │ ├── PascalVOC │ │ │ ├── a-weird Filename.asdf.xml │ │ │ └── aNother-weird__ filename.with.many.characters.xml │ │ └── Labelbox │ │ │ └── export-result.ndjson │ └── semantic_segmentation │ │ └── pascalvoc │ │ ├── JPEGImages │ │ ├── 2007_000032.jpg │ │ └── subdir │ │ │ └── 2007_000033.jpg │ │ ├── SegmentationClass │ │ ├── 2007_000032.png │ │ └── subdir │ │ │ └── 2007_000033.png │ │ └── class_id_to_name.json ├── simple_object_detection_label_input.py └── simple_instance_segmentation_label_input.py ├── .python-version ├── docs ├── CNAME ├── blog │ └── index.md ├── assets │ ├── labelformat_banner.png │ └── js │ │ └── analytics.js ├── formats │ ├── index.md │ └── object-detection │ │ ├── index.md │ │ ├── lightly.md │ │ ├── yolov26.md │ │ ├── kitti.md │ │ ├── pascalvoc.md │ │ ├── labelbox.md │ │ ├── coco.md │ │ ├── labelformat.md │ │ └── rtdetr.md ├── installation.md ├── about-us.md ├── index.md ├── features.md ├── quick-start.md ├── tutorials │ └── converting-coco-to-yolov8.md └── usage.md ├── src └── labelformat │ ├── __init__.py │ ├── py.typed │ ├── model │ ├── __init__.py │ ├── category.py │ ├── image.py │ ├── multipolygon.py │ ├── semantic_segmentation.py │ ├── instance_segmentation.py │ ├── object_detection.py │ ├── bounding_box.py │ └── binary_mask_segmentation.py │ ├── errors.py │ ├── types.py │ ├── formats │ ├── rtdetr.py │ ├── rtdetrv2.py │ ├── yolov5.py │ ├── yolov6.py │ ├── yolov7.py │ ├── yolov9.py │ ├── yolov10.py │ ├── yolov11.py │ ├── yolov12.py │ ├── yolov26.py │ ├── 
labelformat.py │ ├── __init__.py │ ├── kitti.py │ ├── semantic_segmentation │ │ └── pascalvoc.py │ └── lightly.py │ ├── cli │ ├── registry.py │ └── cli.py │ └── utils.py ├── Makefile ├── .github ├── workflows │ └── run-tests.yml └── copilot-instructions.md ├── LICENSE ├── pyproject.toml ├── mkdocs.yml └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.7.16 2 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | labelformat.com -------------------------------------------------------------------------------- /src/labelformat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/labelformat/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/labelformat/model/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit/formats/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/blog/index.md: -------------------------------------------------------------------------------- 1 | # Blog 2 | 3 | -------------------------------------------------------------------------------- /tests/integration/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/integration/instance_segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/assets/labelformat_banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/docs/assets/labelformat_banner.png -------------------------------------------------------------------------------- /tests/fixtures/image_file_loading/0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/image_file_loading/0001.png -------------------------------------------------------------------------------- /src/labelformat/errors.py: -------------------------------------------------------------------------------- 1 | class LabelWithoutImageError(Exception): 2 | """Raised when a label is found without a corresponding image.""" 3 | 4 | pass 
5 | -------------------------------------------------------------------------------- /src/labelformat/types.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | JsonDict = Dict[str, Any] # type: ignore[misc] 4 | 5 | 6 | class ParseError(Exception): 7 | pass 8 | -------------------------------------------------------------------------------- /tests/fixtures/instance_segmentation/YOLOv8/images/000000036086.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/instance_segmentation/YOLOv8/images/000000036086.jpg -------------------------------------------------------------------------------- /tests/fixtures/instance_segmentation/YOLOv8/images/000000109005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/instance_segmentation/YOLOv8/images/000000109005.jpg -------------------------------------------------------------------------------- /tests/fixtures/object_detection/COCO/data/a-weird Filename.asdf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/object_detection/COCO/data/a-weird Filename.asdf.jpg -------------------------------------------------------------------------------- /src/labelformat/model/category.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | 5 | @dataclass(frozen=True) 6 | class Category: 7 | id: int 8 | name: str 9 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/lightly/images/a-weird Filename.asdf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/object_detection/lightly/images/a-weird Filename.asdf.jpg -------------------------------------------------------------------------------- /tests/fixtures/semantic_segmentation/pascalvoc/JPEGImages/2007_000032.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/semantic_segmentation/pascalvoc/JPEGImages/2007_000032.jpg -------------------------------------------------------------------------------- /src/labelformat/model/image.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass(frozen=True) 5 | class Image: 6 | id: int 7 | filename: str 8 | width: int 9 | height: int 10 | -------------------------------------------------------------------------------- /tests/fixtures/semantic_segmentation/pascalvoc/JPEGImages/subdir/2007_000033.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/semantic_segmentation/pascalvoc/JPEGImages/subdir/2007_000033.jpg -------------------------------------------------------------------------------- /tests/fixtures/semantic_segmentation/pascalvoc/SegmentationClass/2007_000032.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/semantic_segmentation/pascalvoc/SegmentationClass/2007_000032.png -------------------------------------------------------------------------------- /docs/assets/js/analytics.js: -------------------------------------------------------------------------------- 1 | if (window.location.hostname !== 'labelformat.com') { 2 | console.log("Google Analytics disabled on localhost and other non-production environments"); 3 | window['ga-disable-G-K4PH64C9BM'] = true; 4 | } -------------------------------------------------------------------------------- /tests/fixtures/semantic_segmentation/pascalvoc/SegmentationClass/subdir/2007_000033.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/semantic_segmentation/pascalvoc/SegmentationClass/subdir/2007_000033.png -------------------------------------------------------------------------------- /tests/fixtures/object_detection/COCO/data/aNother-weird__ filename.with.many.characters.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/object_detection/COCO/data/aNother-weird__ filename.with.many.characters.jpg -------------------------------------------------------------------------------- /tests/fixtures/object_detection/KITTI/images/a-difficult subfolder/a-weird Filename.asdf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/object_detection/KITTI/images/a-difficult subfolder/a-weird Filename.asdf.jpg -------------------------------------------------------------------------------- /tests/fixtures/instance_segmentation/YOLOv8/dataset.yaml: -------------------------------------------------------------------------------- 1 | path: . 
# dataset root dir 2 | train: images 3 | 4 | # Classes 5 | nc: 4 # number of classes 6 | names: 7 | 0: kite 8 | 1: person 9 | 2: elephant 10 | 3: sports ball 11 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/YOLOv8/images/a-difficult subfolder/a-weird Filename.asdf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/object_detection/YOLOv8/images/a-difficult subfolder/a-weird Filename.asdf.jpg -------------------------------------------------------------------------------- /tests/fixtures/object_detection/lightly/images/aNother-weird__ filename.with.many.characters.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/object_detection/lightly/images/aNother-weird__ filename.with.many.characters.jpg -------------------------------------------------------------------------------- /tests/fixtures/object_detection/KITTI/images/a-difficult subfolder/aNother-weird__ filename.with.many.characters.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/object_detection/KITTI/images/a-difficult subfolder/aNother-weird__ filename.with.many.characters.jpg -------------------------------------------------------------------------------- /tests/fixtures/object_detection/YOLOv8/images/a-difficult subfolder/aNother-weird__ filename.with.many.characters.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lightly-ai/labelformat/HEAD/tests/fixtures/object_detection/YOLOv8/images/a-difficult subfolder/aNother-weird__ filename.with.many.characters.jpg -------------------------------------------------------------------------------- /tests/fixtures/object_detection/YOLOv8/labels/a-difficult subfolder/a-weird Filename.asdf.txt: -------------------------------------------------------------------------------- 1 | 16 0.861211 0.73232 0.0357969 0.0442067 2 | 0 0.817984 0.691791 0.0331875 0.0786779 3 | 0 0.929648 0.6725 0.0284219 0.0548558 4 | 0 0.859836 0.609724 0.00710937 0.0194952 5 | 13 0.615359 0.657127 0.0538438 0.0570913 6 | 13 0.732055 0.705204 0.0629531 0.0557452 7 | 1 0.121414 0.493966 0.00982813 0.0367308 8 | 0 0.951336 0.666106 0.0143594 0.0271635 9 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/YOLOv8/labels/a-difficult subfolder/aNother-weird__ filename.with.many.characters.txt: -------------------------------------------------------------------------------- 1 | 31 0.642445 0.497757 0.241953 0.847196 2 | 31 0.558789 0.463306 0.228891 0.845631 3 | 31 0.490047 0.457991 0.148594 0.687944 4 | 31 0.446477 0.466554 0.134797 0.684556 5 | 31 0.39518 0.444825 0.129297 0.670257 6 | 0 0.971078 0.47597 0.0195 0.038715 7 | 1 0.924641 0.475257 0.0183437 0.0418692 8 | 31 0.493016 0.492839 0.147281 0.689743 9 | -------------------------------------------------------------------------------- /src/labelformat/formats/rtdetr.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .coco import COCOObjectDetectionInput, COCOObjectDetectionOutput 4 | 5 | """ 6 | RT-DETR format follows the same specs as 
COCO. 7 | """ 8 | 9 | 10 | @cli_register(format="rtdetr", task=Task.OBJECT_DETECTION) 11 | class RTDETRObjectDetectionInput(COCOObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="rtdetr", task=Task.OBJECT_DETECTION) 16 | class RTDETRObjectDetectionOutput(COCOObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/rtdetrv2.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .coco import COCOObjectDetectionInput, COCOObjectDetectionOutput 4 | 5 | """ 6 | RT-DETRv2 format follows the same specs as COCO. 7 | """ 8 | 9 | 10 | @cli_register(format="rtdetrv2", task=Task.OBJECT_DETECTION) 11 | class RTDETRv2ObjectDetectionInput(COCOObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="rtdetrv2", task=Task.OBJECT_DETECTION) 16 | class RTDETRv2ObjectDetectionOutput(COCOObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/yolov5.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .yolov8 import YOLOv8ObjectDetectionInput, YOLOv8ObjectDetectionOutput 4 | 5 | """ 6 | YOLOv5 format follows the same specs as YOLOv8. 7 | """ 8 | 9 | 10 | @cli_register(format="yolov5", task=Task.OBJECT_DETECTION) 11 | class YOLOv5ObjectDetectionInput(YOLOv8ObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="yolov5", task=Task.OBJECT_DETECTION) 16 | class YOLOv5ObjectDetectionOutput(YOLOv8ObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/yolov6.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .yolov8 import YOLOv8ObjectDetectionInput, YOLOv8ObjectDetectionOutput 4 | 5 | """ 6 | YOLOv6 format follows the same specs as YOLOv8. 7 | """ 8 | 9 | 10 | @cli_register(format="yolov6", task=Task.OBJECT_DETECTION) 11 | class YOLOv6ObjectDetectionInput(YOLOv8ObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="yolov6", task=Task.OBJECT_DETECTION) 16 | class YOLOv6ObjectDetectionOutput(YOLOv8ObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/yolov7.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .yolov8 import YOLOv8ObjectDetectionInput, YOLOv8ObjectDetectionOutput 4 | 5 | """ 6 | YOLOv7 format follows the same specs as YOLOv8. 
7 | """ 8 | 9 | 10 | @cli_register(format="yolov7", task=Task.OBJECT_DETECTION) 11 | class YOLOv7ObjectDetectionInput(YOLOv8ObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="yolov7", task=Task.OBJECT_DETECTION) 16 | class YOLOv7ObjectDetectionOutput(YOLOv8ObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/yolov9.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .yolov8 import YOLOv8ObjectDetectionInput, YOLOv8ObjectDetectionOutput 4 | 5 | """ 6 | YOLOv9 format follows the same specs as YOLOv8. 7 | """ 8 | 9 | 10 | @cli_register(format="yolov9", task=Task.OBJECT_DETECTION) 11 | class YOLOv9ObjectDetectionInput(YOLOv8ObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="yolov9", task=Task.OBJECT_DETECTION) 16 | class YOLOv9ObjectDetectionOutput(YOLOv8ObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/yolov10.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .yolov8 import YOLOv8ObjectDetectionInput, YOLOv8ObjectDetectionOutput 4 | 5 | """ 6 | YOLOv10 format follows the same specs as YOLOv8. 7 | """ 8 | 9 | 10 | @cli_register(format="yolov10", task=Task.OBJECT_DETECTION) 11 | class YOLOv10ObjectDetectionInput(YOLOv8ObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="yolov10", task=Task.OBJECT_DETECTION) 16 | class YOLOv10ObjectDetectionOutput(YOLOv8ObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/yolov11.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .yolov8 import YOLOv8ObjectDetectionInput, YOLOv8ObjectDetectionOutput 4 | 5 | """ 6 | YOLOv11 format follows the same specs as YOLOv8. 7 | """ 8 | 9 | 10 | @cli_register(format="yolov11", task=Task.OBJECT_DETECTION) 11 | class YOLOv11ObjectDetectionInput(YOLOv8ObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="yolov11", task=Task.OBJECT_DETECTION) 16 | class YOLOv11ObjectDetectionOutput(YOLOv8ObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/yolov12.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .yolov8 import YOLOv8ObjectDetectionInput, YOLOv8ObjectDetectionOutput 4 | 5 | """ 6 | YOLOv12 format follows the same specs as YOLOv8. 
7 | """ 8 | 9 | 10 | @cli_register(format="yolov12", task=Task.OBJECT_DETECTION) 11 | class YOLOv12ObjectDetectionInput(YOLOv8ObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="yolov12", task=Task.OBJECT_DETECTION) 16 | class YOLOv12ObjectDetectionOutput(YOLOv8ObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /src/labelformat/formats/yolov26.py: -------------------------------------------------------------------------------- 1 | from labelformat.cli.registry import Task, cli_register 2 | 3 | from .yolov8 import YOLOv8ObjectDetectionInput, YOLOv8ObjectDetectionOutput 4 | 5 | """ 6 | YOLOv26 format follows the same specs as YOLOv11. 7 | """ 8 | 9 | 10 | @cli_register(format="yolov26", task=Task.OBJECT_DETECTION) 11 | class YOLOv26ObjectDetectionInput(YOLOv8ObjectDetectionInput): 12 | pass 13 | 14 | 15 | @cli_register(format="yolov26", task=Task.OBJECT_DETECTION) 16 | class YOLOv26ObjectDetectionOutput(YOLOv8ObjectDetectionOutput): 17 | pass 18 | -------------------------------------------------------------------------------- /tests/fixtures/semantic_segmentation/pascalvoc/class_id_to_name.json: -------------------------------------------------------------------------------- 1 | { 2 | "0": "background", 3 | "1": "aeroplane", 4 | "2": "bicycle", 5 | "3": "bird", 6 | "4": "boat", 7 | "5": "bottle", 8 | "6": "bus", 9 | "7": "car", 10 | "8": "cat", 11 | "9": "chair", 12 | "10": "cow", 13 | "11": "diningtable", 14 | "12": "dog", 15 | "13": "horse", 16 | "14": "motorbike", 17 | "15": "person", 18 | "16": "pottedplant", 19 | "17": "sheep", 20 | "18": "sofa", 21 | "19": "train", 22 | "20": "tvmonitor", 23 | "255": "void" 24 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | poetry-check: 2 | @echo "🔒 Verifying that poetry.lock is consistent with pyproject.toml..." 3 | poetry lock --check 4 | 5 | format: 6 | isort . 7 | black . 8 | 9 | format-check: 10 | @echo "⚫ Checking code format..." 11 | isort --check-only --diff . 12 | black --check . 13 | 14 | type-check: 15 | @echo "👮 Running type checker" 16 | mypy . 17 | 18 | static-checks: poetry-check format-check type-check 19 | 20 | test: 21 | @echo "🏃 Running tests..." 22 | pytest . 23 | 24 | all-checks: static-checks test 25 | @echo "✅ Great success!" 
26 | 27 | clean: 28 | rm -rf dist 29 | 30 | build: 31 | poetry build 32 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/KITTI/labels/a-weird Filename.asdf.txt: -------------------------------------------------------------------------------- 1 | dog -1 -1 -10 540.0 295.0 563.0 313.0 -1 -1 -1 -1000 -1000 -1000 -10 2 | person -1 -1 -10 513.0 271.0 534.0 304.0 -1 -1 -1 -1000 -1000 -1000 -10 3 | person -1 -1 -10 586.0 268.0 604.0 291.0 -1 -1 -1 -1000 -1000 -1000 -10 4 | person -1 -1 -10 548.0 250.0 553.0 258.0 -1 -1 -1 -1000 -1000 -1000 -10 5 | bench -1 -1 -10 377.0 261.0 411.0 285.0 -1 -1 -1 -1000 -1000 -1000 -10 6 | bench -1 -1 -10 448.0 282.0 488.0 305.0 -1 -1 -1 -1000 -1000 -1000 -10 7 | bicycle with space -1 -1 -10 75.0 198.0 81.0 213.0 -1 -1 -1 -1000 -1000 -1000 -10 8 | person -1 -1 -10 604.0 271.0 613.0 282.0 -1 -1 -1 -1000 -1000 -1000 -10 9 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/KITTI/labels/aNother-weird__ filename.with.many.characters.txt: -------------------------------------------------------------------------------- 1 | snowboard -1 -1 -10 334.0 32.0 489.0 395.0 -1 -1 -1 -1000 -1000 -1000 -10 2 | snowboard -1 -1 -10 284.0 17.0 430.0 379.0 -1 -1 -1 -1000 -1000 -1000 -10 3 | snowboard -1 -1 -10 266.0 49.0 361.0 343.0 -1 -1 -1 -1000 -1000 -1000 -10 4 | snowboard -1 -1 -10 243.0 53.0 329.0 346.0 -1 -1 -1 -1000 -1000 -1000 -10 5 | snowboard -1 -1 -10 212.0 47.0 295.0 334.0 -1 -1 -1 -1000 -1000 -1000 -10 6 | person -1 -1 -10 615.0 195.0 627.0 212.0 -1 -1 -1 -1000 -1000 -1000 -10 7 | bicycle with space -1 -1 -10 586.0 194.0 598.0 212.0 -1 -1 -1 -1000 -1000 -1000 -10 8 | snowboard -1 -1 -10 268.0 63.0 362.0 358.0 -1 -1 -1 -1000 -1000 -1000 -10 9 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/YOLOv8/example.yaml: -------------------------------------------------------------------------------- 1 | path: . 
# dataset root dir 2 | train: images/a-difficult subfolder 3 | nc: 32 # number of classes 4 | 5 | # Classes (we follow the coco examples) 6 | names: 7 | 0: person 8 | 1: bicycle with space 9 | 2: car 10 | 3: motorcycle 11 | 4: airplane 12 | 5: bus 13 | 6: train 14 | 7: truck 15 | 8: boat 16 | 9: traffic light 17 | 10: fire hydrant 18 | 11: stop sign 19 | 12: parking meter 20 | 13: bench 21 | 14: bird 22 | 15: cat 23 | 16: dog 24 | 17: horse 25 | 18: sheep 26 | 19: cow 27 | 20: elephant 28 | 21: bear 29 | 22: zebra 30 | 23: giraffe 31 | 24: backpack 32 | 25: umbrella 33 | 26: handbag 34 | 27: tie 35 | 28: suitcase 36 | 29: frisbee 37 | 30: skis 38 | 31: snowboard -------------------------------------------------------------------------------- /docs/formats/index.md: -------------------------------------------------------------------------------- 1 | # Supported Formats 2 | 3 | ## Object Detection 4 | - [COCO](./object-detection/coco.md) 5 | - [KITTI](./object-detection/kitti.md) 6 | - [Labelbox](./object-detection/labelbox.md) 7 | - [Lightly](./object-detection/lightly.md) 8 | - [PascalVOC](./object-detection/pascalvoc.md) 9 | - [RT-DETR](./object-detection/rtdetr.md) 10 | - [RT-DETRv2](./object-detection/rtdetrv2.md) 11 | - [YOLOv5](./object-detection/yolov5.md) 12 | - [YOLOv6](./object-detection/yolov6.md) 13 | - [YOLOv7](./object-detection/yolov7.md) 14 | - [YOLOv8](./object-detection/yolov8.md) 15 | - [YOLOv9](./object-detection/yolov9.md) 16 | - [YOLOv10](./object-detection/yolov10.md) 17 | - [YOLOv11](./object-detection/yolov11.md) 18 | - [YOLOv12](./object-detection/yolov12.md) 19 | - [YOLOv26](./object-detection/yolov26.md) 20 | -------------------------------------------------------------------------------- /.github/workflows/run-tests.yml: -------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | name: Tests 14 | runs-on: ubuntu-22.04 15 | strategy: 16 | matrix: 17 | python: ["3.8", "3.10"] 18 | 19 | steps: 20 | - name: Checkout code 21 | uses: actions/checkout@v3 22 | 23 | - name: Set up Python 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: ${{ matrix.python }} 27 | 28 | - name: Install Poetry 29 | uses: snok/install-poetry@v1 30 | with: 31 | version: 1.4.2 32 | 33 | - name: Install the package and dependencies 34 | run: | 35 | poetry install 36 | 37 | - name: Run tests 38 | run: | 39 | poetry run make all-checks 40 | -------------------------------------------------------------------------------- /tests/fixtures/instance_segmentation/YOLOv8/labels/000000109005.txt: -------------------------------------------------------------------------------- 1 | 2 0.521891 0.376939 0.545406 0.373738 0.571047 0.387593 0.587437 0.41743 0.595281 0.462173 0.595984 0.472827 0.602406 0.48028 0.595984 0.481332 0.595281 0.519696 0.595984 0.545257 0.588859 0.575093 0.579609 0.569766 0.586016 0.546332 0.581031 0.505841 0.574609 0.474953 0.568922 0.464299 0.565359 0.474953 0.553234 0.456846 0.550391 0.469626 0.544687 0.462173 0.537563 0.48028 0.536859 0.515444 0.536141 0.542079 0.535422 0.562313 0.528312 0.575093 0.515484 0.519696 0.509781 0.560187 0.498375 0.579369 0.486266 0.579369 0.494109 0.552734 0.494812 0.523949 0.494812 0.510117 0.479141 0.51757 0.471312 0.533551 0.470594 0.57722 0.451359 0.574042 0.457766 0.547407 0.454203 0.529276 0.440672 0.52715 0.429984 0.545257 0.433547 0.585748 0.415016 0.583621 0.415016 
0.53993 0.412172 0.473879 0.418578 0.428084 0.432125 0.405701 0.450641 0.390794 0.459906 0.386519 0.470594 0.386519 0.479859 0.391846 0.491969 0.391846 0.503375 0.386519 0.514062 0.383318 0.523313 0.376939 2 | -------------------------------------------------------------------------------- /tests/fixtures/instance_segmentation/YOLOv8/labels/000000036086.txt: -------------------------------------------------------------------------------- 1 | 3 0.248797 0.763484 0.263631 0.755875 0.281017 0.752547 0.296805 0.753984 0.317324 0.758969 0.331224 0.771109 0.339751 0.784656 0.341328 0.800125 0.337531 0.81225 0.329647 0.820578 0.323008 0.827234 0.301535 0.834609 0.281328 0.834844 0.25668 0.830328 0.240892 0.820813 0.230788 0.805594 0.228589 0.798219 0.229523 0.786328 0.23332 0.778719 0.243423 0.76825 2 | 1 0.468465 0.788766 0.504274 0.743812 0.453548 0.552813 0.626598 0.411234 0.617656 0.373031 0.525145 0.359547 0.584834 0.330344 0.626598 0.348312 0.63556 0.260672 0.584834 0.226969 0.605726 0.143813 0.62361 0.128094 0.707158 0.137078 0.710145 0.182016 0.710145 0.235953 0.775788 0.287641 0.877241 0.444937 0.859336 0.471906 0.778776 0.375281 0.754917 0.449437 0.742967 0.521344 0.707158 0.638203 0.737012 0.710109 0.725062 0.840453 0.587822 0.844938 0.575871 0.811234 0.671369 0.79775 0.63556 0.537078 0.590788 0.555063 0.528133 0.573031 0.557967 0.705625 0.593776 0.725844 0.575871 0.761797 0.483382 0.811234 3 | -------------------------------------------------------------------------------- /docs/formats/object-detection/index.md: -------------------------------------------------------------------------------- 1 | # Object Detection Overview 2 | 3 | Object detection is a computer vision task that involves identifying and locating objects within images using rectangular bounding boxes. Each detection includes: 4 | 5 | - A category label (e.g., "car", "person", "dog") 6 | - A bounding box defining the object's location and size 7 | - Optional confidence score indicating detection certainty 8 | 9 | Labelformat supports converting between major object detection annotation formats like COCO, YOLO, and Pascal VOC while preserving the essential bounding box coordinates and category information. 
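To make this concrete, below is a minimal sketch of a single annotated image built from Labelformat's own model classes. The category name, filename, image size, coordinates, and confidence value are illustrative placeholders, not values from a real dataset:

```python
from labelformat.model.bounding_box import BoundingBox
from labelformat.model.category import Category
from labelformat.model.image import Image
from labelformat.model.object_detection import (
    ImageObjectDetection,
    SingleObjectDetection,
)

# A category, and an image with its size in pixels.
category = Category(id=0, name="car")  # placeholder category
image = Image(id=0, filename="image1.jpg", width=640, height=480)  # placeholder image

# One detection: category label, pixel-coordinate box, optional confidence.
label = ImageObjectDetection(
    image=image,
    objects=[
        SingleObjectDetection(
            category=category,
            box=BoundingBox(xmin=100.0, ymin=120.0, xmax=220.0, ymax=180.0),
            confidence=0.9,  # optional; must lie in [0, 1]
        )
    ],
)
```

The format converters all read and write these structures, which is what allows a round trip between formats to preserve the category and bounding box information.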
10 | 11 | ## Supported Formats 12 | 13 | - [COCO](./coco.md) 14 | - [KITTI](./kitti.md) 15 | - [Labelbox](./labelbox.md) 16 | - [Lightly](./lightly.md) 17 | - [PascalVOC](./pascalvoc.md) 18 | - [RT-DETR](./rtdetr.md) 19 | - [RT-DETRv2](./rtdetrv2.md) 20 | - [YOLOv5](./yolov5.md) 21 | - [YOLOv6](./yolov6.md) 22 | - [YOLOv7](./yolov7.md) 23 | - [YOLOv8](./yolov8.md) 24 | - [YOLOv9](./yolov9.md) 25 | - [YOLOv10](./yolov10.md) 26 | - [YOLOv11](./yolov11.md) 27 | - [YOLOv12](./yolov12.md) 28 | - [YOLOv26](./yolov26.md) 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Lightly 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/PascalVOC/a-weird Filename.asdf.xml: -------------------------------------------------------------------------------- 1 | <annotation><filename>a-weird Filename.asdf.jpg</filename><size><width>640</width><height>416</height></size><object><name>dog</name><bndbox><xmin>540.0</xmin><ymin>295.0</ymin><xmax>563.0</xmax><ymax>313.0</ymax></bndbox></object><object><name>person</name><bndbox><xmin>513.0</xmin><ymin>271.0</ymin><xmax>534.0</xmax><ymax>304.0</ymax></bndbox></object><object><name>person</name><bndbox><xmin>586.0</xmin><ymin>268.0</ymin><xmax>604.0</xmax><ymax>291.0</ymax></bndbox></object><object><name>person</name><bndbox><xmin>548.0</xmin><ymin>250.0</ymin><xmax>553.0</xmax><ymax>258.0</ymax></bndbox></object><object><name>bench</name><bndbox><xmin>377.0</xmin><ymin>261.0</ymin><xmax>411.0</xmax><ymax>285.0</ymax></bndbox></object><object><name>bench</name><bndbox><xmin>448.0</xmin><ymin>282.0</ymin><xmax>488.0</xmax><ymax>305.0</ymax></bndbox></object><object><name>bicycle with space</name><bndbox><xmin>75.0</xmin><ymin>198.0</ymin><xmax>81.0</xmax><ymax>213.0</ymax></bndbox></object><object><name>person</name><bndbox><xmin>604.0</xmin><ymin>271.0</ymin><xmax>613.0</xmax><ymax>282.0</ymax></bndbox></object></annotation> -------------------------------------------------------------------------------- /tests/fixtures/object_detection/PascalVOC/aNother-weird__ filename.with.many.characters.xml: -------------------------------------------------------------------------------- 1 | <annotation><filename>aNother-weird__ filename.with.many.characters.jpg</filename><size><width>640</width><height>428</height></size><object><name>snowboard</name><bndbox><xmin>334.0</xmin><ymin>32.0</ymin><xmax>489.0</xmax><ymax>395.0</ymax></bndbox></object><object><name>snowboard</name><bndbox><xmin>284.0</xmin><ymin>17.0</ymin><xmax>430.0</xmax><ymax>379.0</ymax></bndbox></object><object><name>snowboard</name><bndbox><xmin>266.0</xmin><ymin>49.0</ymin><xmax>361.0</xmax><ymax>343.0</ymax></bndbox></object><object><name>snowboard</name><bndbox><xmin>243.0</xmin><ymin>53.0</ymin><xmax>329.0</xmax><ymax>346.0</ymax></bndbox></object><object><name>snowboard</name><bndbox><xmin>212.0</xmin><ymin>47.0</ymin><xmax>295.0</xmax><ymax>334.0</ymax></bndbox></object><object><name>person</name><bndbox><xmin>615.0</xmin><ymin>195.0</ymin><xmax>627.0</xmax><ymax>212.0</ymax></bndbox></object><object><name>bicycle with space</name><bndbox><xmin>586.0</xmin><ymin>194.0</ymin><xmax>598.0</xmax><ymax>212.0</ymax></bndbox></object><object><name>snowboard</name><bndbox><xmin>268.0</xmin><ymin>63.0</ymin><xmax>362.0</xmax><ymax>358.0</ymax></bndbox></object></annotation> -------------------------------------------------------------------------------- /src/labelformat/model/multipolygon.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Tuple 3 | 4 | from labelformat.model.bounding_box import BoundingBox 5 | 6 | Point = Tuple[float, float] 7 | 8 | 9 | @dataclass(frozen=True) 10 | class MultiPolygon: 11 | """MultiPolygon for instance segmentation. 12 | 13 | We assume all polygon coordinates are in pixel coordinates and are 14 | NOT normalized between 0 and 1. 
15 | """ 16 | 17 | polygons: List[List[Point]] 18 | 19 | def bounding_box(self) -> BoundingBox: 20 | """Get the bounding box of this MultiPolygon.""" 21 | if len(self.polygons) == 0: 22 | raise ValueError("Cannot get bounding box of empty MultiPolygon.") 23 | 24 | xmin = self.polygons[0][0][0] 25 | ymin = self.polygons[0][0][1] 26 | xmax = self.polygons[0][0][0] 27 | ymax = self.polygons[0][0][1] 28 | 29 | for polygon in self.polygons: 30 | for point in polygon: 31 | xmin = min(xmin, point[0]) 32 | ymin = min(ymin, point[1]) 33 | xmax = max(xmax, point[0]) 34 | ymax = max(ymax, point[1]) 35 | 36 | return BoundingBox( 37 | xmin=xmin, 38 | ymin=ymin, 39 | xmax=xmax, 40 | ymax=ymax, 41 | ) 42 | -------------------------------------------------------------------------------- /docs/formats/object-detection/lightly.md: -------------------------------------------------------------------------------- 1 | # Lightly Object Detection Format 2 | 3 | ## Overview 4 | The Lightly format is designed for efficient handling of object detection predictions in machine learning workflows. It provides a straightforward structure that's easy to parse and generate. For detailed information about the prediction format, refer to the [Lightly AI documentation](https://docs.lightly.ai/docs/prediction-format#prediction-format). 5 | 6 | ## Specification of Lightly Detection Format 7 | The format uses a JSON file per image containing: 8 | - `file_name`: Name of the image file 9 | - `predictions`: List of object detections 10 | - `category_id`: Integer ID of the object category 11 | - `bbox`: List of [x, y, width, height] in absolute pixel coordinates 12 | - `score`: Optional confidence score (0-1) 13 | 14 | ## File Structure 15 | ``` 16 | dataset/ 17 | ├── images/ 18 | │ ├── image1.jpg 19 | │ └── image2.jpg 20 | └── predictions/ 21 | ├── image1.json 22 | └── image2.json 23 | ``` 24 | 25 | ## Example 26 | ```json 27 | { 28 | "file_name": "image1.jpg", 29 | "predictions": [ 30 | { 31 | "category_id": 0, 32 | "bbox": [100, 200, 50, 30], 33 | "score": 0.95 34 | }, 35 | { 36 | "category_id": 1, 37 | "bbox": [300, 400, 80, 60], 38 | "score": 0.87 39 | } 40 | ] 41 | } 42 | ``` -------------------------------------------------------------------------------- /src/labelformat/formats/labelformat.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from argparse import ArgumentParser 4 | from collections.abc import Iterable 5 | from dataclasses import dataclass 6 | 7 | from labelformat.model.category import Category 8 | from labelformat.model.image import Image 9 | from labelformat.model.instance_segmentation import ( 10 | ImageInstanceSegmentation, 11 | InstanceSegmentationInput, 12 | ) 13 | from labelformat.model.object_detection import ( 14 | ImageObjectDetection, 15 | ObjectDetectionInput, 16 | ) 17 | 18 | 19 | @dataclass 20 | class _CustomBaseInput: 21 | categories: list[Category] 22 | images: list[Image] 23 | 24 | @staticmethod 25 | def add_cli_arguments(parser: ArgumentParser) -> None: 26 | raise ValueError( 27 | "LabelformatObjectDetectionInput does not support CLI arguments" 28 | ) 29 | 30 | def get_categories(self) -> Iterable[Category]: 31 | return self.categories 32 | 33 | def get_images(self) -> Iterable[Image]: 34 | return self.images 35 | 36 | 37 | @dataclass 38 | class LabelformatObjectDetectionInput(_CustomBaseInput, ObjectDetectionInput): 39 | 40 | labels: list[ImageObjectDetection] 41 | 42 | """Class for custom object detection input 
format. 43 | 44 | It can be used standalone or for conversion to other formats. 45 | 46 | """ 47 | 48 | def get_labels(self) -> Iterable[ImageObjectDetection]: 49 | return self.labels 50 | -------------------------------------------------------------------------------- /src/labelformat/model/semantic_segmentation.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | """Semantic segmentation core types and input interface. 4 | """ 5 | 6 | from abc import ABC, abstractmethod 7 | from collections.abc import Iterable 8 | from dataclasses import dataclass 9 | 10 | import numpy as np 11 | from numpy.typing import NDArray 12 | 13 | from labelformat.model.category import Category 14 | from labelformat.model.image import Image 15 | 16 | 17 | @dataclass 18 | class SemanticSegmentationMask: 19 | """Semantic segmentation mask with integer class IDs. 20 | 21 | The mask is stored as a 2D numpy array of integer class IDs with shape (H, W). 22 | 23 | Args: 24 | array: The 2D numpy array with integer class IDs of shape (H, W). 25 | """ 26 | 27 | array: NDArray[np.int_] 28 | 29 | def __post_init__(self) -> None: 30 | if self.array.ndim != 2: 31 | raise ValueError("SemSegMask.array must be 2D with shape (H, W).") 32 | 33 | 34 | class SemanticSegmentationInput(ABC): 35 | 36 | # TODO(Malte, 11/2025): Add a CLI interface later if needed. 37 | 38 | @abstractmethod 39 | def get_categories(self) -> Iterable[Category]: 40 | raise NotImplementedError() 41 | 42 | @abstractmethod 43 | def get_images(self) -> Iterable[Image]: 44 | raise NotImplementedError() 45 | 46 | @abstractmethod 47 | def get_mask(self, image_filepath: str) -> SemanticSegmentationMask: 48 | raise NotImplementedError() 49 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/lightly/detection-task-name/a-weird Filename.asdf.json: -------------------------------------------------------------------------------- 1 | { 2 | "file_name": "a-weird Filename.asdf.jpg", 3 | "predictions": [ 4 | { 5 | "category_id": 16, 6 | "bbox": [ 7 | 540, 8 | 295, 9 | 23, 10 | 18 11 | ], 12 | "score": 0 13 | }, 14 | { 15 | "category_id": 0, 16 | "bbox": [ 17 | 513, 18 | 271, 19 | 21, 20 | 33 21 | ], 22 | "score": 0 23 | }, 24 | { 25 | "category_id": 0, 26 | "bbox": [ 27 | 586, 28 | 268, 29 | 18, 30 | 23 31 | ], 32 | "score": 0 33 | }, 34 | { 35 | "category_id": 0, 36 | "bbox": [ 37 | 548, 38 | 250, 39 | 5, 40 | 8 41 | ], 42 | "score": 0 43 | }, 44 | { 45 | "category_id": 13, 46 | "bbox": [ 47 | 377, 48 | 261, 49 | 34, 50 | 24 51 | ], 52 | "score": 0 53 | }, 54 | { 55 | "category_id": 13, 56 | "bbox": [ 57 | 448, 58 | 282, 59 | 40, 60 | 23 61 | ], 62 | "score": 0 63 | }, 64 | { 65 | "category_id": 1, 66 | "bbox": [ 67 | 75, 68 | 198, 69 | 6, 70 | 15 71 | ], 72 | "score": 0 73 | }, 74 | { 75 | "category_id": 0, 76 | "bbox": [ 77 | 604, 78 | 271, 79 | 9, 80 | 11 81 | ], 82 | "score": 0 83 | } 84 | ] 85 | } -------------------------------------------------------------------------------- /tests/unit/test_bounding_box.py: -------------------------------------------------------------------------------- 1 | from labelformat.model.bounding_box import BoundingBox, BoundingBoxFormat 2 | from labelformat.model.category import Category 3 | 4 | 5 | class TestBoundingBox: 6 | def test_bounding_box(self) -> None: 7 | bounding_box_base = BoundingBox( 8 | xmin=10.0, 9 | ymin=20.0, 10 | xmax=30.0, 11 | ymax=40.0, 12 | ) 13 | 14 | bounding_box_yolo = 
bounding_box_base.from_format( 15 | bbox=[10.0, 20.0, 20.0, 20.0], 16 | format=BoundingBoxFormat.XYWH, 17 | ) 18 | 19 | assert bounding_box_yolo.xmin == bounding_box_base.xmin 20 | assert bounding_box_yolo.ymin == bounding_box_base.ymin 21 | assert bounding_box_yolo.xmax == bounding_box_base.xmax 22 | assert bounding_box_yolo.ymax == bounding_box_base.ymax 23 | 24 | def test_bounding_box_conversions(self) -> None: 25 | bounding_box_base = BoundingBox( 26 | xmin=10.0, 27 | ymin=20.0, 28 | xmax=30.0, 29 | ymax=40.0, 30 | ) 31 | 32 | bounding_box_xywh = bounding_box_base.to_format(BoundingBoxFormat.XYWH) 33 | assert bounding_box_xywh == [10.0, 20.0, 20.0, 20.0] 34 | 35 | bounding_box_xyxy = bounding_box_base.to_format(BoundingBoxFormat.XYXY) 36 | assert bounding_box_xyxy == [10.0, 20.0, 30.0, 40.0] 37 | 38 | bounding_box_cxcywh = bounding_box_base.to_format(BoundingBoxFormat.CXCYWH) 39 | assert bounding_box_cxcywh == [20.0, 30.0, 20.0, 20.0] 40 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | Installing **Labelformat** is straightforward. Follow the steps below to set up Labelformat in your development environment. 4 | 5 | ## Prerequisites 6 | 7 | - **Python 3.8 or higher:** Ensure you have Python installed on Windows, Linux, or macOS. 8 | - **pip:** Python's package installer. It typically comes with Python installations. 9 | 10 | ## Installation using package managers 11 | 12 | Labelformat is available on PyPI and can be installed using various package managers: 13 | 14 | === "pip" 15 | ```bash 16 | pip install labelformat 17 | ``` 18 | 19 | === "Poetry" 20 | ```bash 21 | poetry add labelformat 22 | ``` 23 | 24 | === "Conda" 25 | ```bash 26 | conda install -c conda-forge labelformat 27 | ``` 28 | 29 | === "Rye" 30 | ```bash 31 | rye add labelformat 32 | ``` 33 | 34 | ## Installation from Source 35 | 36 | If you prefer to install Labelformat from the source code, follow these steps: 37 | 38 | 1. Clone the Repository: 39 | ```bash 40 | git clone https://github.com/lightly-ai/labelformat.git 41 | cd labelformat 42 | ``` 43 | 2. Install Dependencies: 44 | Labelformat uses Poetry for dependency management. Ensure you have Poetry installed: 45 | ```bash 46 | pip install poetry 47 | ``` 48 | 3. 
Set Up the Development Environment: 49 | ```bash 50 | poetry install 51 | ``` 52 | 53 | ## Updating Labelformat 54 | 55 | To update Labelformat to the latest version, run: 56 | ```bash 57 | pip install --upgrade labelformat 58 | ``` 59 | 60 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/lightly/detection-task-name/aNother-weird__ filename.with.many.characters.json: -------------------------------------------------------------------------------- 1 | { 2 | "file_name": "aNother-weird__ filename.with.many.characters.jpg", 3 | "predictions": [ 4 | { 5 | "category_id": 31, 6 | "bbox": [ 7 | 334, 8 | 32, 9 | 155, 10 | 363 11 | ], 12 | "score": 0 13 | }, 14 | { 15 | "category_id": 31, 16 | "bbox": [ 17 | 284, 18 | 17, 19 | 146, 20 | 362 21 | ], 22 | "score": 0 23 | }, 24 | { 25 | "category_id": 31, 26 | "bbox": [ 27 | 266, 28 | 49, 29 | 95, 30 | 294 31 | ], 32 | "score": 0 33 | }, 34 | { 35 | "category_id": 31, 36 | "bbox": [ 37 | 243, 38 | 53, 39 | 86, 40 | 293 41 | ], 42 | "score": 0 43 | }, 44 | { 45 | "category_id": 31, 46 | "bbox": [ 47 | 212, 48 | 47, 49 | 83, 50 | 287 51 | ], 52 | "score": 0 53 | }, 54 | { 55 | "category_id": 0, 56 | "bbox": [ 57 | 615, 58 | 195, 59 | 12, 60 | 17 61 | ], 62 | "score": 0 63 | }, 64 | { 65 | "category_id": 1, 66 | "bbox": [ 67 | 586, 68 | 194, 69 | 12, 70 | 18 71 | ], 72 | "score": 0 73 | }, 74 | { 75 | "category_id": 31, 76 | "bbox": [ 77 | 268, 78 | 63, 79 | 94, 80 | 295 81 | ], 82 | "score": 0 83 | } 84 | ] 85 | } -------------------------------------------------------------------------------- /docs/about-us.md: -------------------------------------------------------------------------------- 1 | # About Us 2 | 3 | **Labelformat** is maintained by [Lightly](https://www.lightly.ai), a spin-off from ETH Zurich dedicated to building efficient active learning pipelines for machine learning models. Our mission is to empower data scientists and engineers with tools that streamline data processing and model training workflows. 4 | 5 | ## Our Mission 6 | 7 | At Lightly, we aim to simplify the complexities of active learning and data management, enabling teams to focus on developing cutting-edge machine learning models without getting bogged down by data preparation challenges. 8 | 9 | ## What We Offer 10 | 11 | - **Active Learning Pipelines:** Intelligent data selection to enhance model performance with minimal data. 12 | - **Efficient Data Management:** Tools and services that optimize data workflows for scalability and efficiency. 13 | - **Expert Support:** Dedicated support to help you integrate our solutions seamlessly into your projects. 14 | 15 | ## Learn More 16 | 17 | - [Homepage](https://www.lightly.ai) 18 | - [Web-App](https://app.lightly.ai) 19 | - [Lightly Solution Documentation](https://docs.lightly.ai/) 20 | - [Contact Us](https://www.lightly.ai/contact) 21 | 22 | ## Connect with Us 23 | 24 | Stay updated with the latest developments, tips, and tutorials by following us: 25 | 26 | - [GitHub](https://github.com/lightly-ai) 27 | - [Twitter](https://x.com/LightlyAI) 28 | 29 | --- 30 | 31 | **Labelformat** is part of Lightly's commitment to fostering an open-source ecosystem that benefits the global machine learning community. Join us in making data management and label conversion effortless! 
-------------------------------------------------------------------------------- /tests/unit/model/test_object_detection.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import numpy as np 4 | import pytest 5 | 6 | from labelformat.model.binary_mask_segmentation import ( 7 | BinaryMaskSegmentation, 8 | RLEDecoderEncoder, 9 | ) 10 | from labelformat.model.bounding_box import BoundingBox 11 | from labelformat.model.category import Category 12 | from labelformat.model.object_detection import SingleObjectDetection 13 | 14 | 15 | class TestSingleObjectDetection: 16 | @pytest.mark.parametrize( 17 | "confidence", 18 | [ 19 | None, 20 | 0.0, 21 | 1.0, 22 | ], 23 | ) 24 | def test_confidence_valid(self, confidence: float | None) -> None: 25 | detection = SingleObjectDetection( 26 | category=Category(id=0, name="cat"), 27 | box=BoundingBox(xmin=0, ymin=0, xmax=1, ymax=1), 28 | confidence=confidence, 29 | ) 30 | assert detection.confidence == confidence 31 | 32 | @pytest.mark.parametrize( 33 | "confidence, expected_error", 34 | [ 35 | (-0.1, "Confidence must be between 0 and 1, but got: -0.1"), 36 | (1.1, "Confidence must be between 0 and 1, but got: 1.1"), 37 | ], 38 | ) 39 | def test_confidence_out_of_bounds( 40 | self, confidence: float, expected_error: str 41 | ) -> None: 42 | with pytest.raises(ValueError, match=expected_error): 43 | SingleObjectDetection( 44 | category=Category(id=0, name="cat"), 45 | box=BoundingBox(xmin=0, ymin=0, xmax=1, ymax=1), 46 | confidence=confidence, 47 | ) 48 | -------------------------------------------------------------------------------- /src/labelformat/model/instance_segmentation.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import ABC, abstractmethod 4 | from argparse import ArgumentParser 5 | from dataclasses import dataclass 6 | from typing import Iterable 7 | 8 | from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation 9 | from labelformat.model.category import Category 10 | from labelformat.model.image import Image 11 | from labelformat.model.multipolygon import MultiPolygon 12 | 13 | 14 | @dataclass(frozen=True) 15 | class SingleInstanceSegmentation: 16 | category: Category 17 | segmentation: MultiPolygon | BinaryMaskSegmentation 18 | 19 | 20 | @dataclass(frozen=True) 21 | class ImageInstanceSegmentation: 22 | image: Image 23 | objects: list[SingleInstanceSegmentation] 24 | 25 | 26 | class InstanceSegmentationInput(ABC): 27 | @staticmethod 28 | @abstractmethod 29 | def add_cli_arguments(parser: ArgumentParser) -> None: 30 | raise NotImplementedError() 31 | 32 | @abstractmethod 33 | def get_categories(self) -> Iterable[Category]: 34 | raise NotImplementedError() 35 | 36 | @abstractmethod 37 | def get_images(self) -> Iterable[Image]: 38 | raise NotImplementedError() 39 | 40 | @abstractmethod 41 | def get_labels(self) -> Iterable[ImageInstanceSegmentation]: 42 | raise NotImplementedError() 43 | 44 | 45 | class InstanceSegmentationOutput(ABC): 46 | @staticmethod 47 | @abstractmethod 48 | def add_cli_arguments(parser: ArgumentParser) -> None: 49 | raise NotImplementedError() 50 | 51 | def save(self, label_input: InstanceSegmentationInput) -> None: 52 | raise NotImplementedError() 53 | -------------------------------------------------------------------------------- /src/labelformat/cli/registry.py: 
-------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import Callable, Dict, Type 4 | 5 | from labelformat.model.instance_segmentation import ( 6 | InstanceSegmentationInput, 7 | InstanceSegmentationOutput, 8 | ) 9 | from labelformat.model.object_detection import ( 10 | ObjectDetectionInput, 11 | ObjectDetectionOutput, 12 | ) 13 | 14 | 15 | class Task(Enum): 16 | INSTANCE_SEGMENTATION = "instance-segmentation" 17 | OBJECT_DETECTION = "object-detection" 18 | 19 | 20 | @dataclass 21 | class Registry: # type: ignore[misc] 22 | input: Dict[Task, Dict[str, Type]] # type: ignore[type-arg] 23 | output: Dict[Task, Dict[str, Type]] # type: ignore[type-arg] 24 | 25 | 26 | _REGISTRY = Registry( 27 | input={task: {} for task in Task}, output={task: {} for task in Task} 28 | ) 29 | 30 | 31 | def cli_register(format: str, task: Task) -> Callable[[Type], Type]: # type: ignore[type-arg] 32 | def decorator(cls: Type) -> Type: # type: ignore[type-arg] 33 | if issubclass(cls, ObjectDetectionInput) or issubclass( 34 | cls, InstanceSegmentationInput 35 | ): 36 | _REGISTRY.input[task][format] = cls 37 | elif issubclass(cls, ObjectDetectionOutput) or issubclass( 38 | cls, InstanceSegmentationOutput 39 | ): 40 | _REGISTRY.output[task][format] = cls 41 | else: 42 | raise ValueError( 43 | "Can only register classes which extend one of: " 44 | f"'{ObjectDetectionInput}', " 45 | f"'{InstanceSegmentationInput}', " 46 | f"'{ObjectDetectionOutput}', " 47 | f"'{InstanceSegmentationOutput}'" 48 | ) 49 | return cls 50 | 51 | return decorator 52 | -------------------------------------------------------------------------------- /tests/unit/formats/test_labelbox.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from labelformat.formats import labelbox 4 | from labelformat.formats.labelbox import FilenameKeyOption 5 | from labelformat.types import ParseError 6 | 7 | 8 | def test_has_illegal_char() -> None: 9 | assert labelbox._has_illegal_char("filename/with/slash") 10 | assert labelbox._has_illegal_char("filename\\with\\backslash") 11 | assert labelbox._has_illegal_char("filename:with:colon") 12 | assert not labelbox._has_illegal_char("valid_filename") 13 | 14 | 15 | def test_image_from_data_row__valid() -> None: 16 | data_row = { 17 | "data_row": {"global_key": "image123", "id": "123"}, 18 | "media_attributes": {"width": 800, "height": 600}, 19 | } 20 | image = labelbox._image_from_data_row( 21 | image_id=1, data_row=data_row, filename_key=FilenameKeyOption.GLOBAL_KEY 22 | ) 23 | assert image.id == 1 24 | assert image.filename == "image123" 25 | assert image.width == 800 26 | assert image.height == 600 27 | 28 | 29 | def test_image_from_data_row__illegal_char() -> None: 30 | data_row = { 31 | "data_row": {"global_key": "image/123", "id": "123"}, 32 | "media_attributes": {"width": 800, "height": 600}, 33 | } 34 | with pytest.raises(ParseError): 35 | labelbox._image_from_data_row( 36 | image_id=1, data_row=data_row, filename_key=FilenameKeyOption.GLOBAL_KEY 37 | ) 38 | 39 | 40 | def test_image_from_data_row__key_not_found() -> None: 41 | data_row = { 42 | "data_row": {"id": "123"}, 43 | "media_attributes": {"width": 800, "height": 600}, 44 | } 45 | with pytest.raises(ParseError): 46 | labelbox._image_from_data_row( 47 | image_id=1, data_row=data_row, filename_key=FilenameKeyOption.GLOBAL_KEY 48 | ) 49 | 
-------------------------------------------------------------------------------- /src/labelformat/model/object_detection.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from abc import ABC, abstractmethod 4 | from argparse import ArgumentParser 5 | from dataclasses import dataclass 6 | from typing import Iterable, List 7 | 8 | from labelformat.model.bounding_box import BoundingBox 9 | from labelformat.model.category import Category 10 | from labelformat.model.image import Image 11 | 12 | 13 | @dataclass(frozen=True) 14 | class SingleObjectDetection: 15 | category: Category 16 | box: BoundingBox 17 | confidence: float | None = None 18 | 19 | def __post_init__(self) -> None: 20 | if self.confidence is not None and not (0 <= self.confidence <= 1): 21 | raise ValueError( 22 | f"Confidence must be between 0 and 1, but got: {self.confidence}" 23 | ) 24 | 25 | 26 | @dataclass(frozen=True) 27 | class ImageObjectDetection: 28 | image: Image 29 | objects: List[SingleObjectDetection] 30 | 31 | 32 | class ObjectDetectionInput(ABC): 33 | @staticmethod 34 | @abstractmethod 35 | def add_cli_arguments(parser: ArgumentParser) -> None: 36 | raise NotImplementedError() 37 | 38 | @abstractmethod 39 | def get_categories(self) -> Iterable[Category]: 40 | raise NotImplementedError() 41 | 42 | @abstractmethod 43 | def get_images(self) -> Iterable[Image]: 44 | raise NotImplementedError() 45 | 46 | @abstractmethod 47 | def get_labels(self) -> Iterable[ImageObjectDetection]: 48 | raise NotImplementedError() 49 | 50 | 51 | class ObjectDetectionOutput(ABC): 52 | @staticmethod 53 | @abstractmethod 54 | def add_cli_arguments(parser: ArgumentParser) -> None: 55 | raise NotImplementedError() 56 | 57 | def save(self, label_input: ObjectDetectionInput) -> None: 58 | raise NotImplementedError() 59 | -------------------------------------------------------------------------------- /tests/integration/instance_segmentation/test_instance_segmentation.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | from labelformat.formats.coco import ( 5 | COCOInstanceSegmentationInput, 6 | COCOInstanceSegmentationOutput, 7 | ) 8 | 9 | from ..integration_utils import ( 10 | INST_SEGMENTATION_FIXTURES_DIR, 11 | assert_almost_equal_recursive, 12 | ) 13 | 14 | 15 | def test_coco_to_coco(tmp_path: Path) -> None: 16 | coco_file = INST_SEGMENTATION_FIXTURES_DIR / "COCO/instances_with_binary_mask.json" 17 | label_input = COCOInstanceSegmentationInput(input_file=coco_file) 18 | COCOInstanceSegmentationOutput( 19 | output_file=tmp_path / "instances_with_binary_mask.json" 20 | ).save(label_input=label_input) 21 | 22 | # Compare jsons. 
23 | output_json = json.loads((tmp_path / "instances_with_binary_mask.json").read_text()) 24 | expected_json = json.loads( 25 | ( 26 | INST_SEGMENTATION_FIXTURES_DIR / "COCO/instances_with_binary_mask.json" 27 | ).read_text() 28 | ) 29 | # Some fields are not converted: 30 | # - info 31 | # - licenses 32 | # - .supercategory 33 | # - .date_captured 34 | # - .license 35 | # - .flickr_url 36 | # - .coco_url 37 | # - .id 38 | # - .area 39 | del expected_json["info"] 40 | del expected_json["licenses"] 41 | for category in expected_json["categories"]: 42 | del category["supercategory"] 43 | for image in expected_json["images"]: 44 | del image["date_captured"] 45 | del image["license"] 46 | del image["flickr_url"] 47 | del image["coco_url"] 48 | for annotation in expected_json["annotations"]: 49 | del annotation["id"] 50 | del annotation["area"] 51 | assert_almost_equal_recursive(output_json, expected_json) 52 | -------------------------------------------------------------------------------- /tests/simple_object_detection_label_input.py: -------------------------------------------------------------------------------- 1 | from labelformat.formats.labelformat import LabelformatObjectDetectionInput 2 | from labelformat.model.bounding_box import BoundingBox 3 | from labelformat.model.category import Category 4 | from labelformat.model.image import Image 5 | from labelformat.model.object_detection import ( 6 | ImageObjectDetection, 7 | SingleObjectDetection, 8 | ) 9 | 10 | 11 | def get_input( 12 | filename: str = "image.jpg", with_confidence: bool = False 13 | ) -> LabelformatObjectDetectionInput: 14 | categories = [ 15 | Category(id=0, name="cat"), 16 | Category(id=1, name="dog"), 17 | Category(id=2, name="cow"), 18 | ] 19 | images = [ 20 | Image(id=0, filename=filename, width=100, height=200), 21 | ] 22 | labels = [ 23 | ImageObjectDetection( 24 | image=images[0], 25 | objects=[ 26 | SingleObjectDetection( 27 | category=categories[1], 28 | box=BoundingBox( 29 | xmin=10.0, 30 | ymin=20.0, 31 | xmax=30.0, 32 | ymax=40.0, 33 | ), 34 | confidence=0.4 if with_confidence else None, 35 | ), 36 | SingleObjectDetection( 37 | category=categories[0], 38 | box=BoundingBox( 39 | xmin=50.0, 40 | ymin=60.0, 41 | xmax=70.0, 42 | ymax=80.0, 43 | ), 44 | confidence=0.8 if with_confidence else None, 45 | ), 46 | ], 47 | ) 48 | ] 49 | 50 | return LabelformatObjectDetectionInput( 51 | categories=categories, 52 | images=images, 53 | labels=labels, 54 | ) 55 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core>=1.0.0"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "labelformat" 7 | version = "0.1.9" 8 | authors = ["Lightly.ai"] 9 | description = "A tool for converting computer vision label formats." 
10 | readme = "README.md" 11 | license = "MIT" 12 | 13 | [tool.poetry.dependencies] 14 | python = ">=3.8" 15 | tqdm = "*" 16 | pyyaml = "*" 17 | pillow = "*" 18 | pydantic-xml = "*" 19 | numpy = "*" 20 | 21 | [tool.poetry.group.dev.dependencies] 22 | mypy = "*" 23 | black = "*" 24 | isort = "*" 25 | flake8 = "*" 26 | pytest = "*" 27 | pytest-mock = "*" 28 | build = "*" 29 | twine = "*" 30 | types-Pillow = "*" 31 | types-PyYAML = "*" 32 | opencv-python = "*" 33 | 34 | [tool.poetry.scripts] 35 | labelformat = "labelformat.cli.cli:main" 36 | 37 | [tool.pytest.ini_options] 38 | pythonpath = [ 39 | ".", "src/" 40 | ] 41 | 42 | [tool.isort] 43 | profile = "black" 44 | 45 | [tool.mypy] 46 | ignore_missing_imports = true 47 | python_version = 3.8 48 | warn_unused_configs = true 49 | strict_equality = true 50 | # Disallow dynamic typing 51 | #disallow_any_unimported = true # because mypy fails to follow some imports, e.g. for PIL.Image.Image and matplotlib.Figure 52 | #disallow_any_expr = true # because intermediate expressions do not need to be typed 53 | disallow_any_decorated = true 54 | disallow_any_explicit = true 55 | disallow_any_generics = true 56 | disallow_subclassing_any = true 57 | # Disallow untyped definitions 58 | #disallow_untyped_calls = true # otherwise all external functions called must be typed e.g. calls to torch functions 59 | disallow_untyped_defs = true 60 | disallow_incomplete_defs = true 61 | check_untyped_defs = true 62 | disallow_untyped_decorators = true 63 | # None and optional handling 64 | no_implicit_optional = true 65 | strict_optional = true 66 | # Configuring warnings 67 | warn_unused_ignores = true 68 | warn_no_return = true 69 | warn_return_any = true 70 | warn_redundant_casts = true 71 | warn_unreachable = true 72 | 73 | # Print format 74 | show_error_codes = true 75 | show_error_context = true -------------------------------------------------------------------------------- /tests/fixtures/object_detection/lightly/detection-task-name/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "task_type": "object-detection", 3 | "categories": [ 4 | { 5 | "id": 0, 6 | "name": "person" 7 | }, 8 | { 9 | "id": 1, 10 | "name": "bicycle with space" 11 | }, 12 | { 13 | "id": 2, 14 | "name": "car" 15 | }, 16 | { 17 | "id": 3, 18 | "name": "motorcycle" 19 | }, 20 | { 21 | "id": 4, 22 | "name": "airplane" 23 | }, 24 | { 25 | "id": 5, 26 | "name": "bus" 27 | }, 28 | { 29 | "id": 6, 30 | "name": "train" 31 | }, 32 | { 33 | "id": 7, 34 | "name": "truck" 35 | }, 36 | { 37 | "id": 8, 38 | "name": "boat" 39 | }, 40 | { 41 | "id": 9, 42 | "name": "traffic light" 43 | }, 44 | { 45 | "id": 10, 46 | "name": "fire hydrant" 47 | }, 48 | { 49 | "id": 11, 50 | "name": "stop sign" 51 | }, 52 | { 53 | "id": 12, 54 | "name": "parking meter" 55 | }, 56 | { 57 | "id": 13, 58 | "name": "bench" 59 | }, 60 | { 61 | "id": 14, 62 | "name": "bird" 63 | }, 64 | { 65 | "id": 15, 66 | "name": "cat" 67 | }, 68 | { 69 | "id": 16, 70 | "name": "dog" 71 | }, 72 | { 73 | "id": 17, 74 | "name": "horse" 75 | }, 76 | { 77 | "id": 18, 78 | "name": "sheep" 79 | }, 80 | { 81 | "id": 19, 82 | "name": "cow" 83 | }, 84 | { 85 | "id": 20, 86 | "name": "elephant" 87 | }, 88 | { 89 | "id": 21, 90 | "name": "bear" 91 | }, 92 | { 93 | "id": 22, 94 | "name": "zebra" 95 | }, 96 | { 97 | "id": 23, 98 | "name": "giraffe" 99 | }, 100 | { 101 | "id": 24, 102 | "name": "backpack" 103 | }, 104 | { 105 | "id": 25, 106 | "name": "umbrella" 107 | }, 108 | { 109 | "id": 26, 110 | 
"name": "handbag" 111 | }, 112 | { 113 | "id": 27, 114 | "name": "tie" 115 | }, 116 | { 117 | "id": 28, 118 | "name": "suitcase" 119 | }, 120 | { 121 | "id": 29, 122 | "name": "frisbee" 123 | }, 124 | { 125 | "id": 30, 126 | "name": "skis" 127 | }, 128 | { 129 | "id": 31, 130 | "name": "snowboard" 131 | } 132 | ] 133 | } -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | 2 | ![Labelformat Banner](assets/labelformat_banner.png) 3 | 4 | # Labelformat - Fast Label Conversion for Computer Vision 5 | 6 | **Labelformat** is an open-source Python framework for converting between popular computer vision annotation formats like YOLO, COCO, PascalVOC, and KITTI. Save hours on tedious format conversions and ensure consistency in your workflows. 7 | 8 | 9 | ## Key Features 10 | - **Wide Format Support**: COCO, YOLO (v5-v12, v26), PascalVOC, KITTI, Labelbox, RT-DETR, RT-DETRv2, and more. 11 | - **Cross-Platform**: Compatible with Python 3.8+ on Windows, macOS, and Linux. 12 | - **Flexible Usage**: Intuitive CLI and Python API. 13 | - **Efficient**: Memory-conscious, optimized for large datasets. 14 | - **Offline First**: Operates locally without data uploads. 15 | - **Tested for Accuracy**: Round-trip tests for consistent results. 16 | 17 | ## Get Started Quickly 18 | 19 | 1. **Install via pip**: 20 | ```bash 21 | pip install labelformat 22 | ``` 23 | 2. **Convert Labels in One Command**: 24 | ```bash 25 | labelformat convert --task object-detection \ 26 | --input-format coco \ 27 | --input-file coco-labels/train.json \ 28 | --output-format yolov8 \ 29 | --output-file yolo-labels/data.yaml 30 | ``` 31 | 32 | ## Supported Formats 33 | 34 | ### **2D Object Detection Label Formats** 35 | 36 | | Format | Read ✔️ | Write ✔️ | 37 | |--------------|---------|----------| 38 | | COCO | ✔️ | ✔️ | 39 | | KITTI | ✔️ | ✔️ | 40 | | Labelbox | ✔️ | ❌ | 41 | | Lightly | ✔️ | ✔️ | 42 | | PascalVOC | ✔️ | ✔️ | 43 | | RT-DETR | ✔️ | ✔️ | 44 | | RT-DETRv2 | ✔️ | ✔️ | 45 | | YOLOv5 - v12, v26 | ✔️ | ✔️ | 46 | 47 | --- 48 | 49 | ### **2D Instance Segmentation Label Formats** 50 | 51 | | Format | Read ✔️ | Write ✔️ | 52 | |--------------|---------|----------| 53 | | COCO | ✔️ | ✔️ | 54 | | YOLOv8 | ✔️ | ✔️ | 55 | 56 | 57 | ## Explore More 58 | - [Quick Start Guide](quick-start.md) 59 | - [Detailed Usage Guide](usage.md) 60 | - [List of all features](features.md) 61 | --- 62 | 63 | ## 📦 Quick Links 64 | 65 | - [GitHub Repository](https://github.com/lightly-ai/labelformat) 66 | - [PyPI Package](https://pypi.org/project/labelformat/) 67 | - [Documentation](https://labelformat.com) 68 | 69 | Labelformat is maintained by [Lightly](https://www.lightly.ai). -------------------------------------------------------------------------------- /.github/copilot-instructions.md: -------------------------------------------------------------------------------- 1 | # Python coding guidelines 2 | 3 | ## General guidelines 4 | 5 | When importing, import classes directly. For functions, import the containing module and call the function using dot notation. 6 | 7 | Inside each file, put public classes then public functions, then private classes, and finally private functions. 8 | 9 | For todos, use a format `# TODO({Name}, {month}/{year}): {full_sentence_comment}`, e.g. `# TODO(Michal, 08/2023): Address in the next PR.`. 10 | 11 | If using docstrings, use the google style guide and triple quotes. 
Use `Args:` and `Returns:` sections. Don't repeat the type in the description. 12 | An example: 13 | ```python 14 | def foo(bar: int) -> str: 15 | """Converts an integer to a string. 16 | 17 | Args: 18 | bar: The bar to convert. 19 | 20 | Returns: 21 | The converted bar. 22 | """ 23 | return str(bar) 24 | ``` 25 | 26 | For comments outside of docstrings, use full sentences and proper punctuation. 27 | E.g. `# This is a comment.` instead of `# this is a comment`. 28 | 29 | Avoid using `assert` outside of tests. 30 | 31 | Always use keyword arguments when calling functions, except for single-argument functions. 32 | 33 | Don't worry about formatting; we use ruff for that. 34 | 35 | ## Typing 36 | 37 | Always use type hints, such that mypy passes. 38 | 39 | Use newer syntax, e.g. `list[int | None]` instead of `List[Optional[int]]`. When needing them, add `from __future__ import annotations` at the top of the file. 40 | 41 | Use abstract inputs and concrete outputs. See this example: 42 | ```python 43 | def add_suffix_to_list(lst: Sequence[str], suffix: str) -> list[str]: 44 | return [x + suffix for x in lst] 45 | ``` 46 | 47 | Use `Sequence` and `Mapping` instead of `list` and `dict` for immutable types. Import them from `collections.abc`. 48 | 49 | Be specific when ignoring type errors, e.g. `# type: ignore[no-untyped-call]` instead of `# type: ignore`. 50 | 51 | ## Testing 52 | 53 | Always use pytest, never unittest. 54 | 55 | When testing a class named `MyClass`, put all tests under a class named `TestMyClass`. 56 | 57 | When testing a function or method, name it `test_{method_name_with_underscores}`. 58 | E.g. the test for `_internal_function` is named `test__internal_function`. 59 | E.g. the test for `MyClass.my_method` is named `TestMyClass.test_my_method`. 60 | 61 | When testing a special case of a function or method, append `__{special_case}` to the test name. 62 | E.g. the test for the function `compute_mean(arr: list[float])` for the empty array case 63 | should be named `test_compute_mean__empty_array`. 64 | -------------------------------------------------------------------------------- /docs/formats/object-detection/yolov26.md: -------------------------------------------------------------------------------- 1 | # YOLOv26 (YOLO26) Object Detection Format 2 | 3 | ## Overview 4 | 5 | **YOLOv26** (also known as **YOLO26**) is the latest evolution in the **You Only Look Once (YOLO)** series, engineered specifically for edge and low-power devices. It introduces a streamlined design that removes unnecessary complexity while integrating targeted innovations to deliver faster, lighter, and more accessible deployment. YOLOv26 uses the **same object detection format** as YOLOv8-v12, utilizing normalized coordinates in text files for seamless compatibility. 6 | 7 | > **Info:** YOLOv26 is currently in preview and under development. Performance numbers are preliminary, and a final release will follow soon.
For the latest updates, see the [GitHub repository: ultralytics/ultralytics](https://github.com/ultralytics/ultralytics). 8 | 9 | ## Key YOLOv26 Features 10 | 11 | YOLOv26 maintains full compatibility with the YOLOv8-v12 label format while introducing several breakthrough innovations: 12 | 13 | - **End-to-End NMS-Free Inference:** Native end-to-end model producing predictions directly without non-maximum suppression, reducing latency and simplifying deployment 14 | - **DFL Removal:** Eliminates the Distribution Focal Loss module for better export compatibility and broader hardware support on edge devices 15 | - **MuSGD Optimizer:** Hybrid optimizer combining SGD with Muon, inspired by Moonshot AI's Kimi K2 breakthroughs in LLM training 16 | - **ProgLoss + STAL:** Enhanced loss functions with notable improvements in small-object detection accuracy 17 | - **43% Faster CPU Inference:** Specifically optimized for edge computing with significant CPU performance gains 18 | 19 | ## Format Specification 20 | 21 | YOLOv26 uses the **same format** as YOLOv8, YOLOv9, YOLOv10, YOLOv11, and YOLOv12. Please refer to the [YOLOv8 format documentation](yolov8.md) for complete format specifications, including: 22 | 23 | - Text file structure with normalized coordinates 24 | - Directory organization patterns 25 | - Configuration via `data.yaml` 26 | - Coordinate normalization formulas 27 | - Example annotations 28 | 29 | ## Converting Annotations to YOLOv26 Format 30 | 31 | Since YOLOv26 uses the same format as YOLOv8-v12, you can convert from other formats using Labelformat: 32 | 33 | ```bash 34 | labelformat convert \ 35 | --task object-detection \ 36 | --input-format coco \ 37 | --input-file dataset/annotations/instances_train.json \ 38 | --output-format yolov26 \ 39 | --output-file dataset/yolov26_labels/data.yaml \ 40 | --output-split train 41 | ``` 42 | 43 | The converted output will be fully compatible with YOLOv26 training and inference pipelines.
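The same conversion can also be scripted through the Python API. Below is a minimal sketch, assuming the YOLOv26 classes accept the same constructor arguments as their YOLOv8 counterparts (`output_file` pointing at the dataset YAML, plus `output_split`); `COCOObjectDetectionInput` and `YOLOv26ObjectDetectionOutput` are both exported from `labelformat.formats`.

```python
from pathlib import Path

from labelformat.formats import COCOObjectDetectionInput, YOLOv26ObjectDetectionOutput

# Load COCO annotations.
label_input = COCOObjectDetectionInput(
    input_file=Path("dataset/annotations/instances_train.json")
)

# Write YOLOv26-style labels together with the dataset YAML.
YOLOv26ObjectDetectionOutput(
    output_file=Path("dataset/yolov26_labels/data.yaml"),
    output_split="train",
).save(label_input=label_input)
```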
-------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Labelformat Docs 2 | theme: 3 | name: material 4 | features: 5 | - navigation.instant 6 | - navigation.expand 7 | - search.highlight 8 | - navigation.top 9 | - navigation.tabs 10 | - navigation.tabs.sticky 11 | - navigation.indexes 12 | - content.tabs.link 13 | palette: 14 | scheme: slate 15 | primary: grey 16 | accent: blue 17 | 18 | site_url: https://labelformat.com 19 | repo_url: https://github.com/lightly-ai/labelformat 20 | repo_name: Labelformat 21 | docs_dir: docs 22 | 23 | nav: 24 | - Home: 25 | - index.md 26 | - Features: features.md 27 | - Installation: installation.md 28 | - Quick Start: quick-start.md 29 | - Usage: usage.md 30 | - Supported Formats: 31 | - Object Detection: 32 | - formats/object-detection/index.md 33 | - COCO Format: formats/object-detection/coco.md 34 | - KITTI Format: formats/object-detection/kitti.md 35 | - Labelbox Format: formats/object-detection/labelbox.md 36 | - Lightly Format: formats/object-detection/lightly.md 37 | - PascalVOC Format: formats/object-detection/pascalvoc.md 38 | - RT-DETR Format: formats/object-detection/rtdetr.md 39 | - RT-DETRv2 Format: formats/object-detection/rtdetrv2.md 40 | - YOLOv5 Format: formats/object-detection/yolov5.md 41 | - YOLOv6 Format: formats/object-detection/yolov6.md 42 | - YOLOv7 Format: formats/object-detection/yolov7.md 43 | - YOLOv8 Format: formats/object-detection/yolov8.md 44 | - YOLOv9 Format: formats/object-detection/yolov9.md 45 | - YOLOv10 Format: formats/object-detection/yolov10.md 46 | - YOLOv11 Format: formats/object-detection/yolov11.md 47 | - YOLOv12 Format: formats/object-detection/yolov12.md 48 | - YOLOv26 Format: formats/object-detection/yolov26.md 49 | - Tutorials: 50 | - Converting COCO to YOLOv8: tutorials/converting-coco-to-yolov8.md 51 | - About Us: about-us.md 52 | 53 | plugins: 54 | - search 55 | - blog 56 | - git-revision-date-localized: 57 | type: date 58 | 59 | extra: 60 | analytics: 61 | provider: google 62 | property: G-K4PH64C9BM 63 | consent: 64 | title: Cookie consent 65 | description: >- 66 | We use cookies to recognize your repeated visits and preferences, as well 67 | as to measure the effectiveness of our documentation and whether users 68 | find what they're searching for. With your consent, you're helping us to 69 | make our documentation better. 70 | 71 | extra_javascript: 72 | - assets/js/analytics.js 73 | 74 | markdown_extensions: 75 | - admonition 76 | - codehilite 77 | - toc: 78 | permalink: true 79 | - pymdownx.superfences 80 | - pymdownx.tabbed: 81 | alternate_style: true 82 | -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | # Features 2 | 3 | Labelformat offers a robust set of features tailored to meet the diverse needs of computer vision engineers and data scientists. 4 | 5 | ## Key Features 6 | 7 | - **Wide Format Support:** 8 | - **2D Object Detection:** Bounding box annotations for object localization 9 | - **Instance Segmentation:** Pixel-level masks for precise object delineation 10 | 11 | - **User-Friendly CLI and Python API:** 12 | - **CLI:** Simple terminal commands to convert formats with customizable options. 13 | - **Python API:** Integrate label conversion seamlessly into your Python workflows. 
14 | 15 | - **Performance Optimizations:** 16 | - **Memory Conscious:** Processes datasets file-by-file to minimize memory usage. 17 | - **Minimal Dependencies:** Targets Python 3.8 or higher, ensuring broad compatibility. 18 | 19 | - **Cross-Platform Support:** 20 | - **Windows, Linux, and macOS:** Works seamlessly across all major operating systems. 21 | 22 | - **Reliability and Testing:** 23 | - **Typed Codebase:** Ensures type safety and easier maintenance. 24 | - **Round-Trip Tests:** Guarantees label consistency across conversions. 25 | 26 | - **Open-Source and Community-Driven:** 27 | - **MIT License:** Free to use and modify. 28 | - **Active Contributions:** Regular updates and community support. 29 | 30 | ## Supported Tasks and Formats 31 | 32 | ### Object Detection 33 | 34 | - **[COCO](formats/object-detection/coco.md)** 35 | - **[KITTI](formats/object-detection/kitti.md)** 36 | - **[Labelbox](formats/object-detection/labelbox.md)** (input only) 37 | - **[Lightly](formats/object-detection/lightly.md)** 38 | - **[PascalVOC](formats/object-detection/pascalvoc.md)** 39 | - **[RT-DETR](formats/object-detection/rtdetr.md)** 40 | - **[RT-DETRv2](formats/object-detection/rtdetrv2.md)** 41 | - **[YOLOv5](formats/object-detection/yolov5.md)** 42 | - **[YOLOv6](formats/object-detection/yolov6.md)** 43 | - **[YOLOv7](formats/object-detection/yolov7.md)** 44 | - **[YOLOv8](formats/object-detection/yolov8.md)** 45 | - **[YOLOv9](formats/object-detection/yolov9.md)** 46 | - **[YOLOv10](formats/object-detection/yolov10.md)** 47 | - **[YOLOv11](formats/object-detection/yolov11.md)** 48 | - **[YOLOv12](formats/object-detection/yolov12.md)** 49 | - **[YOLOv26](formats/object-detection/yolov26.md)** 50 | 51 | --- 52 | 53 | ## Why Labelformat? 54 | 55 | Labelformat addresses the common challenges faced when dealing with diverse label formats: 56 | 57 | - **Consistency:** Ensures uniformity across different formats, crucial for model training. 58 | - **Efficiency:** Reduces the time spent on manual label format conversions. 59 | - **Scalability:** Handles large datasets with minimal memory footprint. 60 | - **Flexibility:** Supports a growing list of formats and tasks, adapting to evolving project needs. 61 | 62 | Explore our [Quick Start Guide](quick-start.md) to begin leveraging Labelformat's powerful features today! -------------------------------------------------------------------------------- /tests/integration/instance_segmentation/test_inverse.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from pytest_mock import MockerFixture 4 | 5 | from labelformat.formats.coco import ( 6 | COCOInstanceSegmentationInput, 7 | COCOInstanceSegmentationOutput, 8 | ) 9 | from labelformat.formats.yolov8 import ( 10 | YOLOv8InstanceSegmentationInput, 11 | YOLOv8InstanceSegmentationOutput, 12 | ) 13 | from labelformat.model.multipolygon import MultiPolygon 14 | 15 | from ...simple_instance_segmentation_label_input import ( 16 | SimpleInstanceSegmentationInput, 17 | SimpleInstanceSegmentationInputWithBinaryMask, 18 | ) 19 | from .. 
import integration_utils 20 | 21 | 22 | def test_coco_inverse(tmp_path: Path) -> None: 23 | start_label_input = SimpleInstanceSegmentationInputWithBinaryMask() 24 | COCOInstanceSegmentationOutput(output_file=tmp_path / "train.json").save( 25 | label_input=start_label_input 26 | ) 27 | end_label_input = COCOInstanceSegmentationInput(input_file=tmp_path / "train.json") 28 | assert list(start_label_input.get_labels()) == list(end_label_input.get_labels()) 29 | 30 | 31 | def test_yolov8_inverse(tmp_path: Path, mocker: MockerFixture) -> None: 32 | start_label_input = SimpleInstanceSegmentationInput() 33 | YOLOv8InstanceSegmentationOutput( 34 | output_file=tmp_path / "dataset.yaml", 35 | output_split="train", 36 | ).save(label_input=start_label_input) 37 | # For YOLOv8 we have to also provide the image files. 38 | _mock_input_images(mocker=mocker, folder=tmp_path / "images") 39 | end_label_input = YOLOv8InstanceSegmentationInput( 40 | input_file=tmp_path / "dataset.yaml", 41 | input_split="train", 42 | ) 43 | 44 | # YOLOv8 merges a multipolygon into a single polygon, so we have to 45 | # compare them with a custom check. 46 | for image_label_0, image_label_1 in zip( 47 | start_label_input.get_labels(), end_label_input.get_labels() 48 | ): 49 | assert image_label_0.image == image_label_1.image 50 | assert len(image_label_0.objects) == len(image_label_1.objects) 51 | for object_0, object_1 in zip(image_label_0.objects, image_label_1.objects): 52 | assert object_0.category == object_1.category 53 | assert isinstance(object_0.segmentation, MultiPolygon) 54 | assert isinstance(object_1.segmentation, MultiPolygon) 55 | integration_utils.assert_multipolygons_almost_equal( 56 | object_0.segmentation, object_1.segmentation 57 | ) 58 | 59 | 60 | def _mock_input_images(mocker: MockerFixture, folder: Path) -> None: 61 | folder.mkdir() 62 | (folder / "image.jpg").touch() 63 | mock_img = mocker.MagicMock() 64 | mock_img.size = (100, 200) 65 | mock_context_manager = mocker.MagicMock() 66 | mock_context_manager.__enter__.return_value = mock_img 67 | mocker.patch("PIL.Image.open", return_value=mock_context_manager) 68 | -------------------------------------------------------------------------------- /src/labelformat/model/bounding_box.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from typing import List 4 | 5 | from labelformat.model.category import Category 6 | 7 | 8 | class BoundingBoxFormat(Enum): 9 | XYXY = "xyxy" 10 | XYWH = "xywh" 11 | CXCYWH = "cxcywh" 12 | 13 | 14 | @dataclass(frozen=True) 15 | class BoundingBox: 16 | xmin: float 17 | ymin: float 18 | xmax: float 19 | ymax: float 20 | 21 | @staticmethod 22 | def from_format( 23 | bbox: List[float], 24 | format: BoundingBoxFormat, 25 | ) -> "BoundingBox": 26 | """Create a bounding box from a list of floats and a format. 27 | 28 | We assume all bounding box coordinates are in pixel coordinates and are 29 | NOT normalized between 0 and 1. 30 | 31 | Args: 32 | bbox (List[float]): A list of floats representing the bounding box. 33 | format (BoundingBoxFormat): The format of the bounding box. 
34 | """ 35 | if format == BoundingBoxFormat.XYXY: 36 | return BoundingBox( 37 | xmin=bbox[0], 38 | ymin=bbox[1], 39 | xmax=bbox[2], 40 | ymax=bbox[3], 41 | ) 42 | elif format == BoundingBoxFormat.XYWH: 43 | xmin = bbox[0] 44 | ymin = bbox[1] 45 | xmax = xmin + bbox[2] 46 | ymax = ymin + bbox[3] 47 | return BoundingBox( 48 | xmin=xmin, 49 | ymin=ymin, 50 | xmax=xmax, 51 | ymax=ymax, 52 | ) 53 | elif format == BoundingBoxFormat.CXCYWH: 54 | xmin = bbox[0] - bbox[2] / 2 55 | ymin = bbox[1] - bbox[3] / 2 56 | xmax = bbox[0] + bbox[2] / 2 57 | ymax = bbox[1] + bbox[3] / 2 58 | return BoundingBox( 59 | xmin=xmin, 60 | ymin=ymin, 61 | xmax=xmax, 62 | ymax=ymax, 63 | ) 64 | else: 65 | raise ValueError( 66 | f"Unknown bbox format: {format}, known formats are {list(BoundingBoxFormat)}" 67 | ) 68 | 69 | def to_format(self, format: BoundingBoxFormat) -> List[float]: 70 | if format == BoundingBoxFormat.XYXY: 71 | return [self.xmin, self.ymin, self.xmax, self.ymax] 72 | elif format == BoundingBoxFormat.XYWH: 73 | return [self.xmin, self.ymin, self.xmax - self.xmin, self.ymax - self.ymin] 74 | elif format == BoundingBoxFormat.CXCYWH: 75 | return [ 76 | (self.xmin + self.xmax) / 2, 77 | (self.ymin + self.ymax) / 2, 78 | self.xmax - self.xmin, 79 | self.ymax - self.ymin, 80 | ] 81 | else: 82 | raise ValueError( 83 | f"Unknown bbox format: {format}, known formats are {list(BoundingBoxFormat)}" 84 | ) 85 | -------------------------------------------------------------------------------- /tests/integration/instance_segmentation/test_to_yolov8.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import yaml 4 | 5 | from labelformat.formats.coco import COCOInstanceSegmentationInput 6 | from labelformat.formats.yolov8 import ( 7 | YOLOv8InstanceSegmentationInput, 8 | YOLOv8InstanceSegmentationOutput, 9 | ) 10 | from labelformat.model.instance_segmentation import InstanceSegmentationInput 11 | from labelformat.model.multipolygon import MultiPolygon 12 | 13 | from .. import integration_utils 14 | from ..integration_utils import INST_SEGMENTATION_FIXTURES_DIR 15 | 16 | 17 | def test_yolov8_to_yolov8(tmp_path: Path) -> None: 18 | label_input = YOLOv8InstanceSegmentationInput( 19 | input_file=INST_SEGMENTATION_FIXTURES_DIR / "YOLOv8/dataset.yaml", 20 | input_split="train", 21 | ) 22 | _convert_and_test(label_input=label_input, tmp_path=tmp_path) 23 | 24 | 25 | def test_coco_to_yolov8(tmp_path: Path) -> None: 26 | label_input = COCOInstanceSegmentationInput( 27 | input_file=INST_SEGMENTATION_FIXTURES_DIR / "COCO/instances.json" 28 | ) 29 | _convert_and_test(label_input=label_input, tmp_path=tmp_path) 30 | 31 | 32 | def _convert_and_test(label_input: InstanceSegmentationInput, tmp_path: Path) -> None: 33 | YOLOv8InstanceSegmentationOutput( 34 | output_file=tmp_path / "dataset.yaml", 35 | output_split="train", 36 | ).save(label_input=label_input) 37 | 38 | # Compare yaml files. 39 | output_yaml = yaml.safe_load((tmp_path / "dataset.yaml").read_text()) 40 | expected_yaml = yaml.safe_load( 41 | (INST_SEGMENTATION_FIXTURES_DIR / "YOLOv8/dataset.yaml").read_text() 42 | ) 43 | # TODO: Add split_subfolder to YOLOv8 output parameters. 44 | del output_yaml["train"] 45 | del expected_yaml["train"] 46 | assert output_yaml == expected_yaml 47 | 48 | # Compare label files. 
49 | _assert_yolov8_labels_equal( 50 | dir1=INST_SEGMENTATION_FIXTURES_DIR / "YOLOv8/labels", 51 | dir2=tmp_path / "labels", 52 | ) 53 | 54 | 55 | def _assert_yolov8_labels_equal( 56 | dir1: Path, 57 | dir2: Path, 58 | ) -> None: 59 | assert dir1.is_dir() 60 | assert dir2.is_dir() 61 | for file1 in dir1.rglob("*"): 62 | if file1.is_dir(): 63 | continue 64 | file2 = dir2 / file1.relative_to(dir1) 65 | for line1, line2 in zip(file1.open(), file2.open()): 66 | parts1 = line1.split() 67 | parts2 = line2.split() 68 | assert parts1[0] == parts2[0], "labels do not match" 69 | polygon1 = [ 70 | (float(x), float(y)) for x, y in zip(parts1[1::2], parts1[2::2]) 71 | ] 72 | polygon2 = [ 73 | (float(x), float(y)) for x, y in zip(parts2[1::2], parts2[2::2]) 74 | ] 75 | integration_utils.assert_multipolygons_almost_equal( 76 | MultiPolygon(polygons=[polygon1]), 77 | MultiPolygon(polygons=[polygon2]), 78 | ) 79 | -------------------------------------------------------------------------------- /docs/quick-start.md: -------------------------------------------------------------------------------- 1 | 2 | # Quick Start Guide 3 | 4 | Get up and running with **Labelformat** in minutes! This Quick Start Guide provides simple, copy-paste examples to help you convert label formats effortlessly. 5 | 6 | ## Scenario 1: Convert COCO to YOLOv8 Using CLI 7 | 8 | ### Step 1: Prepare Your Files 9 | 10 | Ensure you have the following structure: 11 | ``` 12 | project/ 13 | ├── coco-labels/ 14 | │ └── train.json 15 | ├── images/ 16 | │ ├── image1.jpg 17 | │ └── image2.jpg 18 | ``` 19 | 20 | ### Step 2: Run the Conversion Command 21 | 22 | Open your terminal, navigate to your project directory, and execute: 23 | 24 | ```shell 25 | labelformat convert \ 26 | --task object-detection \ 27 | --input-format coco \ 28 | --input-file coco-labels/train.json \ 29 | --output-format yolov8 \ 30 | --output-file yolo-labels/data.yaml \ 31 | --output-split train 32 | ``` 33 | 34 | ### Step 3: Verify the Output 35 | 36 | Your project structure should now include: 37 | 38 | ``` 39 | project/ 40 | ├── yolo-labels/ 41 | │ ├── data.yaml 42 | │ └── labels/ 43 | │ ├── image1.txt 44 | │ └── image2.txt 45 | ``` 46 | 47 | --- 48 | 49 | ## Scenario 2: Convert YOLOv8 to COCO Using Python API 50 | 51 | ### Step 1: Install Labelformat 52 | 53 | If you haven't installed Labelformat yet, do so via pip: 54 | ``` shell 55 | pip install labelformat 56 | ``` 57 | 58 | ### Step 2: Write the Conversion Script 59 | 60 | Create a Python script, `convert_yolo_to_coco.py`, with the following content: 61 | 62 | ``` python 63 | from pathlib import Path 64 | from labelformat.formats import COCOObjectDetectionOutput, YOLOv8ObjectDetectionInput 65 | 66 | # Load YOLOv8 labels 67 | yolo_input = YOLOv8ObjectDetectionInput( 68 | input_file=Path("yolo-labels/data.yaml"), 69 | input_split="train" 70 | ) 71 | 72 | # Convert to COCO format and save 73 | coco_output = COCOObjectDetectionOutput( 74 | output_file=Path("coco-from-yolo/converted_coco.json") 75 | ) 76 | coco_output.save(label_input=yolo_input) 77 | 78 | print("Conversion from YOLOv8 to COCO completed successfully!") 79 | ``` 80 | 81 | ### Step 3: Execute the Script 82 | 83 | Run the script: 84 | 85 | ``` shell 86 | python convert_yolo_to_coco.py 87 | ``` 88 | 89 | ### Step 4: Check the COCO Output 90 | 91 | Your project should now have: 92 | 93 | ``` 94 | project/ 95 | ├── coco-from-yolo/ 96 | │ └── converted_coco.json 97 | ``` 98 | 99 | --- 100 | 101 | ## Scenario 3: Convert Labelbox Export to Lightly Format 102 | 
103 | ### Step 1: Export Labels from Labelbox 104 | 105 | Ensure you have the Labelbox export file, e.g., `labelbox-export.ndjson`. 106 | 107 | ### Step 2: Run the Conversion Command 108 | 109 | ``` shell 110 | labelformat convert \ 111 | --task object-detection \ 112 | --input-format labelbox \ 113 | --input-file labelbox-export.ndjson \ 114 | --category-names cat,dog,fish \ 115 | --output-format lightly \ 116 | --output-folder lightly-labels/annotation-task 117 | ``` 118 | 119 | ### Step 3: Verify the Lightly Output 120 | 121 | Your project structure should include: 122 | 123 | ``` 124 | project/ 125 | ├── lightly-labels/ 126 | │ ├── annotation-task/ 127 | │ │ ├── schema.json 128 | │ │ ├── image1.json 129 | │ │ └── image2.json 130 | ``` -------------------------------------------------------------------------------- /docs/formats/object-detection/kitti.md: -------------------------------------------------------------------------------- 1 | # KITTI Object Detection Format 2 | 3 | ## Overview 4 | The KITTI format was developed as part of the KITTI Vision Benchmark Suite, focusing on autonomous driving scenarios. This format is particularly well-suited for 3D object detection and tracking tasks. The complete format specification can be found in the [KITTI development kit documentation](https://github.com/bostondiditeam/kitti/blob/master/resources/devkit_object/readme.txt). 5 | 6 | ## Specification of KITTI Detection Format 7 | Each object is represented by 15 space-separated values: 8 | 9 | ``` 10 | #Values Name Description 11 | ---------------------------------------------------------------------------- 12 | 1 type Object type (Car, Van, Truck, etc.) 13 | 1 truncated Float 0-1 (truncation ratio) 14 | 1 occluded Integer (0=visible, 1=partly occluded, 2=fully occluded) 15 | 1 alpha Observation angle (-pi..pi) 16 | 4 bbox 2D bounding box (x1,y1,x2,y2) in pixels 17 | 3 dimensions 3D dimensions (height, width, length) in meters 18 | 3 location 3D location (x,y,z) in camera coordinates 19 | 1 rotation_y Rotation around Y-axis in camera coordinates 20 | ``` 21 | 22 | ## Directory Structure of KITTI Dataset 23 | ``` 24 | dataset/ 25 | ├── images/ 26 | │ ├── 000000.png 27 | │ └── 000001.png 28 | └── labels/ 29 | ├── 000000.txt 30 | └── 000001.txt 31 | ``` 32 | 33 | ## Label Format 34 | ``` 35 | # Example: 000000.txt 36 | Car -1 -1 -10 614 181 727 284 -1 -1 -1 -1000 -1000 -1000 -10 37 | Pedestrian -1 -1 -10 123 156 189 312 -1 -1 -1 -1000 -1000 -1000 -10 38 | ``` 39 | 40 | Note: The filename of each label file must match its corresponding image file, with a .txt extension.
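To make the field layout concrete, the following sketch (illustrative only, not part of Labelformat's API) splits one label line into the named fields from the specification above:

```python
# Parse one KITTI label line into its named parts.
line = "Car -1 -1 -10 614 181 727 284 -1 -1 -1 -1000 -1000 -1000 -10"
parts = line.split()

label = {
    "type": parts[0],
    "truncated": float(parts[1]),
    "occluded": int(parts[2]),
    "alpha": float(parts[3]),
    # 2D bounding box in pixels: x1, y1, x2, y2.
    "bbox": [float(v) for v in parts[4:8]],
    # 3D dimensions in meters: height, width, length.
    "dimensions": [float(v) for v in parts[8:11]],
    # 3D location in camera coordinates: x, y, z.
    "location": [float(v) for v in parts[11:14]],
    "rotation_y": float(parts[14]),
}
```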
41 | 42 | ## Annotation Format Conversion 43 | ### Using CLI 44 | Convert from YOLOv8 to KITTI format: 45 | ```bash 46 | labelformat convert \ 47 | --task object-detection \ 48 | --input-format yolov8 \ 49 | --input-file yolo-labels/data.yaml \ 50 | --input-split train \ 51 | --output-format kitti \ 52 | --output-folder kitti-labels 53 | ``` 54 | 55 | Convert from KITTI to YOLOv8 format: 56 | ```bash 57 | labelformat convert \ 58 | --task object-detection \ 59 | --input-format kitti \ 60 | --input-folder kitti-labels \ 61 | --category-names car,pedestrian,cyclist \ 62 | --images-rel-path ../images \ 63 | --output-format yolov8 \ 64 | --output-file yolo-labels/data.yaml \ 65 | --output-split train 66 | ``` 67 | 68 | ### Using Python 69 | ```python 70 | from pathlib import Path 71 | from labelformat.formats import KittiObjectDetectionInput, YOLOv8ObjectDetectionOutput 72 | 73 | # Load KITTI labels 74 | label_input = KittiObjectDetectionInput( 75 | input_folder=Path("kitti-labels"), 76 | category_names="car,pedestrian,cyclist", 77 | images_rel_path="../images" 78 | ) 79 | 80 | # Convert to YOLOv8 and save 81 | YOLOv8ObjectDetectionOutput( 82 | output_file=Path("yolo-labels/data.yaml"), 83 | output_split="train" 84 | ).save(label_input=label_input) 85 | ``` 86 | 87 | ## Notes 88 | - KITTI format uses absolute pixel coordinates (x1,y1,x2,y2) for bounding boxes 89 | - Some fields like truncated, occluded, dimensions etc. are optional and can be set to -1 if unknown 90 | - The category name (type) should match one of the predefined categories when converting -------------------------------------------------------------------------------- /tests/unit/model/test_binary_mask_segmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray 3 | 4 | from labelformat.model.binary_mask_segmentation import ( 5 | BinaryMaskSegmentation, 6 | RLEDecoderEncoder, 7 | ) 8 | from labelformat.model.bounding_box import BoundingBox 9 | 10 | 11 | class TestBinaryMaskSegmentation: 12 | def test_from_binary_mask(self) -> None: 13 | # Create a binary mask 14 | binary_mask: NDArray[np.int_] = np.array([[0, 1], [1, 0]], dtype=np.int_) 15 | bounding_box = BoundingBox(0, 0, 2, 2) 16 | 17 | binary_mask_segmentation = BinaryMaskSegmentation.from_binary_mask( 18 | binary_mask=binary_mask, bounding_box=bounding_box 19 | ) 20 | assert binary_mask_segmentation.width == 2 21 | assert binary_mask_segmentation.height == 2 22 | assert binary_mask_segmentation.bounding_box == bounding_box 23 | assert np.array_equal(binary_mask_segmentation.get_binary_mask(), binary_mask) 24 | 25 | 26 | class TestRLEDecoderEncoder: 27 | def test_encode_row_wise_rle(self) -> None: 28 | binary_mask: NDArray[np.int_] = np.array( 29 | [[0, 1, 1, 0], [1, 1, 1, 1]], dtype=np.int_ 30 | ) 31 | rle = RLEDecoderEncoder.encode_row_wise_rle(binary_mask) 32 | assert rle == [1, 2, 1, 4] 33 | 34 | def test_decode_row_wise_rle(self) -> None: 35 | rle = [1, 2, 1, 4] 36 | height = 2 37 | width = 4 38 | binary_mask = RLEDecoderEncoder.decode_row_wise_rle(rle, height, width) 39 | expected_binary_mask: NDArray[np.uint8] = np.array( 40 | [[0, 1, 1, 0], [1, 1, 1, 1]], dtype=np.uint8 41 | ) 42 | assert np.array_equal(binary_mask, expected_binary_mask) 43 | 44 | def test_encode_column_wise_rle(self) -> None: 45 | binary_mask: NDArray[np.int_] = np.array( 46 | [[0, 1, 1, 0], [1, 1, 1, 1]], dtype=np.int_ 47 | ) 48 | rle = RLEDecoderEncoder.encode_column_wise_rle(binary_mask) 49 | assert 
rle == [1, 5, 1, 1] 50 | 51 | def test_decode_column_wise_rle(self) -> None: 52 | rle = [1, 5, 1, 1] 53 | height = 2 54 | width = 4 55 | binary_mask = RLEDecoderEncoder.decode_column_wise_rle(rle, height, width) 56 | expected_binary_mask: NDArray[np.uint8] = np.array( 57 | [[0, 1, 1, 0], [1, 1, 1, 1]], dtype=np.uint8 58 | ) 59 | assert np.array_equal(binary_mask, expected_binary_mask) 60 | 61 | def test_inverse__row_wise(self) -> None: 62 | mask: NDArray[np.int_] = np.random.randint( 63 | 0, 2, (42, 9), dtype=np.int32 64 | ).astype(np.int_) 65 | 66 | rle = RLEDecoderEncoder.encode_row_wise_rle(mask) 67 | mask_inverse_row_wise = RLEDecoderEncoder.decode_row_wise_rle( 68 | rle, mask.shape[0], mask.shape[1] 69 | ) 70 | assert np.array_equal(mask, mask_inverse_row_wise) 71 | 72 | def test_inverse__column_wise(self) -> None: 73 | mask: NDArray[np.int_] = np.random.randint( 74 | 0, 2, (42, 9), dtype=np.int32 75 | ).astype(np.int_) 76 | 77 | rle = RLEDecoderEncoder.encode_column_wise_rle(mask) 78 | mask_inverse_column_wise = RLEDecoderEncoder.decode_column_wise_rle( 79 | rle, mask.shape[0], mask.shape[1] 80 | ) 81 | assert np.array_equal(mask, mask_inverse_column_wise) 82 | -------------------------------------------------------------------------------- /docs/formats/object-detection/pascalvoc.md: -------------------------------------------------------------------------------- 1 | # PascalVOC Object Detection Format 2 | 3 | ## Overview 4 | PascalVOC (Visual Object Classes) is a widely used format for object detection tasks, introduced in the seminal paper ["The PASCAL Visual Object Classes (VOC) Challenge"](http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham10.pdf) by Everingham et al. It stores annotations in XML files, with one XML file per image containing bounding box coordinates and class labels. The complete format specification is available in the [PascalVOC development kit](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#devkit). 5 | 6 | ## Specification 7 | Each XML annotation file contains: 8 | - Image metadata (filename, size, etc.) 
9 | - List of objects, each with: 10 | - Class name (string, allows spaces, e.g., "traffic light" or "stop sign") 11 | - Bounding box coordinates as integer pixel values: 12 | - xmin: left-most pixel coordinate 13 | - ymin: top-most pixel coordinate 14 | - xmax: right-most pixel coordinate 15 | - ymax: bottom-most pixel coordinate 16 | - Optional attributes (difficult, truncated, occluded) 17 | 18 | ## Directory Structure 19 | ``` 20 | dataset/ 21 | ├── images/ 22 | │ ├── image1.jpg 23 | │ └── image2.jpg 24 | └── annotations/ 25 | ├── image1.xml 26 | └── image2.xml 27 | ``` 28 | 29 | ## Example Annotation 30 | ```xml 31 | <annotation> 32 | <folder>images</folder> 33 | <filename>image1.jpg</filename> 34 | <size> 35 | <width>640</width> 36 | <height>480</height> 37 | <depth>3</depth> 38 | </size> 39 | <object> 40 | <name>cat</name> 41 | <pose>Unspecified</pose> 42 | <truncated>0</truncated> 43 | <difficult>0</difficult> 44 | <bndbox> 45 | <xmin>100</xmin> 46 | <ymin>200</ymin> 47 | <xmax>300</xmax> 48 | <ymax>400</ymax> 49 | </bndbox> 50 | </object> 51 | </annotation> 52 | ``` 53 | 54 | ## Format Details 55 | - Coordinates are in absolute pixel values (not normalized) 56 | - Bounding boxes use XYXY format (xmin, ymin, xmax, ymax) 57 | - Each object can have optional attributes: 58 | - `difficult`: Indicates hard-to-recognize objects 59 | - `truncated`: Indicates objects partially outside the image 60 | - `occluded`: Indicates partially obscured objects 61 | 62 | ## Converting with Labelformat 63 | 64 | ### COCO to PascalVOC 65 | ```bash 66 | labelformat convert \ 67 | --task object-detection \ 68 | --input-format coco \ 69 | --input-file coco-labels/annotations.json \ 70 | --output-format pascalvoc \ 71 | --output-folder pascalvoc-labels 72 | ``` 73 | 74 | ### PascalVOC to COCO 75 | ```bash 76 | labelformat convert \ 77 | --task object-detection \ 78 | --input-format pascalvoc \ 79 | --input-folder pascalvoc-labels \ 80 | --category-names cat,dog,fish \ 81 | --output-format coco \ 82 | --output-file coco-labels/annotations.json 83 | ``` 84 | 85 | ### Required Arguments 86 | - For input: 87 | - `--input-folder`: Directory containing PascalVOC XML files 88 | - `--category-names`: Comma-separated list of category names (e.g., 'dog,cat') 89 | - For output: 90 | - `--output-folder`: Directory to save generated XML files 91 | 92 | ## References 93 | - [Original PascalVOC Dataset](http://host.robots.ox.ac.uk/pascal/VOC/) 94 | - [Format Documentation](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf) -------------------------------------------------------------------------------- /docs/formats/object-detection/labelbox.md: -------------------------------------------------------------------------------- 1 | # Labelbox Object Detection Format 2 | 3 | ## Overview 4 | Labelbox uses NDJSON (Newline Delimited JSON) format for label exports, where each line represents a single image and its annotations. The format supports object detection through bounding boxes. 5 | Labelformat currently supports Labelbox as an input-only format; the complete format specification can be found in the [Labelbox documentation](https://docs.labelbox.com/reference/label-export).
6 | 7 | ## Specification of Labelbox Detection Format 8 | ``` 9 | dataset/ 10 | └── export-result.ndjson 11 | ``` 12 | 13 | Each line in the NDJSON file contains a complete JSON object with three main sections: 14 | 15 | - `data_row`: Contains image metadata (id, filename, external references) 16 | - `media_attributes`: Image dimensions 17 | - `projects`: Contains the actual annotations 18 | 19 | ## Label Format 20 | Each annotation line follows this structure: 21 | ```json 22 | { 23 | "data_row": { 24 | "id": "data_row_id", 25 | "global_key": "image1.jpg", 26 | "external_id": "image1.jpg" 27 | }, 28 | "media_attributes": { 29 | "width": 640, 30 | "height": 480 31 | }, 32 | "projects": { 33 | "project_id": { 34 | "labels": [{ 35 | "annotations": { 36 | "objects": [{ 37 | "name": "cat", 38 | "annotation_kind": "ImageBoundingBox", 39 | "bounding_box": { 40 | "top": 100, 41 | "left": 200, 42 | "width": 50, 43 | "height": 30 44 | } 45 | }] 46 | } 47 | }] 48 | } 49 | } 50 | } 51 | ``` 52 | 53 | ## Converting from Labelbox Format 54 | Labelbox format can be converted to other formats using labelformat. Here's an example converting to YOLOv8: 55 | 56 | ```bash 57 | labelformat convert \ 58 | --task object-detection \ 59 | --input-format labelbox \ 60 | --input-file labelbox-labels/export-result.ndjson \ 61 | --category-names cat,dog,fish \ 62 | --output-format yolov8 \ 63 | --output-file yolo-labels/data.yaml \ 64 | --output-split train 65 | ``` 66 | 67 | ### Important Parameters 68 | - `--category-names`: Required list of category names (comma-separated) 69 | - `--filename-key`: Which key to use as filename (options: global_key, external_id, id; default: global_key) 70 | 71 | ## Format Details 72 | 73 | ### Bounding Box Format 74 | - Uses absolute pixel coordinates 75 | - Format: `{top, left, width, height}` 76 | - Origin: Top-left corner of the image 77 | 78 | ### Limitations 79 | - Currently supports single project exports only 80 | - Video annotations are not supported 81 | - Only `ImageBoundingBox` annotation types are processed 82 | 83 | ## Example 84 | ```json 85 | {"data_row":{"id":"ckz...","global_key":"image1.jpg","external_id":"img_1"},"media_attributes":{"width":640,"height":480},"projects":{"proj_123":{"labels":[{"annotations":{"objects":[{"name":"cat","annotation_kind":"ImageBoundingBox","bounding_box":{"top":100,"left":200,"width":50,"height":30}}]}}]}}} 86 | {"data_row":{"id":"ckz...","global_key":"image2.jpg","external_id":"img_2"},"media_attributes":{"width":640,"height":480},"projects":{"proj_123":{"labels":[{"annotations":{"objects":[{"name":"dog","annotation_kind":"ImageBoundingBox","bounding_box":{"top":150,"left":300,"width":60,"height":40}}]}}]}}} 87 | ``` 88 | 89 | Note: This format is supported for input only in labelformat. 
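For Python workflows, here is a minimal sketch. It assumes `LabelboxObjectDetectionInput` mirrors the CLI parameters shown above (`input_file` and comma-separated `category_names`, with `filename_key` left at its default); since Labelbox is input-only, the example writes the result out as COCO.

```python
from pathlib import Path

from labelformat.formats import COCOObjectDetectionOutput, LabelboxObjectDetectionInput

# Load the Labelbox NDJSON export. Category names must be given explicitly.
label_input = LabelboxObjectDetectionInput(
    input_file=Path("labelbox-labels/export-result.ndjson"),
    category_names="cat,dog,fish",
)

# Convert to a writable format such as COCO.
COCOObjectDetectionOutput(
    output_file=Path("coco-labels/annotations.json")
).save(label_input=label_input)
```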
-------------------------------------------------------------------------------- /tests/integration/object_detection/test_object_detection.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | from labelformat.formats.coco import COCOObjectDetectionInput, COCOObjectDetectionOutput 7 | from labelformat.formats.kitti import ( 8 | KittiObjectDetectionInput, 9 | KittiObjectDetectionOutput, 10 | ) 11 | from labelformat.formats.lightly import ( 12 | LightlyObjectDetectionInput, 13 | LightlyObjectDetectionOutput, 14 | ) 15 | 16 | from ..integration_utils import COMMA_JOINED_CATEGORY_NAMES, OBJ_DETECTION_FIXTURES_DIR 17 | 18 | 19 | def test_coco_to_coco(tmp_path: Path) -> None: 20 | coco_file = OBJ_DETECTION_FIXTURES_DIR / "COCO/train.json" 21 | label_input = COCOObjectDetectionInput(input_file=coco_file) 22 | COCOObjectDetectionOutput(output_file=tmp_path / "train.json").save( 23 | label_input=label_input 24 | ) 25 | 26 | # Compare jsons. 27 | output_json = json.loads((tmp_path / "train.json").read_text()) 28 | expected_json = json.loads( 29 | (OBJ_DETECTION_FIXTURES_DIR / "COCO/train.json").read_text() 30 | ) 31 | # Some fields are not converted: 32 | # - info 33 | # - licenses 34 | # - .supercategory 35 | # - .date_captured 36 | # - .id 37 | # - .area 38 | # - .iscrowd 39 | del expected_json["info"] 40 | del expected_json["licenses"] 41 | for category in expected_json["categories"]: 42 | del category["supercategory"] 43 | for image in expected_json["images"]: 44 | del image["date_captured"] 45 | for annotation in expected_json["annotations"]: 46 | del annotation["id"] 47 | del annotation["area"] 48 | del annotation["iscrowd"] 49 | assert output_json == expected_json 50 | 51 | 52 | def test_kitti_to_kitti(tmp_path: Path) -> None: 53 | input_folder = OBJ_DETECTION_FIXTURES_DIR / "KITTI/labels" 54 | label_input = KittiObjectDetectionInput( 55 | input_folder=input_folder, 56 | category_names=COMMA_JOINED_CATEGORY_NAMES, 57 | images_rel_path="../images/a-difficult subfolder", 58 | ) 59 | output_folder = tmp_path / "labels" 60 | KittiObjectDetectionOutput(output_folder=output_folder).save( 61 | label_input=label_input 62 | ) 63 | 64 | # Compare kitti files. 65 | for file1 in input_folder.rglob("*"): 66 | if file1.is_dir(): 67 | continue 68 | file2 = output_folder / file1.relative_to(input_folder) 69 | 70 | contents1 = file1.read_text() 71 | contents2 = file2.read_text() 72 | assert contents1 == contents2 73 | 74 | 75 | def test_lightly_to_lightly(tmp_path: Path) -> None: 76 | input_folder = OBJ_DETECTION_FIXTURES_DIR / "lightly/detection-task-name" 77 | label_input = LightlyObjectDetectionInput( 78 | input_folder=input_folder, 79 | images_rel_path="../images", 80 | ) 81 | output_folder = tmp_path / "detection-task-name" 82 | LightlyObjectDetectionOutput(output_folder=output_folder).save( 83 | label_input=label_input 84 | ) 85 | 86 | # Compare Json files. 
87 | for file1 in input_folder.rglob("*.json"): 88 | if file1.is_dir(): 89 | continue 90 | file2 = output_folder / file1.relative_to(input_folder) 91 | 92 | contents1 = json.loads(file1.read_text()) 93 | contents2 = json.loads(file2.read_text()) 94 | assert contents1 == contents2 95 | -------------------------------------------------------------------------------- /docs/formats/object-detection/coco.md: -------------------------------------------------------------------------------- 1 | # COCO Object Detection Format 2 | 3 | ## Overview 4 | COCO (Common Objects in Context) is a large-scale object detection dataset format developed by Microsoft. The format has become one of the most widely adopted standards for object detection tasks. You can find the complete format specification in the [official COCO documentation](https://cocodataset.org/#format-data). 5 | 6 | ## Specification of COCO Detection Format 7 | 8 | COCO uses a single JSON file containing all annotations. The format consists of three main components: 9 | 10 | - **Images:** Defines metadata for each image in the dataset. 11 | - **Categories:** Defines the object classes. 12 | - **Annotations:** Defines object instances. 13 | 14 | ### Images 15 | Defines metadata for each image in the dataset: 16 | ```json 17 | { 18 | "id": 0, // Unique image ID 19 | "file_name": "image1.jpg", // Image filename 20 | "width": 640, // Image width in pixels 21 | "height": 416 // Image height in pixels 22 | } 23 | ``` 24 | 25 | ### Categories 26 | Defines the object classes: 27 | ```json 28 | { 29 | "id": 0, // Unique category ID 30 | "name": "cat" // Category name 31 | } 32 | ``` 33 | 34 | ### Annotations 35 | Defines object instances: 36 | ```json 37 | { 38 | "image_id": 0, // Reference to image 39 | "category_id": 2, // Reference to category 40 | "bbox": [540.0, 295.0, 23.0, 18.0] // [x, y, width, height] in absolute pixels 41 | } 42 | ``` 43 | 44 | ## Directory Structure of COCO Dataset 45 | ``` 46 | dataset/ 47 | ├── images/ # Image files 48 | │ ├── image1.jpg 49 | │ └── image2.jpg 50 | └── annotations.json # Single JSON file containing all annotations 51 | ``` 52 | 53 | ## Converting with Labelformat 54 | 55 | ### Command Line Interface 56 | Convert COCO format to YOLOv8: 57 | ```bash 58 | labelformat convert \ 59 | --task object-detection \ 60 | --input-format coco \ 61 | --input-file coco-labels/annotations.json \ 62 | --output-format yolov8 \ 63 | --output-file yolo-labels/data.yaml \ 64 | --output-split train 65 | ``` 66 | 67 | Convert YOLOv8 format to COCO: 68 | ```bash 69 | labelformat convert \ 70 | --task object-detection \ 71 | --input-format yolov8 \ 72 | --input-file yolo-labels/data.yaml \ 73 | --input-split train \ 74 | --output-format coco \ 75 | --output-file coco-labels/annotations.json 76 | ``` 77 | 78 | ### Python API 79 | ```python 80 | from pathlib import Path 81 | from labelformat.formats import COCOObjectDetectionInput, YOLOv8ObjectDetectionOutput 82 | 83 | # Load COCO format 84 | label_input = COCOObjectDetectionInput( 85 | input_file=Path("coco-labels/annotations.json") 86 | ) 87 | 88 | # Convert to YOLOv8 format 89 | YOLOv8ObjectDetectionOutput( 90 | output_file=Path("yolo-labels/data.yaml"), 91 | output_split="train", 92 | ).save(label_input=label_input) 93 | ``` 94 | 95 | ## Example 96 | Complete annotations.json example: 97 | ```json 98 | { 99 | "images": [ 100 | { 101 | "id": 0, 102 | "file_name": "image1.jpg", 103 | "width": 640, 104 | "height": 416 105 | } 106 | ], 107 | "categories": [ 108 | { 109 | "id": 0, 110 
| "name": "cat" 111 | } 112 | ], 113 | "annotations": [ 114 | { 115 | "image_id": 0, 116 | "category_id": 0, 117 | "bbox": [540.0, 295.0, 23.0, 18.0] 118 | } 119 | ] 120 | } 121 | ``` -------------------------------------------------------------------------------- /tests/integration/integration_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List, Optional 3 | 4 | import pytest 5 | 6 | from labelformat.model.multipolygon import MultiPolygon, Point 7 | 8 | INST_SEGMENTATION_FIXTURES_DIR = ( 9 | Path(__file__).parent.parent / "fixtures/instance_segmentation" 10 | ) 11 | OBJ_DETECTION_FIXTURES_DIR = Path(__file__).parent.parent / "fixtures/object_detection" 12 | 13 | COMMA_JOINED_CATEGORY_NAMES = ",".join( 14 | [ 15 | "person", 16 | "bicycle with space", 17 | "car", 18 | "motorcycle", 19 | "airplane", 20 | "bus", 21 | "train", 22 | "truck", 23 | "boat", 24 | "traffic light", 25 | "fire hydrant", 26 | "stop sign", 27 | "parking meter", 28 | "bench", 29 | "bird", 30 | "cat", 31 | "dog", 32 | "horse", 33 | "sheep", 34 | "cow", 35 | "elephant", 36 | "bear", 37 | "zebra", 38 | "giraffe", 39 | "backpack", 40 | "umbrella", 41 | "handbag", 42 | "tie", 43 | "suitcase", 44 | "frisbee", 45 | "skis", 46 | "snowboard", 47 | ] 48 | ) 49 | 50 | 51 | def assert_almost_equal_recursive( 52 | obj1: object, 53 | obj2: object, 54 | rel: Optional[float] = None, 55 | abs: Optional[float] = None, 56 | nan_ok: bool = False, 57 | ) -> None: 58 | if isinstance(obj1, dict): 59 | assert isinstance(obj2, dict) 60 | assert sorted(obj1.keys()) == sorted(obj2.keys()) 61 | for key in obj1.keys(): 62 | assert_almost_equal_recursive( 63 | obj1[key], obj2[key], rel=rel, abs=abs, nan_ok=nan_ok 64 | ) 65 | elif isinstance(obj1, list): 66 | assert isinstance(obj2, list) 67 | assert len(obj1) == len(obj2) 68 | for item1, item2 in zip(obj1, obj2): 69 | assert_almost_equal_recursive(item1, item2, rel=rel, abs=abs, nan_ok=nan_ok) 70 | elif isinstance(obj1, float) or isinstance(obj1, int): 71 | assert isinstance(obj2, float) or isinstance(obj2, int) 72 | assert pytest.approx(float(obj1), rel=1e-1) == float(obj2) 73 | else: 74 | assert obj1 == obj2 75 | 76 | 77 | def assert_multipolygons_almost_equal(a: MultiPolygon, b: MultiPolygon) -> None: 78 | """ 79 | Heuristic test that two MultiPolygons cover the same area. 80 | 81 | Ideally we would compute the intersection and union of the two MultiPolygon, 82 | which is non-trivial without a helper library. 
Instead we just check that 83 | * The set of points is almost equal 84 | * Their areas are almost equal 85 | """ 86 | precision = 3 87 | points_a = { 88 | (round(p[0], ndigits=precision), round(p[1], ndigits=precision)) 89 | for polygon in a.polygons 90 | for p in polygon 91 | } 92 | points_b = { 93 | (round(p[0], ndigits=precision), round(p[1], ndigits=precision)) 94 | for polygon in b.polygons 95 | for p in polygon 96 | } 97 | assert points_a == points_b, "multipolygons consist of a different set of points" 98 | 99 | area_a = sum(_polygon_area(polygon) for polygon in a.polygons) 100 | area_b = sum(_polygon_area(polygon) for polygon in b.polygons) 101 | assert abs(area_a - area_b) < (10 ** (-precision)), "multipolygon areas differ" 102 | 103 | 104 | def _polygon_area(polygon: List[Point]) -> float: 105 | """Compute the area of a polygon.""" 106 | n = len(polygon) 107 | area = 0.0 108 | for i in range(n): 109 | j = (i + 1) % n 110 | area += polygon[i][0] * polygon[j][1] 111 | area -= polygon[j][0] * polygon[i][1] 112 | area = abs(area) / 2.0 113 | return area 114 | -------------------------------------------------------------------------------- /tests/unit/formats/test_kitti.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from pathlib import Path 3 | from typing import Iterable 4 | 5 | import pytest 6 | from pytest_mock import MockerFixture 7 | 8 | from labelformat.formats.kitti import ( 9 | KittiObjectDetectionInput, 10 | KittiObjectDetectionOutput, 11 | ) 12 | from labelformat.model.bounding_box import BoundingBox 13 | from labelformat.model.category import Category 14 | from labelformat.model.image import Image 15 | from labelformat.model.object_detection import ( 16 | ImageObjectDetection, 17 | ObjectDetectionInput, 18 | SingleObjectDetection, 19 | ) 20 | 21 | from ... import simple_object_detection_label_input 22 | 23 | 24 | class TestKittiObjectDetectionInput: 25 | def test_get_labels(self, tmp_path: Path, mocker: MockerFixture) -> None: 26 | # Prepare inputs. 27 | annotation = ( 28 | "dog -1 -1 -10 10.0 20.0 30.0 40.0 -1 -1 -1 -1000 -1000 -1000 -10\n" 29 | "cat -1 -1 -10 50.0 60.0 70.0 80.0 -1 -1 -1 -1000 -1000 -1000 -10\n" 30 | ) 31 | label_path = tmp_path / "labels" / "image.txt" 32 | label_path.parent.mkdir(parents=True, exist_ok=True) 33 | label_path.write_text(annotation) 34 | 35 | # Mock the image file. 36 | (tmp_path / "images").mkdir() 37 | (tmp_path / "images/image.jpg").touch() 38 | mock_img = mocker.MagicMock() 39 | mock_img.size = (100, 200) 40 | mock_context_manager = mocker.MagicMock() 41 | mock_context_manager.__enter__.return_value = mock_img 42 | mocker.patch("PIL.Image.open", return_value=mock_context_manager) 43 | 44 | # Convert. 
45 | label_input = KittiObjectDetectionInput( 46 | input_folder=tmp_path / "labels", category_names="cat,dog,cow" 47 | ) 48 | labels = list(label_input.get_labels()) 49 | assert labels == [ 50 | ImageObjectDetection( 51 | image=Image(id=0, filename="image.jpg", width=100, height=200), 52 | objects=[ 53 | SingleObjectDetection( 54 | category=Category(id=1, name="dog"), 55 | box=BoundingBox( 56 | xmin=10.0, 57 | ymin=20.0, 58 | xmax=30.0, 59 | ymax=40.0, 60 | ), 61 | ), 62 | SingleObjectDetection( 63 | category=Category(id=0, name="cat"), 64 | box=BoundingBox( 65 | xmin=50.0, 66 | ymin=60.0, 67 | xmax=70.0, 68 | ymax=80.0, 69 | ), 70 | ), 71 | ], 72 | ) 73 | ] 74 | 75 | 76 | class TestKittiObjectDetectionOutput: 77 | @pytest.mark.parametrize("with_confidence", [True, False]) 78 | def test_save(self, tmp_path: Path, with_confidence: bool) -> None: 79 | output_folder = tmp_path / "labels" 80 | KittiObjectDetectionOutput(output_folder=output_folder).save( 81 | label_input=simple_object_detection_label_input.get_input( 82 | with_confidence=with_confidence 83 | ) 84 | ) 85 | assert output_folder.exists() 86 | assert output_folder.is_dir() 87 | 88 | filepaths = list(output_folder.glob("**/*")) 89 | assert len(filepaths) == 1 90 | path = filepaths[0] 91 | assert path == tmp_path / "labels" / "image.txt" 92 | 93 | contents = path.read_text() 94 | expected = ( 95 | "dog -1 -1 -10 10.0 20.0 30.0 40.0 -1 -1 -1 -1000 -1000 -1000 -10\n" 96 | "cat -1 -1 -10 50.0 60.0 70.0 80.0 -1 -1 -1 -1000 -1000 -1000 -10\n" 97 | ) 98 | assert contents == expected 99 | -------------------------------------------------------------------------------- /src/labelformat/formats/__init__.py: -------------------------------------------------------------------------------- 1 | from labelformat.formats.coco import ( 2 | COCOInstanceSegmentationInput, 3 | COCOInstanceSegmentationOutput, 4 | COCOObjectDetectionInput, 5 | COCOObjectDetectionOutput, 6 | ) 7 | from labelformat.formats.cvat import CVATObjectDetectionInput, CVATObjectDetectionOutput 8 | from labelformat.formats.kitti import ( 9 | KittiObjectDetectionInput, 10 | KittiObjectDetectionOutput, 11 | ) 12 | from labelformat.formats.labelbox import LabelboxObjectDetectionInput 13 | from labelformat.formats.lightly import ( 14 | LightlyObjectDetectionInput, 15 | LightlyObjectDetectionOutput, 16 | ) 17 | from labelformat.formats.maskpair import MaskPairInstanceSegmentationInput 18 | from labelformat.formats.pascalvoc import ( 19 | PascalVOCObjectDetectionInput, 20 | PascalVOCObjectDetectionOutput, 21 | ) 22 | from labelformat.formats.rtdetr import ( 23 | RTDETRObjectDetectionInput, 24 | RTDETRObjectDetectionOutput, 25 | ) 26 | from labelformat.formats.rtdetrv2 import ( 27 | RTDETRv2ObjectDetectionInput, 28 | RTDETRv2ObjectDetectionOutput, 29 | ) 30 | from labelformat.formats.yolov5 import ( 31 | YOLOv5ObjectDetectionInput, 32 | YOLOv5ObjectDetectionOutput, 33 | ) 34 | from labelformat.formats.yolov6 import ( 35 | YOLOv6ObjectDetectionInput, 36 | YOLOv6ObjectDetectionOutput, 37 | ) 38 | from labelformat.formats.yolov7 import ( 39 | YOLOv7ObjectDetectionInput, 40 | YOLOv7ObjectDetectionOutput, 41 | ) 42 | from labelformat.formats.yolov8 import ( 43 | YOLOv8InstanceSegmentationInput, 44 | YOLOv8InstanceSegmentationOutput, 45 | YOLOv8ObjectDetectionInput, 46 | YOLOv8ObjectDetectionOutput, 47 | ) 48 | from labelformat.formats.yolov9 import ( 49 | YOLOv9ObjectDetectionInput, 50 | YOLOv9ObjectDetectionOutput, 51 | ) 52 | from labelformat.formats.yolov10 import ( 53 | 
YOLOv10ObjectDetectionInput, 54 | YOLOv10ObjectDetectionOutput, 55 | ) 56 | from labelformat.formats.yolov11 import ( 57 | YOLOv11ObjectDetectionInput, 58 | YOLOv11ObjectDetectionOutput, 59 | ) 60 | from labelformat.formats.yolov12 import ( 61 | YOLOv12ObjectDetectionInput, 62 | YOLOv12ObjectDetectionOutput, 63 | ) 64 | from labelformat.formats.yolov26 import ( 65 | YOLOv26ObjectDetectionInput, 66 | YOLOv26ObjectDetectionOutput, 67 | ) 68 | 69 | __all__ = [ 70 | "COCOInstanceSegmentationInput", 71 | "COCOInstanceSegmentationOutput", 72 | "COCOObjectDetectionInput", 73 | "COCOObjectDetectionOutput", 74 | "CVATObjectDetectionInput", 75 | "CVATObjectDetectionOutput", 76 | "KittiObjectDetectionInput", 77 | "KittiObjectDetectionOutput", 78 | "LabelboxObjectDetectionInput", 79 | "LightlyObjectDetectionInput", 80 | "LightlyObjectDetectionOutput", 81 | "PascalVOCObjectDetectionInput", 82 | "PascalVOCObjectDetectionOutput", 83 | "RTDETRObjectDetectionInput", 84 | "RTDETRObjectDetectionOutput", 85 | "RTDETRv2ObjectDetectionInput", 86 | "RTDETRv2ObjectDetectionOutput", 87 | "YOLOv5ObjectDetectionInput", 88 | "YOLOv5ObjectDetectionOutput", 89 | "YOLOv6ObjectDetectionInput", 90 | "YOLOv6ObjectDetectionOutput", 91 | "YOLOv7ObjectDetectionInput", 92 | "YOLOv7ObjectDetectionOutput", 93 | "YOLOv8InstanceSegmentationInput", 94 | "YOLOv8InstanceSegmentationOutput", 95 | "YOLOv8ObjectDetectionInput", 96 | "YOLOv8ObjectDetectionOutput", 97 | "YOLOv9ObjectDetectionInput", 98 | "YOLOv9ObjectDetectionOutput", 99 | "YOLOv10ObjectDetectionInput", 100 | "YOLOv10ObjectDetectionOutput", 101 | "YOLOv11ObjectDetectionInput", 102 | "YOLOv11ObjectDetectionOutput", 103 | "YOLOv12ObjectDetectionInput", 104 | "YOLOv12ObjectDetectionOutput", 105 | "YOLOv26ObjectDetectionInput", 106 | "YOLOv26ObjectDetectionOutput", 107 | "MaskPairInstanceSegmentationInput", 108 | ] 109 | -------------------------------------------------------------------------------- /src/labelformat/model/binary_mask_segmentation.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass 4 | 5 | import numpy as np 6 | from numpy.typing import NDArray 7 | 8 | from labelformat.model.bounding_box import BoundingBox 9 | 10 | 11 | @dataclass(frozen=True) 12 | class BinaryMaskSegmentation: 13 | """ 14 | A binary mask. 15 | Internally, the mask is represented as a run-length encoding (RLE) format. 16 | """ 17 | 18 | _rle_row_wise: list[int] 19 | width: int 20 | height: int 21 | bounding_box: BoundingBox 22 | 23 | @classmethod 24 | def from_binary_mask( 25 | cls, binary_mask: NDArray[np.int_], bounding_box: BoundingBox 26 | ) -> "BinaryMaskSegmentation": 27 | """ 28 | Create a BinaryMaskSegmentation instance from a binary mask (2D numpy array) 29 | by converting it to RLE format. 30 | """ 31 | if not isinstance(binary_mask, np.ndarray): 32 | raise ValueError("Binary mask must be a numpy array.") 33 | if binary_mask.ndim != 2: 34 | raise ValueError("Binary mask must be a 2D array.") 35 | height, width = binary_mask.shape 36 | 37 | rle_row_wise = RLEDecoderEncoder.encode_row_wise_rle(binary_mask) 38 | return cls( 39 | _rle_row_wise=rle_row_wise, 40 | width=width, 41 | height=height, 42 | bounding_box=bounding_box, 43 | ) 44 | 45 | def get_binary_mask(self) -> NDArray[np.int_]: 46 | """ 47 | Get the binary mask (2D numpy array) from the RLE format. 
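The returned array contains only 0/1 values and has shape (height, width).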
48 | """ 49 | return RLEDecoderEncoder.decode_row_wise_rle( 50 | self._rle_row_wise, self.height, self.width 51 | ) 52 | 53 | 54 | class RLEDecoderEncoder: 55 | """ 56 | A class for encoding and decoding binary masks using run-length encoding (RLE). 57 | This class provides methods to encode a binary mask into RLE format and 58 | decode an RLE list back into a binary mask. 59 | 60 | The encoding and decoding can be done both row-wise and column-wise. 61 | 62 | Example: 63 | Consider a binary mask of shape 2x4: 64 | [[0, 1, 1, 0], 65 | [1, 1, 1, 1]] 66 | Row-wise RLE: [1, 2, 1, 4] 67 | Column-wise RLE: [1, 5, 1, 1] 68 | """ 69 | 70 | @staticmethod 71 | def encode_row_wise_rle(binary_mask: NDArray[np.int_]) -> list[int]: 72 | # Encodes a binary mask using row-major order. 73 | flat = np.concatenate(([-1], binary_mask.ravel(order="C"), [-1])) 74 | borders = np.nonzero(np.diff(flat))[0] 75 | rle = np.diff(borders) 76 | if flat[1]: 77 | rle = np.concatenate(([0], rle)) 78 | rle_list: list[int] = rle.tolist() 79 | return rle_list 80 | 81 | @staticmethod 82 | def encode_column_wise_rle(binary_mask: NDArray[np.int_]) -> list[int]: 83 | # Encodes a binary mask using column-major order. 84 | flat = np.concatenate(([-1], binary_mask.ravel(order="F"), [-1])) 85 | borders = np.nonzero(np.diff(flat))[0] 86 | rle = np.diff(borders) 87 | if flat[1]: 88 | rle = np.concatenate(([0], rle)) 89 | rle_list: list[int] = rle.tolist() 90 | return rle_list 91 | 92 | @staticmethod 93 | def decode_row_wise_rle( 94 | rle: list[int], height: int, width: int 95 | ) -> NDArray[np.int_]: 96 | # Decodes a row-major run-length encoded list into a 2D binary mask. 97 | run_val = 0 98 | decoded = [] 99 | for count in rle: 100 | decoded.extend([run_val] * count) 101 | run_val = 1 - run_val 102 | return np.array(decoded, dtype=np.int_).reshape((height, width), order="C") 103 | 104 | @staticmethod 105 | def decode_column_wise_rle( 106 | rle: list[int], height: int, width: int 107 | ) -> NDArray[np.int_]: 108 | # Decodes a column-major run-length encoded list into a 2D binary mask. 109 | run_val = 0 110 | decoded = [] 111 | for count in rle: 112 | decoded.extend([run_val] * count) 113 | run_val = 1 - run_val 114 | return np.array(decoded, dtype=np.int_).reshape((height, width), order="F") 115 | -------------------------------------------------------------------------------- /docs/formats/object-detection/labelformat.md: -------------------------------------------------------------------------------- 1 | # Labelformat Object Detection Format 2 | 3 | ## Overview 4 | The Labelformat format provides an in-memory representation for object detection detections. It's designed for programmatic creation and manipulation of label data within Python scripts. This format is particularly useful when you need to convert detections generated by custom models or dataloaders into a standardized structure before potentially converting them to other file-based formats supported by `labelformat`. 5 | 6 | ## Specification of Labelformat Detection Format 7 | The format is defined by the `LabelformatObjectDetectionInput` dataclass in `labelformat.formats.labelformat`. It holds the following information directly in Python objects: 8 | 9 | - `categories`: A list of `Category` objects, each containing `id` and `name`. 10 | - `images`: A list of `Image` objects, each containing `id`, `filename`, `width`, and `height`. 11 | - `detections`: A list of `ImageObjectDetection` objects. Each object links an `Image` to a list of `SingleObjectDetection` instances. 
12 | - `SingleObjectDetection` contains: 13 | - `category`: The `Category` object for the detection. 14 | - `box`: A `BoundingBox` object representing the location (can be created from various formats like XYXY, XYWH, CXCYWH). 15 | - `confidence`: An optional float score (0-1). 16 | 17 | **Note:** This format is primarily intended for programmatic use and does not support direct loading from files via the CLI using `add_cli_arguments`. It serves as a flexible in-memory structure. 18 | 19 | ## Example Usage (Programmatic Creation) 20 | ```python 21 | from labelformat.formats.labelformat import LabelformatObjectDetectionInput 22 | from labelformat.model.bounding_box import BoundingBox, BoundingBoxFormat 23 | from labelformat.model.category import Category 24 | from labelformat.model.image import Image 25 | from labelformat.model.object_detection import ( 26 | ImageObjectDetection, 27 | SingleObjectDetection, 28 | ) 29 | 30 | # Assume you have: 31 | # my_dataloader: An iterable yielding (PILImage, filename) 32 | # my_model: A model with a .predict() method and .get_categories() 33 | # prediction_bbox_format: The format of your model's output boxes ("xyxy", "xywh", etc.) 34 | 35 | categories = [ 36 | Category(id=i, name=category_name) 37 | for i, category_name in enumerate(my_model.get_categories()) 38 | ] 39 | category_map = {cat.name: cat for cat in categories} # Or map by index if model outputs index 40 | 41 | images = [] 42 | detections = [] 43 | image_id_counter = 0 44 | 45 | for pil_image, filename in my_dataloader: 46 | # Create Image object 47 | current_image = Image( 48 | id=image_id_counter, 49 | filename=filename, 50 | width=pil_image.width, 51 | height=pil_image.height 52 | ) 53 | images.append(current_image) 54 | 55 | # Get model predictions 56 | # Assuming predictions is a list of dicts like: 57 | # [{'box': [x, y, w, h], 'category_id': 0, 'confidence': 0.9}] 58 | model_predictions = my_model.predict(pil_image) 59 | 60 | # Create SingleObjectDetection objects 61 | objects = [] 62 | for pred in model_predictions: 63 | objects.append( 64 | SingleObjectDetection( 65 | # Ensure you correctly map prediction category to Category object 66 | category=categories[pred['category_id']], 67 | box=BoundingBox.from_format( 68 | bbox=pred['box'], 69 | format=BoundingBoxFormat(prediction_bbox_format), # Use BoundingBoxFormat enum 70 | ), 71 | confidence=pred.get('confidence'), # Use .get for optional confidence 72 | ) 73 | ) 74 | 75 | # Create ImageObjectDetection object 76 | detections.append( 77 | ImageObjectDetection( 78 | image=current_image, 79 | objects=objects, 80 | ) 81 | ) 82 | image_id_counter += 1 83 | 84 | # Create the final input object 85 | labelformat_input = LabelformatObjectDetectionInput( 86 | categories=categories, 87 | images=images, 88 | labels=detections, 89 | ) 90 | 91 | # Now labelformat_input can be used, e.g., to convert to another format: 92 | # from labelformat.formats.lightly import LightlyObjectDetectionOutput 93 | # output_converter = LightlyObjectDetectionOutput(output_folder="path/to/lightly_output") 94 | # output_converter.save(labelformat_input) 95 | 96 | ``` 97 | -------------------------------------------------------------------------------- /tests/integration/object_detection/test_to_yolov8.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | import yaml 5 | 6 | from labelformat.formats.coco import COCOObjectDetectionInput 7 | from labelformat.formats.kitti import
KittiObjectDetectionInput 8 | from labelformat.formats.labelbox import LabelboxObjectDetectionInput 9 | from labelformat.formats.lightly import LightlyObjectDetectionInput 10 | from labelformat.formats.pascalvoc import PascalVOCObjectDetectionInput 11 | from labelformat.formats.yolov8 import ( 12 | YOLOv8ObjectDetectionInput, 13 | YOLOv8ObjectDetectionOutput, 14 | ) 15 | from labelformat.model.object_detection import ObjectDetectionInput 16 | 17 | from ..integration_utils import COMMA_JOINED_CATEGORY_NAMES, OBJ_DETECTION_FIXTURES_DIR 18 | 19 | 20 | def test_yolov8_to_yolov8(tmp_path: Path) -> None: 21 | label_input = YOLOv8ObjectDetectionInput( 22 | input_file=OBJ_DETECTION_FIXTURES_DIR / "YOLOv8/example.yaml", 23 | input_split="train", 24 | ) 25 | _convert_and_test(label_input=label_input, tmp_path=tmp_path) 26 | 27 | 28 | def test_coco_to_yolov8(tmp_path: Path) -> None: 29 | label_input = COCOObjectDetectionInput( 30 | input_file=OBJ_DETECTION_FIXTURES_DIR / "COCO/train.json" 31 | ) 32 | _convert_and_test(label_input=label_input, tmp_path=tmp_path) 33 | 34 | 35 | def test_pascalvoc_to_yolov8(tmp_path: Path) -> None: 36 | label_input = PascalVOCObjectDetectionInput( 37 | input_folder=OBJ_DETECTION_FIXTURES_DIR / "PascalVOC", 38 | category_names=COMMA_JOINED_CATEGORY_NAMES, 39 | ) 40 | _convert_and_test(label_input=label_input, tmp_path=tmp_path) 41 | 42 | 43 | def test_kitti_to_yolov8(tmp_path: Path) -> None: 44 | label_input = KittiObjectDetectionInput( 45 | input_folder=OBJ_DETECTION_FIXTURES_DIR / "KITTI/labels", 46 | category_names=COMMA_JOINED_CATEGORY_NAMES, 47 | images_rel_path="../images/a-difficult subfolder", 48 | ) 49 | _convert_and_test(label_input=label_input, tmp_path=tmp_path) 50 | 51 | 52 | def test_lightly_to_yolov8(tmp_path: Path) -> None: 53 | label_input = LightlyObjectDetectionInput( 54 | input_folder=OBJ_DETECTION_FIXTURES_DIR / "lightly/detection-task-name", 55 | images_rel_path="../images", 56 | ) 57 | _convert_and_test(label_input=label_input, tmp_path=tmp_path) 58 | 59 | 60 | def test_labelbox_to_yolov8(tmp_path: Path) -> None: 61 | label_input = LabelboxObjectDetectionInput( 62 | input_file=OBJ_DETECTION_FIXTURES_DIR / "Labelbox/export-result.ndjson", 63 | category_names=COMMA_JOINED_CATEGORY_NAMES, 64 | ) 65 | _convert_and_test(label_input=label_input, tmp_path=tmp_path) 66 | 67 | 68 | def _convert_and_test(label_input: ObjectDetectionInput, tmp_path: Path) -> None: 69 | YOLOv8ObjectDetectionOutput( 70 | output_file=tmp_path / "data.yaml", 71 | output_split="train", 72 | ).save(label_input=label_input) 73 | 74 | # Compare yaml files. 75 | output_yaml = yaml.safe_load((tmp_path / "data.yaml").read_text()) 76 | expected_yaml = yaml.safe_load( 77 | (OBJ_DETECTION_FIXTURES_DIR / "YOLOv8/example.yaml").read_text() 78 | ) 79 | # TODO: Add split_subfolder to YOLOv8 output parameters. 80 | del output_yaml["train"] 81 | del expected_yaml["train"] 82 | assert output_yaml == expected_yaml 83 | 84 | # Compare label files.
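# Each YOLO label line has the form "<class_id> <cx> <cy> <w> <h>" with the box center and size normalized to [0, 1]; the helper below compares the five fields per line.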
85 | _assert_yolov8_labels_equal( 86 | dir1=OBJ_DETECTION_FIXTURES_DIR / "YOLOv8/labels/a-difficult subfolder", 87 | dir2=tmp_path / "labels", 88 | ) 89 | 90 | 91 | def _assert_yolov8_labels_equal( 92 | dir1: Path, 93 | dir2: Path, 94 | ) -> None: 95 | assert dir1.is_dir() 96 | assert dir2.is_dir() 97 | for file1 in dir1.rglob("*"): 98 | if file1.is_dir(): 99 | continue 100 | file2 = dir2 / file1.relative_to(dir1) 101 | for line1, line2 in zip(file1.open(), file2.open()): 102 | parts1 = line1.split() 103 | parts2 = line2.split() 104 | assert parts1[0] == parts2[0], "labels do not match" 105 | assert pytest.approx(float(parts1[1]), rel=1e-1) == float(parts2[1]) 106 | assert pytest.approx(float(parts1[2]), rel=1e-1) == float(parts2[2]) 107 | assert pytest.approx(float(parts1[3]), rel=1e-1) == float(parts2[3]) 108 | assert pytest.approx(float(parts1[4]), rel=1e-1) == float(parts2[4]) 109 | -------------------------------------------------------------------------------- /docs/tutorials/converting-coco-to-yolov8.md: -------------------------------------------------------------------------------- 1 | # Converting COCO Labels to YOLOv8 Format 2 | 3 | This tutorial walks you through converting object detection labels from the COCO format to the YOLOv8 format using Labelformat's CLI and Python API. 4 | 5 | ## Prerequisites 6 | 7 | - **Labelformat Installed:** Follow the [Installation Guide](../installation.md). 8 | - **COCO Dataset:** Ensure you have a COCO-formatted dataset ready for conversion. 9 | 10 | ## Step 1: Prepare Your Dataset 11 | 12 | Organize your dataset with the following structure: 13 | 14 | ``` 15 | project/ 16 | ├── coco-labels/ 17 | │ └── train.json 18 | ├── images/ 19 | │ ├── image1.jpg 20 | │ └── image2.jpg 21 | ``` 22 | 23 | Ensure that `train.json` contains the COCO annotations and that all images are located in the `images/` directory. 24 | 25 | ## Step 2: Using the CLI for Conversion 26 | 27 | Open your terminal and navigate to the `project/` directory. 28 | 29 | Run the following command to convert COCO labels to YOLOv8: 30 | 31 | ``` shell 32 | labelformat convert \ 33 | --task object-detection \ 34 | --input-format coco \ 35 | --input-file coco-labels/train.json \ 36 | --output-format yolov8 \ 37 | --output-file yolo-labels/data.yaml \ 38 | --output-split train 39 | ``` 40 | 41 | ### Explanation of the Command: 42 | 43 | - `--task object-detection`: Specifies the task type. 44 | - `--input-format coco`: Defines the input label format. 45 | - `--input-file coco-labels/train.json`: Path to the COCO annotations file. 46 | - `--output-format yolov8`: Desired output format. 47 | - `--output-file yolo-labels/data.yaml`: Path to save the YOLOv8 configuration file. 48 | - `--output-split train`: Data split label. 49 | 50 | ## Step 3: Verify the Conversion 51 | 52 | After running the command, your project structure should include: 53 | 54 | ``` 55 | project/ 56 | ├── yolo-labels/ 57 | │ ├── data.yaml 58 | │ └── labels/ 59 | │ ├── image1.txt 60 | │ └── image2.txt 61 | ``` 62 | 63 | - `data.yaml`: YOLOv8 configuration file containing category names and paths. 64 | - `labels/`: Directory containing YOLOv8-formatted label files. 65 | 66 | **Sample `data.yaml`:** 67 | ``` yaml 68 | names: 69 | 0: cat 70 | 1: dog 71 | 2: fish 72 | nc: 3 73 | path: . 
74 | train: images 75 | ``` 76 | 77 | **Sample Label File (`image1.txt`):** 78 | 79 | ``` 80 | 2 0.8617 0.7308 0.0359 0.0433 81 | 0 0.8180 0.6911 0.0328 0.0793 82 | ``` 83 | 84 | - **Format:** `<class_id> <x_center> <y_center> <width> <height>` 85 | - **Coordinates:** Normalized between 0 and 1. 86 | 87 | ## Step 4: Using the Python API for Conversion 88 | 89 | If you prefer using Python for more control, follow these steps. 90 | 91 | ### 4.1: Write the Conversion Script 92 | 93 | Create a Python script named `coco_to_yolov8.py` with the following content: 94 | 95 | ``` python 96 | from pathlib import Path 97 | from labelformat.formats import COCOObjectDetectionInput, YOLOv8ObjectDetectionOutput 98 | 99 | # Define input and output paths 100 | coco_input_path = Path("coco-labels/train.json") 101 | yolo_output_path = Path("yolo-labels/data.yaml") 102 | 103 | # Initialize input and output classes 104 | coco_input = COCOObjectDetectionInput(input_file=coco_input_path) 105 | yolo_output = YOLOv8ObjectDetectionOutput( 106 | output_file=yolo_output_path, 107 | output_split="train" 108 | ) 109 | 110 | # Perform the conversion 111 | yolo_output.save(label_input=coco_input) 112 | 113 | print("Conversion from COCO to YOLOv8 completed successfully!") 114 | ``` 115 | 116 | ### 4.2: Execute the Script 117 | 118 | Run the script using Python: 119 | 120 | ``` shell 121 | python coco_to_yolov8.py 122 | ``` 123 | 124 | Upon successful execution, you will see: 125 | 126 | ``` 127 | Conversion from COCO to YOLOv8 completed successfully! 128 | ``` 129 | 130 | ## Step 5: Integrate with Your Training Pipeline 131 | 132 | Use the generated YOLOv8 labels (`data.yaml` and `labels/` directory) to train your YOLOv8 models seamlessly. 133 | 134 | **Example YOLOv8 Training Command:** 135 | 136 | ``` shell 137 | yolo detect train data=yolo-labels/data.yaml model=yolov8s.pt epochs=100 imgsz=640 138 | ``` 139 | 140 | ## Conclusion 141 | 142 | You've successfully converted COCO labels to YOLOv8 format using both the CLI and Python API. Labelformat simplifies label format conversions, enabling efficient integration into your computer vision projects. 143 | 144 | --- 145 | 146 | ## Next Steps 147 | 148 | - Explore other supported 2D object detection formats in the [Supported Object Detection Formats](../formats/object-detection/index.md) section. 149 | -------------------------------------------------------------------------------- /tests/unit/test_utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Tuple 3 | 4 | import PIL.Image 5 | import pytest 6 | 7 | from labelformat.utils import ( 8 | ImageDimensionError, 9 | get_image_dimensions, 10 | get_jpeg_dimensions, 11 | get_png_dimensions, 12 | ) 13 | 14 | FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" 15 | 16 | 17 | def test_get_jpeg_dimensions() -> None: 18 | image_path = FIXTURES_DIR / "instance_segmentation/YOLOv8/images/000000109005.jpg" 19 | width, height = get_jpeg_dimensions(image_path) 20 | assert width == 640 21 | assert height == 428 22 | 23 | 24 | def test_get_jpeg_dimensions__baseline(tmp_path: Path) -> None: 25 | # Tests SOF0 (0xC0) - Baseline DCT. 26 | jpeg_path = tmp_path / "baseline.jpg" 27 | _create_test_jpeg(path=jpeg_path, size=(800, 600)) 28 | 29 | width, height = get_jpeg_dimensions(jpeg_path) 30 | assert width == 800 31 | assert height == 600 32 | 33 | 34 | def test_get_jpeg_dimensions__progressive(tmp_path: Path) -> None: 35 | # Tests SOF2 (0xC2) - Progressive DCT with DHT markers before SOF.
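# The image size is stored in the SOF segment, so the parser must scan past any DHT tables that come before it.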
36 | jpeg_path = tmp_path / "progressive.jpg" 37 | _create_test_jpeg(path=jpeg_path, size=(1920, 1440), progressive=True) 38 | 39 | width, height = get_jpeg_dimensions(jpeg_path) 40 | assert width == 1920 41 | assert height == 1440 42 | 43 | 44 | def test_get_jpeg_dimensions__optimized(tmp_path: Path) -> None: 45 | # Tests SOF0 (0xC0) with custom Huffman tables (more DHT markers before SOF). 46 | jpeg_path = tmp_path / "optimized.jpg" 47 | _create_test_jpeg(path=jpeg_path, size=(1024, 768), optimize=True) 48 | 49 | width, height = get_jpeg_dimensions(jpeg_path) 50 | assert width == 1024 51 | assert height == 768 52 | 53 | 54 | def test_get_jpeg_dimensions__progressive_optimized(tmp_path: Path) -> None: 55 | # Tests SOF2 (0xC2) with custom Huffman tables. 56 | jpeg_path = tmp_path / "progressive_optimized.jpg" 57 | _create_test_jpeg( 58 | path=jpeg_path, size=(2048, 1536), progressive=True, optimize=True 59 | ) 60 | 61 | width, height = get_jpeg_dimensions(jpeg_path) 62 | assert width == 2048 63 | assert height == 1536 64 | 65 | 66 | def test_get_jpeg_dimensions__nonexistent_file() -> None: 67 | with pytest.raises(ImageDimensionError): 68 | get_jpeg_dimensions(Path("nonexistent.jpg")) 69 | 70 | 71 | def test_get_jpeg_dimensions__invalid_format() -> None: 72 | yaml_file = FIXTURES_DIR / "object_detection/YOLOv8/example.yaml" 73 | with pytest.raises(ImageDimensionError): 74 | get_jpeg_dimensions(yaml_file) 75 | 76 | 77 | def test_get_png_dimensions() -> None: 78 | png_path = FIXTURES_DIR / "image_file_loading/0001.png" 79 | width, height = get_png_dimensions(png_path) 80 | assert width == 278 81 | assert height == 181 82 | 83 | 84 | def test_get_png_dimensions__nonexistent_file() -> None: 85 | with pytest.raises(ImageDimensionError): 86 | get_png_dimensions(Path("nonexistent.png")) 87 | 88 | 89 | def test_get_png_dimensions__invalid_format() -> None: 90 | yaml_file = FIXTURES_DIR / "object_detection/YOLOv8/example.yaml" 91 | with pytest.raises(ImageDimensionError): 92 | get_png_dimensions(yaml_file) 93 | 94 | 95 | def test_get_image_dimensions__jpeg() -> None: 96 | jpeg_path = FIXTURES_DIR / "instance_segmentation/YOLOv8/images/000000109005.jpg" 97 | width, height = get_image_dimensions(jpeg_path) 98 | assert width == 640 99 | assert height == 428 100 | 101 | 102 | def test_get_image_dimensions__jpeg_second_file() -> None: 103 | jpeg_path = FIXTURES_DIR / "instance_segmentation/YOLOv8/images/000000036086.jpg" 104 | width, height = get_image_dimensions(jpeg_path) 105 | assert width == 482 106 | assert height == 640 107 | 108 | 109 | def test_get_image_dimensions__png() -> None: 110 | png_path = FIXTURES_DIR / "image_file_loading/0001.png" 111 | width, height = get_image_dimensions(png_path) 112 | assert width == 278 113 | assert height == 181 114 | 115 | 116 | def test_get_image_dimensions__unsupported_format() -> None: 117 | yaml_file = FIXTURES_DIR / "object_detection/YOLOv8/example.yaml" 118 | with pytest.raises(Exception): 119 | get_image_dimensions(yaml_file) 120 | 121 | 122 | def _create_test_jpeg( 123 | path: Path, 124 | size: Tuple[int, int], 125 | progressive: bool = False, 126 | optimize: bool = False, 127 | ) -> None: 128 | img = PIL.Image.new("RGB", size, color="red") 129 | img.save(path, "JPEG", progressive=progressive, optimize=optimize) 130 | -------------------------------------------------------------------------------- /tests/simple_instance_segmentation_label_input.py: -------------------------------------------------------------------------------- 1 | from argparse import 
ArgumentParser 2 | from typing import Iterable 3 | 4 | from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation 5 | from labelformat.model.bounding_box import BoundingBox 6 | from labelformat.model.category import Category 7 | from labelformat.model.image import Image 8 | from labelformat.model.instance_segmentation import ( 9 | ImageInstanceSegmentation, 10 | InstanceSegmentationInput, 11 | SingleInstanceSegmentation, 12 | ) 13 | from labelformat.model.multipolygon import MultiPolygon 14 | 15 | 16 | class _BaseSimpleInstanceSegmentationInput(InstanceSegmentationInput): 17 | def get_categories(self) -> Iterable[Category]: 18 | return [ 19 | Category(id=0, name="cat"), 20 | Category(id=1, name="dog"), 21 | Category(id=2, name="cow"), 22 | ] 23 | 24 | def get_images(self) -> Iterable[Image]: 25 | return [ 26 | Image(id=0, filename="image.jpg", width=100, height=200), 27 | ] 28 | 29 | @staticmethod 30 | def add_cli_arguments(parser: ArgumentParser) -> None: 31 | # Default implementation (no arguments) 32 | pass 33 | 34 | 35 | class SimpleInstanceSegmentationInput(_BaseSimpleInstanceSegmentationInput): 36 | def get_labels(self) -> Iterable[ImageInstanceSegmentation]: 37 | return [ 38 | ImageInstanceSegmentation( 39 | image=Image(id=0, filename="image.jpg", width=100, height=200), 40 | objects=[ 41 | SingleInstanceSegmentation( 42 | category=Category(id=1, name="dog"), 43 | segmentation=MultiPolygon( 44 | polygons=[ 45 | [ 46 | (10.0, 10.0), 47 | (10.0, 20.0), 48 | (20.0, 20.0), 49 | (20.0, 10.0), 50 | ], 51 | ], 52 | ), 53 | ), 54 | SingleInstanceSegmentation( 55 | category=Category(id=0, name="cat"), 56 | segmentation=MultiPolygon( 57 | polygons=[ 58 | [ 59 | (30.0, 30.0), 60 | (40.0, 40.0), 61 | (40.0, 30.0), 62 | ], 63 | [ 64 | (50.0, 50.0), 65 | (60.0, 60.0), 66 | (60.0, 50.0), 67 | ], 68 | ], 69 | ), 70 | ), 71 | ], 72 | ) 73 | ] 74 | 75 | 76 | class SimpleInstanceSegmentationInputWithBinaryMask( 77 | _BaseSimpleInstanceSegmentationInput 78 | ): 79 | def get_labels(self) -> Iterable[ImageInstanceSegmentation]: 80 | return [ 81 | ImageInstanceSegmentation( 82 | image=Image(id=0, filename="image.jpg", width=100, height=200), 83 | objects=[ 84 | SingleInstanceSegmentation( 85 | category=Category(id=1, name="dog"), 86 | segmentation=MultiPolygon( 87 | polygons=[ 88 | [ 89 | (10.0, 10.0), 90 | (10.0, 20.0), 91 | (20.0, 20.0), 92 | (20.0, 10.0), 93 | ], 94 | ], 95 | ), 96 | ), 97 | SingleInstanceSegmentation( 98 | category=Category(id=0, name="cat"), 99 | segmentation=BinaryMaskSegmentation( 100 | _rle_row_wise=[1, 2, 3], 101 | width=3, 102 | height=2, 103 | bounding_box=BoundingBox( 104 | 0.0, 105 | 0.0, 106 | 3.0, 107 | 2.0, 108 | ), 109 | ), 110 | ), 111 | ], 112 | ) 113 | ] 114 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | # MacOS 163 | # Created by https://www.toptal.com/developers/gitignore/api/macos 164 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos 165 | 166 | ### macOS ### 167 | # General 168 | .DS_Store 169 | .AppleDouble 170 | .LSOverride 171 | 172 | # Icon must end with two \r 173 | Icon 174 | 175 | 176 | # Thumbnails 177 | ._* 178 | 179 | # Files that might appear in the root of a volume 180 | .DocumentRevisions-V100 181 | .fseventsd 182 | .Spotlight-V100 183 | .TemporaryItems 184 | .Trashes 185 | .VolumeIcon.icns 186 | .com.apple.timemachine.donotpresent 187 | 188 | # Directories potentially created on remote AFP share 189 | .AppleDB 190 | .AppleDesktop 191 | Network Trash Folder 192 | Temporary Items 193 | .apdisk 194 | 195 | ### macOS Patch ### 196 | # iCloud generated files 197 | *.icloud 198 | 199 | # End of https://www.toptal.com/developers/gitignore/api/macos 200 | 201 | # VScode 202 | .vscode 203 | *.code-workspace -------------------------------------------------------------------------------- /tests/unit/formats/test_pascalvoc.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from labelformat.formats.pascalvoc import ( 6 | PascalVOCObjectDetectionInput, 7 | PascalVOCObjectDetectionOutput, 8 | ) 9 | from labelformat.model.bounding_box import BoundingBox 10 | from labelformat.model.category import Category 11 | from labelformat.model.image import Image 12 | from labelformat.model.object_detection import ( 13 | ImageObjectDetection, 14 | SingleObjectDetection, 15 | ) 16 | 17 | from ... import simple_object_detection_label_input 18 | 19 | 20 | class TestPascalVOCObjectDetectionInput: 21 | def test_get_labels(self, tmp_path: Path) -> None: 22 | # Prepare inputs. 23 | annotation = """ 24 | <annotation> 25 | <filename>image.jpg</filename> 26 | <size> 27 | <width>100</width> 28 | <height>200</height> 29 | </size> 30 | <object> 31 | <name>dog</name> 32 | <bndbox> 33 | <xmin>10.0</xmin> 34 | <ymin>20.0</ymin> 35 | <xmax>30.0</xmax> 36 | <ymax>40.0</ymax> 37 | </bndbox> 38 | </object> 39 | </annotation> 40 | """ 41 | xml_path = tmp_path / "labels" / "image.xml" 42 | xml_path.parent.mkdir(parents=True, exist_ok=True) 43 | xml_path.write_text(annotation) 44 | 45 | # Convert.
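# Category ids follow the order given in category_names: cat=0, dog=1.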
46 | label_input = PascalVOCObjectDetectionInput( 47 | input_folder=tmp_path / "labels", category_names="cat,dog" 48 | ) 49 | labels = list(label_input.get_labels()) 50 | assert labels == [ 51 | ImageObjectDetection( 52 | image=Image(id=0, filename="image.jpg", width=100, height=200), 53 | objects=[ 54 | SingleObjectDetection( 55 | category=Category(id=1, name="dog"), 56 | box=BoundingBox( 57 | xmin=10.0, 58 | ymin=20.0, 59 | xmax=30.0, 60 | ymax=40.0, 61 | ), 62 | ) 63 | ], 64 | ) 65 | ] 66 | 67 | 68 | class TestPascalVOCObjectDetectionOutput: 69 | @pytest.mark.parametrize("with_confidence", [True, False]) 70 | def test_save(self, tmp_path: Path, with_confidence: bool) -> None: 71 | output_folder = tmp_path / "labels" 72 | PascalVOCObjectDetectionOutput(output_folder=output_folder).save( 73 | label_input=simple_object_detection_label_input.get_input( 74 | with_confidence=with_confidence 75 | ) 76 | ) 77 | assert output_folder.exists() 78 | assert output_folder.is_dir() 79 | 80 | filepaths = list(output_folder.glob("**/*")) 81 | assert len(filepaths) == 1 82 | path = filepaths[0] 83 | assert path == tmp_path / "labels" / "image.xml" 84 | 85 | contents = path.read_text() 86 | expected = """ 87 | 88 | labels 89 | image.jpg 90 | 91 | 100 92 | 200 93 | 3 94 | 95 | 0 96 | 97 | dog 98 | Unspecified 99 | 0 100 | 0 101 | 0 102 | 103 | 10.0 104 | 20.0 105 | 30.0 106 | 40.0 107 | 108 | 109 | 110 | cat 111 | Unspecified 112 | 0 113 | 0 114 | 0 115 | 116 | 50.0 117 | 60.0 118 | 70.0 119 | 80.0 120 | 121 | 122 | 123 | """ 124 | assert contents == expected.replace(" ", "").replace("\n", "") 125 | -------------------------------------------------------------------------------- /tests/fixtures/object_detection/Labelbox/export-result.ndjson: -------------------------------------------------------------------------------- 1 | {"data_row": {"id": "vglwulbsc2mwew8zd5fgk1ua1", "global_key": "a-weird Filename.asdf.jpg", "external_id": "a-weird Filename.asdf.jpg", "row_data": "https://url-to-the-image/a-weird%02dFilename.asdf.jpg"}, "media_attributes": {"height": 416, "width": 640, "mime_type": "image/jpg"}, "projects": {"8ikpemouf4oszagoc7vlj67wq": {"name": "labelbox-project-name", "labels": [{"label_kind": "Default", "version": "1.0.0", "id": "4wwiax7u91a4qhkd07xjphnk3", "annotations": {"objects": [{"feature_id": "7gi2p91mabs85shl7nss9ju5z", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "dog", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 295.0, "left": 540.0, "height": 18.0, "width": 23.0}}, {"feature_id": "ym8nbxl1n7w0g7k7p4n5dg540", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "person", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 271.0, "left": 513.0, "height": 33.0, "width": 21.0}}, {"feature_id": "rwsb8191nwfr9cshb1blh3rzl", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "person", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 268.0, "left": 586.0, "height": 23.0, "width": 18.0}}, {"feature_id": "cat2zrg7qp5asmrgckbwr01j3", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "person", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 250.0, "left": 548.0, "height": 8.0, "width": 5.0}}, {"feature_id": "14g13zjyinu24g7mf9zs4fepk", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "bench", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 261.0, "left": 377.0, "height": 
24.0, "width": 34.0}}, {"feature_id": "qj9wk8gj5fp8ff93kp3aooiy6", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "bench", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 282.0, "left": 448.0, "height": 23.0, "width": 40.0}}, {"feature_id": "ev4ribptxcxn1t6qioapsqvjj", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "bicycle with space", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 198.0, "left": 75.0, "height": 15.0, "width": 6.0}}, {"feature_id": "0agwevzdjhoe43n52rc17x9up", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "person", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 271.0, "left": 604.0, "height": 11.0, "width": 9.0}}], "classifications": [], "relationships": []}}]}}} 2 | {"data_row": {"id": "4p1jv9dlh5ne9bx58v5crrmm3", "global_key": "aNother-weird__ filename.with.many.characters.jpg", "external_id": "aNother-weird__ filename.with.many.characters.jpg", "row_data": "https://url-to-the-image/aNother-weird__%02dfilename.with.many.characters.jpg"}, "media_attributes": {"height": 426, "width": 640, "mime_type": "image/jpg"}, "projects": {"8ikpemouf4oszagoc7vlj67wq": {"name": "labelbox-project-name", "labels": [{"label_kind": "Default", "version": "1.0.0", "id": "gyovxn1a9jgc4k85yfn5f6si9", "annotations": {"objects": [{"feature_id": "w67fukt8vrz5uar8cah03tjfd", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "snowboard", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 32.0, "left": 334.0, "height": 363.0, "width": 155.0}}, {"feature_id": "2rfhoxtj0a1fvwgduguvh9f2j", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "snowboard", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 17.0, "left": 284.0, "height": 362.0, "width": 146.0}}, {"feature_id": "g9rpfsdd7qwcma5lqbdx2ceuj", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "snowboard", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 49.0, "left": 266.0, "height": 294.0, "width": 95.0}}, {"feature_id": "sumiwox0ncjnce4v5jd14o3ye", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "snowboard", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 53.0, "left": 243.0, "height": 293.0, "width": 86.0}}, {"feature_id": "u2z5hgyeliclch01gd8oour7w", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "snowboard", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 47.0, "left": 212.0, "height": 287.0, "width": 83.0}}, {"feature_id": "61eap8ocv3g3aaxrbaa7sdzk1", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "person", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 195.0, "left": 615.0, "height": 17.0, "width": 12.0}}, {"feature_id": "4x6b3a3f7vnjadxp75wytaapu", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "bicycle with space", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 194.0, "left": 586.0, "height": 18.0, "width": 12.0}}, {"feature_id": "bj8m06o1otbje80nstbwzirc5", "feature_schema_id": "9eribjrx1h4j46czvkrg04fro", "name": "snowboard", "annotation_kind": "ImageBoundingBox", "classifications": [], "bounding_box": {"top": 63.0, "left": 268.0, "height": 295.0, "width": 94.0}}], "classifications": [], "relationships": []}}]}}} 3 | 
-------------------------------------------------------------------------------- /tests/unit/formats/semantic_segmentation/test_pascalvoc.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | from pathlib import Path 5 | from typing import Dict 6 | 7 | import numpy as np 8 | import pytest 9 | from PIL import Image as PILImage 10 | 11 | from labelformat.formats.semantic_segmentation import pascalvoc as pascalvoc_module 12 | from labelformat.formats.semantic_segmentation.pascalvoc import ( 13 | PascalVOCSemanticSegmentationInput, 14 | ) 15 | from labelformat.model.image import Image 16 | from tests.unit.test_utils import FIXTURES_DIR 17 | 18 | FIXTURES_ROOT_PASCALVOC = FIXTURES_DIR / "semantic_segmentation/pascalvoc" 19 | IMAGES_DIR = FIXTURES_ROOT_PASCALVOC / "JPEGImages" 20 | MASKS_DIR = FIXTURES_ROOT_PASCALVOC / "SegmentationClass" 21 | CLASS_MAP_PATH = FIXTURES_ROOT_PASCALVOC / "class_id_to_name.json" 22 | 23 | 24 | def _load_class_mapping_int_keys() -> Dict[int, str]: 25 | with CLASS_MAP_PATH.open("r") as f: 26 | data = json.load(f) 27 | return {int(k): str(v) for k, v in data.items()} 28 | 29 | 30 | class TestPascalVOCSemanticSegmentationInput: 31 | def test_from_dirs__builds_categories_and_images(self) -> None: 32 | mapping = _load_class_mapping_int_keys() 33 | ds = PascalVOCSemanticSegmentationInput.from_dirs( 34 | images_dir=IMAGES_DIR, masks_dir=MASKS_DIR, class_id_to_name=mapping 35 | ) 36 | 37 | # Categories contain the same ids and names as mapping 38 | cats = list(ds.get_categories()) 39 | loaded_mapping = {c.id: c.name for c in cats} 40 | assert loaded_mapping == mapping 41 | 42 | # Images are discovered from the folder 43 | imgs = list(ds.get_images()) 44 | assert len(imgs) == 2 45 | filenames = {img.filename for img in imgs} 46 | assert filenames == {"2007_000032.jpg", "subdir/2007_000033.jpg"} 47 | 48 | def test_get_mask__returns_int2d_and_matches_image_shape(self) -> None: 49 | mapping = _load_class_mapping_int_keys() 50 | ds = PascalVOCSemanticSegmentationInput.from_dirs( 51 | images_dir=IMAGES_DIR, masks_dir=MASKS_DIR, class_id_to_name=mapping 52 | ) 53 | 54 | for img in ds.get_images(): 55 | mask = ds.get_mask(img.filename) 56 | assert mask.array.ndim == 2 57 | assert np.issubdtype(mask.array.dtype, np.integer) 58 | assert mask.array.shape == (img.height, img.width) 59 | 60 | def test_from_dirs__missing_mask_raises(self, tmp_path: Path) -> None: 61 | masks_tmp = tmp_path / "SegmentationClass" 62 | masks_tmp.mkdir(parents=True, exist_ok=True) 63 | # Copy only one of the two masks 64 | (masks_tmp / "2007_000032.png").write_bytes( 65 | (MASKS_DIR / "2007_000032.png").read_bytes() 66 | ) 67 | 68 | with pytest.raises( 69 | ValueError, match=r"Missing mask PNG for image 'subdir/2007_000033\.jpg'" 70 | ): 71 | PascalVOCSemanticSegmentationInput.from_dirs( 72 | images_dir=IMAGES_DIR, 73 | masks_dir=masks_tmp, 74 | class_id_to_name=_load_class_mapping_int_keys(), 75 | ) 76 | 77 | def test_get_mask__unknown_image_raises(self) -> None: 78 | ds = PascalVOCSemanticSegmentationInput.from_dirs( 79 | images_dir=IMAGES_DIR, 80 | masks_dir=MASKS_DIR, 81 | class_id_to_name=_load_class_mapping_int_keys(), 82 | ) 83 | with pytest.raises( 84 | ValueError, 85 | match=r"Unknown image filepath does_not_exist\.jpg", 86 | ): 87 | ds.get_mask("does_not_exist.jpg") 88 | 89 | 90 | def test__validate_mask__unknown_class_value_raises() -> None: 91 | # Arrange: simple image and a mask with out-of-vocabulary 
value 92 | img = Image(id=0, filename="foo.jpg", width=4, height=3) 93 | mask = np.zeros((3, 4), dtype=np.int_) 94 | mask[0, 0] = 254 95 | valid_ids = set(_load_class_mapping_int_keys().keys()) 96 | 97 | # Act/Assert 98 | with pytest.raises(ValueError, match=r"Mask contains unknown class ids: 254"): 99 | pascalvoc_module._validate_mask( 100 | image_obj=img, mask_np=mask, valid_class_ids=valid_ids 101 | ) 102 | 103 | 104 | def test__validate_mask__shape_mismatch_raises() -> None: 105 | img = Image(id=0, filename="foo.jpg", width=4, height=3) 106 | # Wrong shape (2,4) instead of (3,4) 107 | mask = np.zeros((2, 4), dtype=np.int_) 108 | valid_ids = set(_load_class_mapping_int_keys().keys()) 109 | 110 | with pytest.raises(ValueError, match=r"Mask shape must match image dimensions"): 111 | pascalvoc_module._validate_mask( 112 | image_obj=img, mask_np=mask, valid_class_ids=valid_ids 113 | ) 114 | 115 | 116 | def test__validate_mask__non_2d_mask_raises() -> None: 117 | img = Image(id=0, filename="foo.jpg", width=4, height=3) 118 | # 3D array to simulate multi-channel mask 119 | mask = np.zeros((3, 4, 3), dtype=np.int_) 120 | valid_ids = set(_load_class_mapping_int_keys().keys()) 121 | 122 | with pytest.raises(ValueError, match=r"Mask must be 2D \(H, W\)"): 123 | pascalvoc_module._validate_mask( 124 | image_obj=img, mask_np=mask, valid_class_ids=valid_ids 125 | ) 126 | -------------------------------------------------------------------------------- /tests/integration/object_detection/test_inverse.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from pytest_mock import MockerFixture 5 | 6 | from labelformat.formats.coco import COCOObjectDetectionInput, COCOObjectDetectionOutput 7 | from labelformat.formats.kitti import ( 8 | KittiObjectDetectionInput, 9 | KittiObjectDetectionOutput, 10 | ) 11 | from labelformat.formats.lightly import ( 12 | LightlyObjectDetectionInput, 13 | LightlyObjectDetectionOutput, 14 | ) 15 | from labelformat.formats.pascalvoc import ( 16 | PascalVOCObjectDetectionInput, 17 | PascalVOCObjectDetectionOutput, 18 | ) 19 | from labelformat.formats.yolov8 import ( 20 | YOLOv8ObjectDetectionInput, 21 | YOLOv8ObjectDetectionOutput, 22 | ) 23 | from labelformat.model.object_detection import ( 24 | ImageObjectDetection, 25 | SingleObjectDetection, 26 | ) 27 | 28 | from ... import simple_object_detection_label_input 29 | 30 | 31 | def test_coco_inverse(tmp_path: Path) -> None: 32 | start_label_input = simple_object_detection_label_input.get_input() 33 | COCOObjectDetectionOutput(output_file=tmp_path / "train.json").save( 34 | label_input=start_label_input 35 | ) 36 | end_label_input = COCOObjectDetectionInput(input_file=tmp_path / "train.json") 37 | assert list(start_label_input.get_labels()) == list(end_label_input.get_labels()) 38 | 39 | 40 | def test_yolov8_inverse(tmp_path: Path, mocker: MockerFixture) -> None: 41 | start_label_input = simple_object_detection_label_input.get_input() 42 | YOLOv8ObjectDetectionOutput( 43 | output_file=tmp_path / "data.yaml", 44 | output_split="train", 45 | ).save(label_input=start_label_input) 46 | 47 | # For YOLOv8 we have to also provide the image files. 
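# _mock_input_images creates a dummy image file and patches PIL.Image.open to report a 100x200 size, since the reader loads image dimensions from disk.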
48 | _mock_input_images(mocker=mocker, folder=tmp_path / "images") 49 | 50 | end_label_input = YOLOv8ObjectDetectionInput( 51 | input_file=tmp_path / "data.yaml", 52 | input_split="train", 53 | ) 54 | assert list(start_label_input.get_labels()) == list(end_label_input.get_labels()) 55 | 56 | 57 | def test_pascalvoc_inverse(tmp_path: Path) -> None: 58 | start_label_input = simple_object_detection_label_input.get_input() 59 | PascalVOCObjectDetectionOutput(output_folder=tmp_path).save( 60 | label_input=start_label_input 61 | ) 62 | end_label_input = PascalVOCObjectDetectionInput( 63 | input_folder=tmp_path, 64 | category_names="cat,dog,cow", 65 | ) 66 | assert list(start_label_input.get_labels()) == list(end_label_input.get_labels()) 67 | 68 | 69 | def test_kitti_inverse(tmp_path: Path, mocker: MockerFixture) -> None: 70 | start_label_input = simple_object_detection_label_input.get_input() 71 | KittiObjectDetectionOutput(output_folder=tmp_path / "labels").save( 72 | label_input=start_label_input 73 | ) 74 | 75 | # For KITTI we have to also provide the image files. 76 | _mock_input_images(mocker=mocker, folder=tmp_path / "images") 77 | 78 | end_label_input = KittiObjectDetectionInput( 79 | input_folder=tmp_path / "labels", 80 | category_names="cat,dog,cow", 81 | ) 82 | assert list(start_label_input.get_labels()) == list(end_label_input.get_labels()) 83 | 84 | 85 | @pytest.mark.parametrize("with_confidence", [True, False]) 86 | def test_lightly_inverse( 87 | tmp_path: Path, mocker: MockerFixture, with_confidence: bool 88 | ) -> None: 89 | start_label_input = simple_object_detection_label_input.get_input( 90 | with_confidence=with_confidence 91 | ) 92 | LightlyObjectDetectionOutput(output_folder=tmp_path / "task").save( 93 | label_input=start_label_input 94 | ) 95 | 96 | # For Lightly we have to also provide the image files. 97 | _mock_input_images(mocker=mocker, folder=tmp_path / "images") 98 | 99 | end_label_input = LightlyObjectDetectionInput( 100 | input_folder=tmp_path / "task", 101 | ) 102 | 103 | if with_confidence: 104 | expected_labels = list(start_label_input.get_labels()) 105 | else: 106 | # If confidence is None in the input, it is set to 0.0 in the output. 
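# We therefore rebuild the expected labels with confidence=0.0 before comparing.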
107 | expected_labels = [] 108 | for label in start_label_input.get_labels(): 109 | expected_objects = [] 110 | for obj in label.objects: 111 | if obj.confidence is None: 112 | obj = SingleObjectDetection( 113 | category=obj.category, 114 | box=obj.box, 115 | confidence=0.0, 116 | ) 117 | expected_objects.append(obj) 118 | expected_labels.append( 119 | ImageObjectDetection( 120 | image=label.image, 121 | objects=expected_objects, 122 | ) 123 | ) 124 | assert list(end_label_input.get_labels()) == expected_labels 125 | 126 | 127 | def _mock_input_images(mocker: MockerFixture, folder: Path) -> None: 128 | folder.mkdir() 129 | (folder / "image.jpg").touch() 130 | mock_img = mocker.MagicMock() 131 | mock_img.size = (100, 200) 132 | mock_context_manager = mocker.MagicMock() 133 | mock_context_manager.__enter__.return_value = mock_img 134 | mocker.patch("PIL.Image.open", return_value=mock_context_manager) 135 | -------------------------------------------------------------------------------- /tests/integration/test_maskpair_cli.py: -------------------------------------------------------------------------------- 1 | """Test CLI integration for maskpair format.""" 2 | 3 | import json 4 | import subprocess 5 | from pathlib import Path 6 | 7 | import cv2 8 | import numpy as np 9 | from numpy.typing import NDArray 10 | 11 | 12 | def create_cli_test_data(base_path: Path) -> None: 13 | """Create test data for CLI testing.""" 14 | images_dir = base_path / "images" 15 | masks_dir = base_path / "masks" 16 | images_dir.mkdir(parents=True) 17 | masks_dir.mkdir(parents=True) 18 | 19 | # Create a few test image/mask pairs 20 | for i in range(2): 21 | # Create simple test image 22 | image = np.random.randint(50, 200, (80, 80, 3), dtype=np.uint8) 23 | image_path = images_dir / f"sample_{i:02d}.jpg" 24 | cv2.imwrite(str(image_path), image) 25 | 26 | # Create corresponding mask 27 | mask: NDArray[np.uint8] = np.zeros((80, 80), dtype=np.uint8) 28 | # Add some rectangular regions 29 | mask[20:60, 20:40] = 255 30 | if i == 1: 31 | mask[20:40, 50:70] = 255 # Add second region for second image 32 | 33 | mask_path = masks_dir / f"sample_{i:02d}.png" 34 | cv2.imwrite(str(mask_path), mask) 35 | 36 | 37 | def test_cli_maskpair_to_coco(tmp_path: Path) -> None: 38 | """Test complete CLI conversion from maskpair to COCO.""" 39 | create_cli_test_data(tmp_path) 40 | 41 | output_file = tmp_path / "output.json" 42 | 43 | # Run CLI command 44 | cmd = [ 45 | "labelformat", 46 | "convert", 47 | "--task", 48 | "instance-segmentation", 49 | "--input-format", 50 | "maskpair", 51 | "--image-glob", 52 | "images/*.jpg", 53 | "--mask-glob", 54 | "masks/*.png", 55 | "--base-path", 56 | str(tmp_path), 57 | "--category-names", 58 | "crack,defect", 59 | "--pairing-mode", 60 | "stem", 61 | "--segmentation-type", 62 | "polygon", 63 | "--min-area", 64 | "100", 65 | "--output-format", 66 | "coco", 67 | "--output-file", 68 | str(output_file), 69 | ] 70 | 71 | # Execute CLI command 72 | result = subprocess.run(cmd, cwd=tmp_path, capture_output=True, text=True) 73 | 74 | # Verify command succeeded 75 | assert result.returncode == 0, f"CLI command failed: {result.stderr}" 76 | 77 | # Verify output file was created 78 | assert output_file.exists() 79 | 80 | # Verify COCO format structure 81 | with output_file.open() as f: 82 | coco_data = json.load(f) 83 | 84 | assert "images" in coco_data 85 | assert "categories" in coco_data 86 | assert "annotations" in coco_data 87 | 88 | # Should have 2 images 89 | assert len(coco_data["images"]) == 2 90 | 91 | # 
Should have 2 categories (crack, defect) 92 | assert len(coco_data["categories"]) == 2 93 | 94 | # Should have some annotations 95 | assert len(coco_data["annotations"]) > 0 96 | 97 | # Verify polygon format 98 | for ann in coco_data["annotations"]: 99 | assert ann["iscrowd"] == 0 # polygon format 100 | assert isinstance(ann["segmentation"], list) 101 | 102 | 103 | def test_cli_maskpair_to_yolov8(tmp_path: Path) -> None: 104 | """Test CLI conversion from maskpair to YOLOv8.""" 105 | create_cli_test_data(tmp_path) 106 | 107 | output_file = tmp_path / "data.yaml" 108 | 109 | # Run CLI command 110 | cmd = [ 111 | "labelformat", 112 | "convert", 113 | "--task", 114 | "instance-segmentation", 115 | "--input-format", 116 | "maskpair", 117 | "--image-glob", 118 | "images/*.jpg", 119 | "--mask-glob", 120 | "masks/*.png", 121 | "--base-path", 122 | str(tmp_path), 123 | "--category-names", 124 | "object", 125 | "--pairing-mode", 126 | "stem", 127 | "--segmentation-type", 128 | "polygon", 129 | "--output-format", 130 | "yolov8", 131 | "--output-file", 132 | str(output_file), 133 | "--output-split", 134 | "train", 135 | ] 136 | 137 | # Execute CLI command 138 | result = subprocess.run(cmd, cwd=tmp_path, capture_output=True, text=True) 139 | 140 | # Verify command succeeded 141 | assert result.returncode == 0, f"CLI command failed: {result.stderr}" 142 | 143 | # Verify YOLOv8 structure was created 144 | assert output_file.exists() 145 | assert (tmp_path / "labels").exists() 146 | 147 | 148 | def test_cli_help_shows_maskpair_options() -> None: 149 | """Test that CLI help shows maskpair-specific options.""" 150 | result = subprocess.run( 151 | [ 152 | "labelformat", 153 | "convert", 154 | "--task", 155 | "instance-segmentation", 156 | "--input-format", 157 | "maskpair", 158 | "--output-format", 159 | "coco", 160 | "--help", 161 | ], 162 | capture_output=True, 163 | text=True, 164 | ) 165 | 166 | # Verify help includes our custom options 167 | assert "--image-glob" in result.stdout 168 | assert "--mask-glob" in result.stdout 169 | assert "--pairing-mode" in result.stdout 170 | assert "--category-names" in result.stdout 171 | assert "--segmentation-type" in result.stdout 172 | assert "--min-area" in result.stdout 173 | assert "--threshold" in result.stdout 174 | -------------------------------------------------------------------------------- /src/labelformat/formats/kitti.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from argparse import ArgumentParser 3 | from pathlib import Path 4 | from typing import Iterable 5 | 6 | from labelformat import utils 7 | from labelformat.cli.registry import Task, cli_register 8 | from labelformat.model.bounding_box import BoundingBox, BoundingBoxFormat 9 | from labelformat.model.category import Category 10 | from labelformat.model.image import Image 11 | from labelformat.model.object_detection import ( 12 | ImageObjectDetection, 13 | ObjectDetectionInput, 14 | ObjectDetectionOutput, 15 | SingleObjectDetection, 16 | ) 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | @cli_register(format="kitti", task=Task.OBJECT_DETECTION) 22 | class KittiObjectDetectionInput(ObjectDetectionInput): 23 | @staticmethod 24 | def add_cli_arguments(parser: ArgumentParser) -> None: 25 | parser.add_argument( 26 | "--input-folder", 27 | type=Path, 28 | required=True, 29 | help="Input folder containing Kitti label txt files", 30 | ) 31 | parser.add_argument( 32 | "--category-names", 33 | type=str, 34 | required=True, 35 | 
help="Comma separated list of category names without spaces, e.g. 'dog,cat'", 36 | ) 37 | parser.add_argument( 38 | "--images-rel-path", 39 | type=str, 40 | default="../images", 41 | help="Relative path to images folder from label folder", 42 | ) 43 | 44 | def __init__( 45 | self, 46 | input_folder: Path, 47 | category_names: str, 48 | images_rel_path: str = "../images", 49 | ) -> None: 50 | self._input_folder = input_folder 51 | self._images_rel_path = images_rel_path 52 | self._categories = [ 53 | Category(id=idx, name=name) 54 | for idx, name in enumerate(category_names.split(",")) 55 | ] 56 | 57 | def get_categories(self) -> Iterable[Category]: 58 | return self._categories 59 | 60 | def get_images(self) -> Iterable[Image]: 61 | yield from utils.get_images_from_folder( 62 | folder=self._input_folder / self._images_rel_path 63 | ) 64 | 65 | def get_labels(self) -> Iterable[ImageObjectDetection]: 66 | category_name_to_category = {cat.name: cat for cat in self._categories} 67 | 68 | for image in self.get_images(): 69 | label_path = (self._input_folder / image.filename).with_suffix(".txt") 70 | if not label_path.exists(): 71 | logger.warning( 72 | f"Label file '{label_path}' for image '{image.filename}' does not exist." 73 | ) 74 | 75 | objects = [] 76 | with label_path.open() as file: 77 | for line in file.readlines(): 78 | # Last 14 tokens are floats. The rest in the beginning is a label. 79 | tokens = line.split(" ") 80 | category_name = " ".join(tokens[:-14]) 81 | left = float(tokens[-11]) 82 | top = float(tokens[-10]) 83 | right = float(tokens[-9]) 84 | bottom = float(tokens[-8]) 85 | objects.append( 86 | SingleObjectDetection( 87 | category=category_name_to_category[category_name], 88 | box=BoundingBox.from_format( 89 | bbox=[left, top, right, bottom], 90 | format=BoundingBoxFormat.XYXY, 91 | ), 92 | ) 93 | ) 94 | 95 | yield ImageObjectDetection( 96 | image=image, 97 | objects=objects, 98 | ) 99 | 100 | 101 | @cli_register(format="kitti", task=Task.OBJECT_DETECTION) 102 | class KittiObjectDetectionOutput(ObjectDetectionOutput): 103 | @staticmethod 104 | def add_cli_arguments(parser: ArgumentParser) -> None: 105 | parser.add_argument( 106 | "--output-folder", 107 | type=Path, 108 | required=True, 109 | help="Output folder for generated Kitti label txt files", 110 | ) 111 | 112 | def __init__( 113 | self, 114 | output_folder: Path, 115 | ) -> None: 116 | self._output_folder = output_folder 117 | 118 | def save(self, label_input: ObjectDetectionInput) -> None: 119 | for image_label in label_input.get_labels(): 120 | label_path = (self._output_folder / image_label.image.filename).with_suffix( 121 | ".txt" 122 | ) 123 | label_path.parent.mkdir(parents=True, exist_ok=True) 124 | with label_path.open("w") as file: 125 | for obj in image_label.objects: 126 | left, top, right, bottom = obj.box.to_format( 127 | format=BoundingBoxFormat.XYXY 128 | ) 129 | # Unknown values match Kitti dataset "DontCare" label values. 130 | file.write( 131 | f"{obj.category.name} " 132 | "-1 " # truncated 133 | "-1 " # occluded 134 | "-10 " # alpha 135 | f"{left} {top} {right} {bottom} " # bbox 136 | "-1 -1 -1 " # dimensions 137 | "-1000 -1000 -1000 " # location 138 | "-10\n" # rotation_y 139 | ) 140 | -------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | # Detailed Usage Guide 2 | 3 | Labelformat offers both a Command-Line Interface (CLI) and a Python API to cater to different workflows. 
This guide provides in-depth instructions on how to use both interfaces effectively. 4 | 5 | To get a detailed overview of the supported formats and their specifications, please refer to the [Supported Object Detection Formats](formats/object-detection/index.md) section. 6 | 7 | ## CLI Usage 8 | 9 | Labelformat's CLI provides a straightforward way to convert label formats directly from the terminal. 10 | 11 | ### Basic Conversion Command 12 | 13 | **Example:** Convert Object Detection labels from COCO to YOLOv8. 14 | 15 | ``` shell 16 | labelformat convert \ 17 | --task object-detection \ 18 | --input-format coco \ 19 | --input-file path/to/coco/train.json \ 20 | --output-format yolov8 \ 21 | --output-file path/to/yolo/data.yaml \ 22 | --output-split train 23 | ``` 24 | 25 | **Parameters:** 26 | 27 | - `--task`: Specify the task type (`object-detection` or `instance-segmentation`). 28 | - `--input-format`: The format of the input labels (e.g., `coco`). 29 | - `--input-file` or `--input-folder`: Path to the input label file or folder. 30 | - `--output-format`: The desired output label format (e.g., `yolov8`). 31 | - `--output-file` or `--output-folder`: Path to save the converted labels. 32 | - `--output-split`: Define the data split (`train`, `val`, `test`). 33 | 34 | ### Advanced CLI Options 35 | 36 | **Listing Supported Formats:** 37 | 38 | To see all supported input and output formats for a specific task: 39 | 40 | ``` shell 41 | labelformat convert --task object-detection --help 42 | ``` 43 | 44 | **Specifying Category Names:** 45 | 46 | Some formats require explicit category names. The names must be separated by commas and must be in the same order as the categories in the input file. 47 | 48 | Use the `--category-names` argument to specify the category names: 49 | 50 | ``` shell 51 | labelformat convert \ 52 | --task object-detection \ 53 | --input-format labelbox \ 54 | --input-file labelbox-export.ndjson \ 55 | --category-names cat,dog,fish \ 56 | --output-format coco \ 57 | --output-file coco-output/train.json \ 58 | --output-split train 59 | ``` 60 | 61 | **Handling Missing Images:** 62 | 63 | When converting formats that require image files (e.g., YOLO to COCO), ensure your image paths are correctly specified. Use `--images-rel-path` to define the relative path from the input folder to the images folder: 64 | 65 | ``` shell 66 | labelformat convert \ 67 | --task object-detection \ 68 | --input-format kitti \ 69 | --input-folder kitti-labels/labels \ 70 | --images-rel-path ../images \ 71 | --output-format pascalvoc \ 72 | --output-folder pascalvoc-labels 73 | ``` 74 | 75 | ## Python API Usage 76 | 77 | For more flexible integrations, Labelformat provides a Python API. 78 | 79 | ### Basic Conversion 80 | 81 | **Example:** Convert COCO to YOLOv8. 82 | 83 | ``` python 84 | from pathlib import Path 85 | from labelformat.formats import COCOObjectDetectionInput, YOLOv8ObjectDetectionOutput 86 | 87 | # Initialize input and output classes 88 | coco_input = COCOObjectDetectionInput(input_file=Path("coco-labels/train.json")) 89 | yolo_output = YOLOv8ObjectDetectionOutput( 90 | output_file=Path("yolo-labels/data.yaml"), 91 | output_split="train" 92 | ) 93 | 94 | # Perform the conversion 95 | yolo_output.save(label_input=coco_input) 96 | 97 | print("Conversion from COCO to YOLOv8 completed successfully!") 98 | ``` 99 | 100 | ### Customizing Conversion 101 | 102 | **Example:** Adding Custom Fields or Handling Special Cases. 
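Both converter sides are plain Python objects, so handling a special case usually means wrapping an input class and adjusting the labels it yields before an output class saves them. Below is a minimal sketch of this pattern; the `ConfidenceFilteredInput` wrapper is hypothetical (not part of Labelformat) and simply duck-types the input interface that output classes rely on (`get_categories()`, `get_images()`, `get_labels()`):

``` python
from pathlib import Path
from typing import Iterable

from labelformat.formats import COCOObjectDetectionInput, YOLOv8ObjectDetectionOutput
from labelformat.model.object_detection import ImageObjectDetection


class ConfidenceFilteredInput:
    """Hypothetical wrapper that drops detections below a confidence threshold."""

    def __init__(self, wrapped, min_confidence: float) -> None:
        self._wrapped = wrapped
        self._min_confidence = min_confidence

    def get_categories(self):
        # Delegate categories unchanged.
        return self._wrapped.get_categories()

    def get_images(self):
        # Delegate images unchanged.
        return self._wrapped.get_images()

    def get_labels(self) -> Iterable[ImageObjectDetection]:
        for label in self._wrapped.get_labels():
            # Keep objects without a confidence (ground truth) and
            # objects at or above the threshold.
            kept = [
                obj
                for obj in label.objects
                if obj.confidence is None or obj.confidence >= self._min_confidence
            ]
            yield ImageObjectDetection(image=label.image, objects=kept)


coco_input = COCOObjectDetectionInput(input_file=Path("coco-labels/train.json"))
filtered_input = ConfidenceFilteredInput(wrapped=coco_input, min_confidence=0.5)
YOLOv8ObjectDetectionOutput(
    output_file=Path("yolo-labels/data.yaml"),
    output_split="train",
).save(label_input=filtered_input)
```

The same wrapping pattern carries over to instance segmentation, where the classes mirror the object detection API, as in the example below: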
103 | 104 | ``` python 105 | from pathlib import Path 106 | from labelformat.formats import COCOInstanceSegmentationInput, YOLOv8InstanceSegmentationOutput 107 | 108 | # Initialize input for instance segmentation 109 | coco_inst_input = COCOInstanceSegmentationInput(input_file=Path("coco-instance/train.json")) 110 | 111 | # Initialize YOLOv8 instance segmentation output 112 | yolo_inst_output = YOLOv8InstanceSegmentationOutput( 113 | output_file=Path("yolo-instance-labels/data.yaml"), 114 | output_split="train" 115 | ) 116 | 117 | # Perform the conversion 118 | yolo_inst_output.save(label_input=coco_inst_input) 119 | 120 | print("Instance segmentation conversion completed successfully!") 121 | ``` 122 | 123 | ## Common Tasks 124 | 125 | ### Handling Category Names 126 | 127 | Some label formats require you to specify category names explicitly. Ensure that category names are consistent across your dataset. 128 | 129 | **Example:** 130 | 131 | ``` shell 132 | labelformat convert \ 133 | --task object-detection \ 134 | --input-format labelbox \ 135 | --input-file labelbox-export.ndjson \ 136 | --category-names cat,dog,fish \ 137 | --output-format coco \ 138 | --output-file coco-output/train.json \ 139 | --output-split train 140 | ``` 141 | 142 | ### Managing Image Paths 143 | 144 | When converting formats that reference image files, accurately specify the relative paths to avoid missing files. 145 | 146 | **Example:** 147 | 148 | ``` shell 149 | labelformat convert \ 150 | --task object-detection \ 151 | --input-format kitti \ 152 | --input-folder kitti-labels/labels \ 153 | --images-rel-path ../images \ 154 | --output-format pascalvoc \ 155 | --output-folder pascalvoc-labels 156 | ``` 157 | 158 | --- 159 | 160 | ## Tips and Best Practices 161 | 162 | - **Backup Your Data:** Always keep a backup of your original labels before performing conversions. 163 | - **Validate Output:** After conversion, verify the output labels to ensure accuracy. 164 | - **Consistent Naming:** Maintain consistent naming conventions for categories and files across different formats. 165 | - **Leverage Round-Trip Tests:** Use Labelformat's testing capabilities to ensure label consistency when converting back and forth between formats. 166 | 167 | For more detailed examples and advanced usage scenarios, explore our [Tutorials](tutorials/converting-coco-to-yolov8.md) section. 168 | -------------------------------------------------------------------------------- /src/labelformat/cli/cli.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from argparse import ArgumentParser, Namespace, RawTextHelpFormatter, _ArgumentGroup 3 | from pathlib import Path 4 | from typing import Optional 5 | 6 | from labelformat.cli.registry import _REGISTRY, Task 7 | from labelformat.formats import * 8 | 9 | logger = logging.getLogger(__name__) 10 | logger.addHandler(logging.StreamHandler()) 11 | logger.setLevel(logging.INFO) 12 | 13 | 14 | def main() -> None: 15 | parser = ArgumentParser( 16 | prog="labelformat", 17 | formatter_class=RawTextHelpFormatter, 18 | description="""\ 19 | Labelformat is a Python package for converting between different computer vision label formats. 20 | It provides a CLI interface as well as an easy-to-use Python API. 21 | The CLI interface is available as the `labelformat` command. 
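For example:
  labelformat convert --task object-detection --input-format coco --input-file path/to/coco/train.json --output-format yolov8 --output-file path/to/yolo/data.yaml --output-split train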
22 | 23 | Supported label formats for object detection: 24 | - YOLOv5, YOLOv6, YOLOv7, YOLOv8, YOLOv9, YOLOv10, YOLOv11, YOLOv12, YOLOv26 25 | - COCO, RT-DETR, RT-DETRv2 26 | - VOC 27 | - Labelbox 28 | - and many more 29 | 30 | Supported label formats for instance segmentation: 31 | - COCO 32 | - YOLOv8 33 | 34 | Run `labelformat convert --help` for more information about how to convert between label formats. 35 | """, 36 | ) 37 | subparsers = parser.add_subparsers(dest="command") 38 | convert_parser = subparsers.add_parser( 39 | name="convert", 40 | description="Convert labels from one format to another.", 41 | add_help=False, 42 | ) 43 | convert_parser.add_argument("-h", "--help", action="store_true") 44 | # Parse already to check if --help flag is set. We have to do this now because 45 | # the convert --input-format and --output-format arguments are required and 46 | # raise an error otherwise. 47 | args, remaining_args = parser.parse_known_args() 48 | 49 | if args.command == "convert": 50 | convert_parser.add_argument( 51 | "--task", 52 | choices=sorted({task.value for task in Task}), 53 | type=str, 54 | required=True, 55 | ) 56 | 57 | # Parse the task argument. We modify the input/output format choices 58 | # based on it. We print the help message first if the task argument is missing. 59 | if args.help and "--task" not in remaining_args: 60 | convert_parser.print_help() 61 | return 62 | args, _ = parser.parse_known_args() 63 | task = Task(args.task) 64 | 65 | # Add input/output format arguments. 66 | convert_parser.add_argument( 67 | "--input-format", 68 | choices=sorted(_REGISTRY.input[task].keys()), 69 | help="Input format", 70 | required=True, 71 | ) 72 | convert_parser.add_argument( 73 | "--output-format", 74 | choices=sorted(_REGISTRY.output[task].keys()), 75 | type=str, 76 | help="Output format", 77 | required=True, 78 | ) 79 | 80 | # Print help message if input/output format arguments are missing. If both 81 | # arguments are set, then we want to wait with printing the help message 82 | # until the input/output format specific arguments are added to the parser. 83 | if args.help and ( 84 | "--input-format" not in remaining_args 85 | or "--output-format" not in remaining_args 86 | ): 87 | convert_parser.print_help() 88 | return 89 | 90 | # Parse again to verify that all arguments are set. 91 | # This raises an error if --input-format or --output-format is missing. 92 | args, _ = parser.parse_known_args() 93 | 94 | # Make groups for input and output arguments. This groups input/output arguments 95 | # in the help message. 96 | input_group = convert_parser.add_argument_group( 97 | f"'{args.input_format}' input arguments" 98 | ) 99 | output_group = convert_parser.add_argument_group( 100 | f"'{args.output_format}' output arguments" 101 | ) 102 | 103 | # Add arguments depending on input/output format. We use a new parser here 104 | # to make typing for users easier when defining the add_parser_arguments in 105 | # the input/output format classes. This allows users to import 106 | # 'argparse.ArgumentParser' instead of 'argparse._ArgumentGroup'. 107 | input_parser = ArgumentParser(add_help=False) 108 | output_parser = ArgumentParser(add_help=False) 109 | task = Task(args.task) 110 | input_cls = _REGISTRY.input[task][args.input_format] 111 | output_cls = _REGISTRY.output[task][args.output_format] 112 | input_cls.add_cli_arguments(parser=input_parser) 113 | output_cls.add_cli_arguments(parser=output_parser) 114 | 115 | # Copy added arguments to the input/output group. 
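# Note: argparse has no public API for moving an existing action into a group,
# so we go through the private `_actions`/`_add_action` attributes. The actions
# were already fully configured on the throwaway parsers above.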
116 | for action in input_parser._actions: 117 | input_group._add_action(action) 118 | for action in output_parser._actions: 119 | output_group._add_action(action) 120 | 121 | # Display help message if requested. This help message will now also contain 122 | # the input/output format-specific arguments. 123 | if args.help: 124 | convert_parser.print_help() 125 | return 126 | 127 | # Parse and get group-specific arguments. 128 | args = parser.parse_args() 129 | input_args = _get_group_args(args, input_group) 130 | output_args = _get_group_args(args, output_group) 131 | 132 | # Create input and output instances and convert. 133 | logger.info("Loading labels...") 134 | label_input = input_cls(**vars(input_args)) 135 | label_output = output_cls(**vars(output_args)) 136 | 137 | logger.info("Converting labels...") 138 | label_output.save(label_input=label_input) 139 | 140 | logger.info("Done!") 141 | else: 142 | # Print a help message if no command is given. 143 | # Because only the convert command is currently available, we print its help message. 144 | convert_parser.print_help() 145 | return 146 | 147 | 148 | def _get_group_args(args: Namespace, group: _ArgumentGroup) -> Namespace: 149 | return Namespace( 150 | **{ 151 | action.dest: getattr(args, action.dest, None) 152 | for action in group._group_actions 153 | } 154 | ) 155 | -------------------------------------------------------------------------------- /src/labelformat/formats/semantic_segmentation/pascalvoc.py: -------------------------------------------------------------------------------- 1 | """Pascal VOC semantic segmentation input. 2 | 3 | Assumptions: 4 | - Masks live under a separate directory mirroring the images directory structure. 5 | - For each image at ``images_dir/<relative_path>.ext``, the mask is at ``masks_dir/<relative_path>.png``. 6 | - Masks are PNGs with pixel values equal to class IDs. 7 | """ 8 | 9 | from __future__ import annotations 10 | 11 | from collections.abc import Iterable, Mapping 12 | from dataclasses import dataclass 13 | from pathlib import Path 14 | 15 | import numpy as np 16 | from numpy.typing import NDArray 17 | from PIL import Image as PILImage 18 | 19 | from labelformat import utils 20 | from labelformat.model.category import Category 21 | from labelformat.model.image import Image 22 | from labelformat.model.semantic_segmentation import ( 23 | SemanticSegmentationInput, 24 | SemanticSegmentationMask, 25 | ) 26 | 27 | """TODO(Malte, 11/2025): 28 | Support what is already supported in LightlyTrain. https://docs.lightly.ai/train/stable/semantic_segmentation.html#data 29 | Support using a template against the image filepath. https://docs.lightly.ai/train/stable/semantic_segmentation.html#using-a-template-against-the-image-filepath 30 | Support using multi-channel masks. https://docs.lightly.ai/train/stable/semantic_segmentation.html#using-multi-channel-masks 31 | Support optional ignore_classes: list[int] that should be ignored during training. https://docs.lightly.ai/train/stable/semantic_segmentation.html#specify-training-classes 32 | Support merging multiple labels into one class during training. 
https://docs.lightly.ai/train/stable/semantic_segmentation.html#specify-training-classes 33 | """ 34 | 35 | 36 | @dataclass 37 | class PascalVOCSemanticSegmentationInput(SemanticSegmentationInput): 38 | _images_dir: Path 39 | _masks_dir: Path 40 | _filename_to_image: dict[str, Image] 41 | _categories: list[Category] 42 | 43 | @classmethod 44 | def from_dirs( 45 | cls, 46 | images_dir: Path, 47 | masks_dir: Path, 48 | class_id_to_name: Mapping[int, str], 49 | ) -> "PascalVOCSemanticSegmentationInput": 50 | """Create a PascalVOCSemanticSegmentationInput from directory pairs. 51 | 52 | Args: 53 | images_dir: Root directory containing images (nested structure allowed). 54 | masks_dir: Root directory containing PNG masks mirroring images structure. 55 | class_id_to_name: Mapping of class_id -> class name, with integer keys. 56 | 57 | Raises: 58 | ValueError: If directories are invalid, a mask is missing or not PNG, 59 | or if class_id keys cannot be parsed as integers. 60 | """ 61 | if not images_dir.is_dir(): 62 | raise ValueError(f"Images directory is not a directory: {images_dir}") 63 | if not masks_dir.is_dir(): 64 | raise ValueError(f"Masks directory is not a directory: {masks_dir}") 65 | 66 | # Build categories from mapping 67 | categories = [ 68 | Category(id=cid, name=cname) for cid, cname in class_id_to_name.items() 69 | ] 70 | 71 | # Collect images using helper and ensure a PNG mask exists for each. 72 | images_by_filename: dict[str, Image] = {} 73 | for img in utils.get_images_from_folder(images_dir): 74 | mask_path = masks_dir / Path(img.filename).with_suffix(".png") 75 | if not mask_path.is_file(): 76 | raise ValueError( 77 | f"Missing mask PNG for image '{img.filename}' at path: {mask_path}" 78 | ) 79 | images_by_filename[img.filename] = img 80 | 81 | return cls( 82 | _images_dir=images_dir, 83 | _masks_dir=masks_dir, 84 | _filename_to_image=images_by_filename, 85 | _categories=categories, 86 | ) 87 | 88 | def get_categories(self) -> Iterable[Category]: 89 | return list(self._categories) 90 | 91 | def get_images(self) -> Iterable[Image]: 92 | yield from self._filename_to_image.values() 93 | 94 | def get_mask(self, image_filepath: str) -> SemanticSegmentationMask: 95 | # Validate image exists in our index. 96 | image_obj = self._filename_to_image.get(image_filepath) 97 | if image_obj is None: 98 | raise ValueError( 99 | f"Unknown image filepath {image_filepath}. Use one returned by get_images()." 100 | ) 101 | 102 | mask_path = self._masks_dir / Path(image_filepath).with_suffix(".png") 103 | if not mask_path.is_file(): 104 | raise ValueError( 105 | f"Mask PNG not found for image '{image_filepath}': {mask_path}" 106 | ) 107 | 108 | # Load and validate mask by shape and value set. 109 | with PILImage.open(mask_path) as mimg: 110 | mask_np: NDArray[np.int_] = np.asarray(mimg, dtype=np.int_) 111 | _validate_mask( 112 | image_obj=image_obj, 113 | mask_np=mask_np, 114 | valid_class_ids={c.id for c in self._categories}, 115 | ) 116 | 117 | return SemanticSegmentationMask(array=mask_np) 118 | 119 | 120 | def _validate_mask( 121 | image_obj: Image, mask_np: NDArray[np.int_], valid_class_ids: set[int] 122 | ) -> None: 123 | """Validate a semantic segmentation mask against an image and categories. 124 | 125 | Args: 126 | image_obj: The image metadata with filename, width, and height used for shape validation. 127 | mask_np: The mask as a 2D numpy array with integer class IDs. 128 | valid_class_ids: The set of allowed class IDs that may appear in the mask. 
129 | 130 | Raises: 131 | ValueError: If the mask is not 2D, does not match the image size, or contains 132 | class IDs not present in `valid_class_ids`. 133 | """ 134 | if mask_np.ndim != 2: 135 | raise ValueError( 136 | f"Mask must be 2D (H, W) for: {image_obj.filename}. Got shape {mask_np.shape}" 137 | ) 138 | 139 | mh, mw = int(mask_np.shape[0]), int(mask_np.shape[1]) 140 | if (mw, mh) != (image_obj.width, image_obj.height): 141 | raise ValueError( 142 | f"Mask shape must match image dimensions for '{image_obj.filename}': " 143 | f"mask (W,H)=({mw},{mh}) vs image (W,H)=({image_obj.width},{image_obj.height})" 144 | ) 145 | 146 | uniques = np.unique(mask_np.astype(int)) 147 | unique_values = set(uniques) 148 | unknown_values = unique_values.difference(valid_class_ids) 149 | if unknown_values: 150 | raise ValueError( 151 | f"Mask contains unknown class ids: {', '.join(map(str, sorted(unknown_values)))}" 152 | ) 153 | -------------------------------------------------------------------------------- /src/labelformat/formats/lightly.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | from argparse import ArgumentParser 4 | from pathlib import Path 5 | from typing import Any, Dict, Iterable, List, Sequence 6 | 7 | from labelformat import utils 8 | from labelformat.cli.registry import Task, cli_register 9 | from labelformat.errors import LabelWithoutImageError 10 | from labelformat.model.bounding_box import BoundingBox, BoundingBoxFormat 11 | from labelformat.model.category import Category 12 | from labelformat.model.image import Image 13 | from labelformat.model.object_detection import ( 14 | ImageObjectDetection, 15 | ObjectDetectionInput, 16 | ObjectDetectionOutput, 17 | SingleObjectDetection, 18 | ) 19 | 20 | 21 | @cli_register(format="lightly", task=Task.OBJECT_DETECTION) 22 | class LightlyObjectDetectionInput(ObjectDetectionInput): 23 | @staticmethod 24 | def add_cli_arguments(parser: ArgumentParser) -> None: 25 | parser.add_argument( 26 | "--input-folder", 27 | type=Path, 28 | required=True, 29 | help="Path to input folder with JSON files", 30 | ) 31 | parser.add_argument( 32 | "--images-rel-path", 33 | type=str, 34 | default="../images", 35 | help="Relative path to images folder from label folder", 36 | ) 37 | parser.add_argument( 38 | "--skip-labels-without-image", 39 | action="store_true", 40 | help="Skip labels without corresponding image", 41 | ) 42 | 43 | def __init__( 44 | self, 45 | input_folder: Path, 46 | images_rel_path: str = "../images", 47 | skip_labels_without_image: bool = False, 48 | ) -> None: 49 | self._input_folder = input_folder 50 | self._images_rel_path = images_rel_path 51 | self._skip_labels_without_image = skip_labels_without_image 52 | self._categories = self._get_categories() 53 | 54 | def get_categories(self) -> Iterable[Category]: 55 | return self._categories 56 | 57 | def get_images(self) -> Iterable[Image]: 58 | yield from utils.get_images_from_folder( 59 | folder=self._input_folder / self._images_rel_path 60 | ) 61 | 62 | def get_labels(self) -> Iterable[ImageObjectDetection]: 63 | category_id_to_category = { 64 | category.id: category for category in self.get_categories() 65 | } 66 | filename_to_image = {image.filename: image for image in self.get_images()} 67 | 68 | for json_path in self._input_folder.rglob("*.json"): 69 | if json_path.name == "schema.json": 70 | continue 71 | data = json.loads(json_path.read_text()) 72 | if data["file_name"] not in filename_to_image: 73 | if 
self._skip_labels_without_image: 74 | continue 75 | raise LabelWithoutImageError( 76 | f"Label '{json_path.name}' does not have a corresponding image." 77 | ) 78 | image = filename_to_image[data["file_name"]] 79 | objects = [] 80 | for prediction in data["predictions"]: 81 | objects.append( 82 | SingleObjectDetection( 83 | category=category_id_to_category[prediction["category_id"]], 84 | box=BoundingBox.from_format( 85 | bbox=[float(x) for x in prediction["bbox"]], 86 | format=BoundingBoxFormat.XYWH, 87 | ), 88 | confidence=( 89 | float(prediction["score"]) 90 | if "score" in prediction 91 | else None 92 | ), 93 | ) 94 | ) 95 | yield ImageObjectDetection( 96 | image=image, 97 | objects=objects, 98 | ) 99 | 100 | def _get_categories(self) -> Sequence[Category]: 101 | schema_path = self._input_folder / "schema.json" 102 | schema_json = json.loads(schema_path.read_text()) 103 | if schema_json["task_type"] != "object-detection": 104 | raise ValueError( 105 | f"Schema type '{schema_json['task_type']}' is not supported. " 106 | f"Expected 'object-detection'." 107 | ) 108 | return [ 109 | Category( 110 | id=category["id"], 111 | name=category["name"], 112 | ) 113 | for category in schema_json["categories"] 114 | ] 115 | 116 | 117 | @cli_register(format="lightly", task=Task.OBJECT_DETECTION) 118 | class LightlyObjectDetectionOutput(ObjectDetectionOutput): 119 | @staticmethod 120 | def add_cli_arguments(parser: ArgumentParser) -> None: 121 | parser.add_argument( 122 | "--output-folder", 123 | type=Path, 124 | required=True, 125 | help=( 126 | "Path to output folder with JSON files. The folder name should " 127 | "match the detection task name." 128 | ), 129 | ) 130 | 131 | def __init__(self, output_folder: Path) -> None: 132 | self._output_folder = output_folder 133 | 134 | def save(self, label_input: ObjectDetectionInput) -> None: 135 | self._output_folder.mkdir(parents=True, exist_ok=True) 136 | 137 | # Save schema. 138 | schema = { 139 | "task_type": "object-detection", 140 | "categories": [ 141 | { 142 | "id": category.id, 143 | "name": category.name, 144 | } 145 | for category in label_input.get_categories() 146 | ], 147 | } 148 | schema_file = self._output_folder / "schema.json" 149 | with schema_file.open("w") as file: 150 | json.dump(schema, file, indent=2) 151 | 152 | # Save labels. 153 | for label in label_input.get_labels(): 154 | data = { 155 | "file_name": label.image.filename, 156 | "predictions": [ 157 | { 158 | "category_id": obj.category.id, 159 | "bbox": obj.box.to_format(BoundingBoxFormat.XYWH), 160 | "score": 0.0 if obj.confidence is None else obj.confidence, 161 | } 162 | for obj in label.objects 163 | ], 164 | } 165 | label_file = (self._output_folder / f"{label.image.filename}").with_suffix( 166 | ".json" 167 | ) 168 | label_file.parent.mkdir(parents=True, exist_ok=True) 169 | with label_file.open("w") as file: 170 | json.dump(data, file, indent=2) 171 | -------------------------------------------------------------------------------- /docs/formats/object-detection/rtdetr.md: -------------------------------------------------------------------------------- 1 | # RT-DETR Object Detection Format 2 | 3 | ## Overview 4 | 5 | **RT-DETR (Real-Time DEtection TRansformer)** is a groundbreaking end-to-end object detection framework introduced in the paper [DETRs Beat YOLOs on Real-time Object Detection](https://arxiv.org/abs/2304.08069). 
RT-DETR represents the first real-time end-to-end object detector that successfully challenges the dominance of YOLO detectors in real-time applications. Unlike traditional detectors that require Non-Maximum Suppression (NMS) post-processing, RT-DETR eliminates NMS entirely while achieving superior speed and accuracy performance. 6 | 7 | > **Info:** RT-DETR was introduced through the academic paper "DETRs Beat YOLOs on Real-time Object Detection" published in 2023. 8 | For the full paper, see: [arXiv:2304.08069](https://arxiv.org/abs/2304.08069) 9 | For implementation details and code, see: [GitHub Repository: lyuwenyu/RT-DETR](https://github.com/lyuwenyu/RT-DETR) 10 | 11 | > **Availability:** RT-DETR is now available in multiple frameworks: 12 | - [Hugging Face Transformers](https://huggingface.co/docs/transformers/model_doc/rt_detr) 13 | - [Ultralytics](https://docs.ultralytics.com/models/rtdetr/) 14 | 15 | ## Key RT-DETR Model Features 16 | 17 | RT-DETR uses the standard **COCO annotation format** while introducing revolutionary architectural innovations for real-time detection: 18 | 19 | - **End-to-End Architecture:** First real-time detector to completely eliminate NMS post-processing, providing more stable and predictable inference times. 20 | - **Efficient Hybrid Encoder:** Novel encoder design that decouples intra-scale interaction and cross-scale fusion to significantly reduce computational overhead. 21 | - **Uncertainty-Minimal Query Selection:** Advanced query initialization scheme that optimizes both classification and localization confidence for improved detection quality. 22 | - **Flexible Speed Tuning:** Supports adjustable inference speed by modifying the number of decoder layers without retraining. 23 | - **Superior Performance:** Achieves state-of-the-art results (e.g., RT-DETR-R50 reaches 53.1% mAP @ 108 FPS on T4 GPU, outperforming YOLOv8-L in both speed and accuracy). 24 | - **Multiple Model Scales:** Available in various scales (R18, R34, R50, R101) to accommodate different computational requirements. 25 | 26 | These architectural innovations are handled internally by the model design and training pipeline, requiring no changes to the standard COCO annotation format described below. 27 | 28 | ## Specification of RT-DETR Detection Format 29 | 30 | RT-DETR uses the standard **COCO format** for annotations, ensuring seamless integration with existing COCO datasets and tools. 
The format consists of a single JSON file containing three main components: 31 | 32 | ### `images` 33 | Defines metadata for each image in the dataset: 34 | ```json 35 | { 36 | "id": 0, // Unique image ID 37 | "file_name": "image1.jpg", // Image filename 38 | "width": 640, // Image width in pixels 39 | "height": 416 // Image height in pixels 40 | } 41 | ``` 42 | 43 | ### `categories` 44 | Defines the object classes: 45 | ```json 46 | { 47 | "id": 0, // Unique category ID 48 | "name": "cat" // Category name 49 | } 50 | ``` 51 | 52 | ### `annotations` 53 | Defines object instances: 54 | ```json 55 | { 56 | "image_id": 0, // Reference to image 57 | "category_id": 2, // Reference to category 58 | "bbox": [540.0, 295.0, 23.0, 18.0] // [x, y, width, height] in absolute pixels 59 | } 60 | ``` 61 | 62 | ## Directory Structure of RT-DETR Dataset 63 | 64 | ``` 65 | dataset/ 66 | ├── images/ # Image files 67 | │ ├── image1.jpg 68 | │ └── image2.jpg 69 | └── annotations.json # Single JSON file containing all annotations 70 | ``` 71 | 72 | ## Benefits of RT-DETR Format 73 | 74 | - **Standard Compatibility:** Uses the widely-adopted COCO format, ensuring compatibility with existing tools and frameworks. 75 | - **Flexibility:** Supports adjustable inference speeds without retraining, making it adaptable to various real-time scenarios. 76 | - **Superior Accuracy:** Achieves better accuracy than comparable YOLO detectors while maintaining competitive speed. 77 | 78 | ## Converting Annotations to RT-DETR Format with Labelformat 79 | 80 | Since RT-DETR uses the standard COCO format, converting annotations to RT-DETR format is equivalent to converting to COCO format. 81 | 82 | ### Installation 83 | 84 | First, ensure that Labelformat is installed: 85 | 86 | ```shell 87 | pip install labelformat 88 | ``` 89 | 90 | ### Conversion Example: YOLOv8 to RT-DETR 91 | 92 | Assume you have annotations in YOLOv8 format and wish to convert them to RT-DETR. Here's how you can achieve this using Labelformat. 93 | 94 | **Step 1: Prepare Your Dataset** 95 | 96 | Ensure your dataset follows the standard YOLOv8 structure with `data.yaml` and label files. 97 | 98 | **Step 2: Run the Conversion Command** 99 | 100 | Use the Labelformat CLI to convert YOLOv8 annotations to RT-DETR (COCO format): 101 | ```bash 102 | labelformat convert \ 103 | --task object-detection \ 104 | --input-format yolov8 \ 105 | --input-file dataset/data.yaml \ 106 | --input-split train \ 107 | --output-format rtdetr \ 108 | --output-file dataset/rtdetr_annotations.json 109 | ``` 110 | 111 | **Step 3: Verify the Converted Annotations** 112 | 113 | After conversion, your dataset structure will be: 114 | ``` 115 | dataset/ 116 | ├── images/ 117 | │ ├── image1.jpg 118 | │ ├── image2.jpg 119 | │ └── ... 
120 | └── rtdetr_annotations.json # COCO format annotations for RT-DETR 121 | ``` 122 | 123 | ### Python API Example 124 | 125 | ```python 126 | from pathlib import Path 127 | from labelformat.formats import YOLOv8ObjectDetectionInput, RTDETRObjectDetectionOutput 128 | 129 | # Load YOLOv8 format 130 | label_input = YOLOv8ObjectDetectionInput( 131 | input_file=Path("dataset/data.yaml"), 132 | input_split="train" 133 | ) 134 | 135 | # Convert to RT-DETR format 136 | RTDETRObjectDetectionOutput( 137 | output_file=Path("dataset/rtdetr_annotations.json") 138 | ).save(label_input=label_input) 139 | ``` 140 | 141 | ## Error Handling in Labelformat 142 | 143 | Since RT-DETR uses the COCO format, the same validation and error handling applies: 144 | 145 | - **Invalid JSON Structure:** Proper error reporting for malformed JSON files 146 | - **Missing Required Fields:** Validation ensures all required COCO fields are present 147 | - **Reference Integrity:** Checks that image_id and category_id references are valid 148 | - **Bounding Box Validation:** Ensures bounding boxes are within image boundaries 149 | 150 | Example of a properly formatted annotation: 151 | ```json 152 | { 153 | "images": [{"id": 0, "file_name": "image1.jpg", "width": 640, "height": 480}], 154 | "categories": [{"id": 1, "name": "person"}], 155 | "annotations": [{"image_id": 0, "category_id": 1, "bbox": [100, 120, 50, 80]}] 156 | } 157 | ``` -------------------------------------------------------------------------------- /src/labelformat/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | from typing import Iterable, Tuple 4 | 5 | import PIL.Image 6 | 7 | from labelformat.model.image import Image 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | IMAGE_EXTENSIONS = { 12 | ".jpg", 13 | ".jpeg", 14 | ".png", 15 | ".ppm", 16 | ".bmp", 17 | ".pgm", 18 | ".tif", 19 | ".tiff", 20 | ".webp", 21 | } 22 | 23 | # JPEG SOF (Start of Frame) markers that contain image dimensions. 24 | # Excludes 0xC4 (DHT - Define Huffman Table) and 0xC8 (JPG reserved). 25 | # List from https://www.disktuna.com/list-of-jpeg-markers/ 26 | JPEG_SOF_MARKERS = { 27 | 0xC0, # SOF0 - Baseline DCT 28 | 0xC1, # SOF1 - Extended Sequential DCT 29 | 0xC2, # SOF2 - Progressive DCT 30 | 0xC3, # SOF3 - Lossless (sequential) 31 | 0xC5, # SOF5 - Differential sequential DCT 32 | 0xC6, # SOF6 - Differential progressive DCT 33 | 0xC7, # SOF7 - Differential lossless (sequential) 34 | 0xC9, # SOF9 - Extended sequential DCT, Arithmetic coding 35 | 0xCA, # SOF10 - Progressive DCT, Arithmetic coding 36 | 0xCB, # SOF11 - Lossless (sequential), Arithmetic coding 37 | 0xCD, # SOF13 - Differential sequential DCT, Arithmetic coding 38 | 0xCE, # SOF14 - Differential progressive DCT, Arithmetic coding 39 | 0xCF, # SOF15 - Differential lossless (sequential), Arithmetic coding 40 | } 41 | 42 | 43 | class ImageDimensionError(Exception): 44 | """Raised when unable to extract image dimensions using fast methods.""" 45 | 46 | pass 47 | 48 | 49 | def get_jpeg_dimensions(file_path: Path) -> Tuple[int, int]: 50 | """Try to efficiently get JPEG dimensions from file headers without decoding the image. 51 | 52 | This method reads only the JPEG file headers looking for the Start Of Frame (SOFn) 53 | marker which contains the dimensions. 
This is much faster than decoding the entire 54 | image as it: 55 | - Only reads the file headers (typically a few KB) instead of the entire file 56 | - Doesn't perform any image decompression 57 | - Doesn't load the pixel data into memory 58 | 59 | This works for most standard JPEG files (including progressive JPEGs) but may fail 60 | for some unusual formats or corrupted files. In those cases, an ImageDimensionError 61 | is raised and a full image decode may be needed as fallback. 62 | 63 | Args: 64 | file_path: Path to the JPEG file 65 | 66 | Returns: 67 | Tuple of (width, height) 68 | 69 | Raises: 70 | ImageDimensionError: If dimensions cannot be extracted from headers 71 | """ 72 | try: 73 | with open(file_path, "rb") as img_file: 74 | # Skip SOI marker 75 | img_file.seek(2) 76 | while True: 77 | marker = img_file.read(2) 78 | if len(marker) < 2: 79 | raise ImageDimensionError("Invalid JPEG format") 80 | # Find SOFn marker 81 | if marker[0] == 0xFF and marker[1] in JPEG_SOF_MARKERS: 82 | # Skip marker length 83 | img_file.seek(3, 1) 84 | h = int.from_bytes(img_file.read(2), "big") 85 | w = int.from_bytes(img_file.read(2), "big") 86 | return w, h 87 | # Skip to next marker 88 | length = int.from_bytes(img_file.read(2), "big") 89 | img_file.seek(length - 2, 1) 90 | except Exception as e: 91 | raise ImageDimensionError(f"Failed to read JPEG dimensions: {str(e)}") 92 | 93 | 94 | def get_png_dimensions(file_path: Path) -> Tuple[int, int]: 95 | """Try to efficiently get PNG dimensions from file headers without decoding the image. 96 | 97 | This method reads only the PNG IHDR (Image Header) chunk which is always the first 98 | chunk after the PNG signature. This is much faster than decoding the entire image as it: 99 | - Only reads the first ~30 bytes of the file 100 | - Doesn't perform any image decompression 101 | - Doesn't load the pixel data into memory 102 | 103 | This works for all valid PNG files since the IHDR chunk is mandatory and must appear 104 | first according to the PNG specification. However, it may fail for corrupted files 105 | or files that don't follow the PNG spec. In those cases, an ImageDimensionError is 106 | raised and a full image decode may be needed as fallback. 107 | 108 | Args: 109 | file_path: Path to the PNG file 110 | 111 | Returns: 112 | Tuple of (width, height) 113 | 114 | Raises: 115 | ImageDimensionError: If dimensions cannot be extracted from headers 116 | """ 117 | try: 118 | with open(file_path, "rb") as img_file: 119 | # Skip PNG signature 120 | img_file.seek(8) 121 | # Read IHDR chunk 122 | chunk_length = int.from_bytes(img_file.read(4), "big") 123 | chunk_type = img_file.read(4) 124 | if chunk_type == b"IHDR": 125 | w = int.from_bytes(img_file.read(4), "big") 126 | h = int.from_bytes(img_file.read(4), "big") 127 | return w, h 128 | raise ImageDimensionError("Invalid PNG format") 129 | except Exception as e: 130 | raise ImageDimensionError(f"Failed to read PNG dimensions: {str(e)}") 131 | 132 | 133 | def get_image_dimensions(image_path: Path) -> Tuple[int, int]: 134 | """Get image dimensions using the most efficient method available. 
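For JPEG and PNG files this first tries the header-only readers above and falls back to a full ``PIL.Image.open`` if those raise ``ImageDimensionError``; all other formats are decoded with PIL directly.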
135 | 136 | Args: 137 | image_path: Path to the image file 138 | 139 | Returns: 140 | Tuple of (width, height) 141 | 142 | Raises: 143 | Exception: If image dimensions cannot be extracted using any method 144 | """ 145 | suffix = image_path.suffix.lower() 146 | if suffix in {".jpg", ".jpeg"}: 147 | try: 148 | return get_jpeg_dimensions(image_path) 149 | except ImageDimensionError: 150 | pass 151 | elif suffix == ".png": 152 | try: 153 | return get_png_dimensions(image_path) 154 | except ImageDimensionError: 155 | pass 156 | 157 | with PIL.Image.open(image_path) as img: 158 | return img.size 159 | 160 | 161 | def get_images_from_folder(folder: Path) -> Iterable[Image]: 162 | """Yields an Image structure for all images in the given folder. 163 | 164 | The order of the images is arbitrary. Images in nested folders are included. 165 | 166 | Args: 167 | folder: Path to the folder containing images. 168 | """ 169 | image_id = 0 170 | logger.debug(f"Listing images in '{folder}'...") 171 | for image_path in folder.rglob("*"): 172 | if image_path.suffix.lower() not in IMAGE_EXTENSIONS: 173 | logger.debug(f"Skipping non-image file '{image_path}'") 174 | continue 175 | image_filename = str(image_path.relative_to(folder)) 176 | image_width, image_height = get_image_dimensions(image_path) 177 | yield Image( 178 | id=image_id, 179 | filename=image_filename, 180 | width=image_width, 181 | height=image_height, 182 | ) 183 | image_id += 1 184 | --------------------------------------------------------------------------------