├── .gitignore
├── LICENSE
├── README.md
├── buildkite.yml
├── datatap
│   ├── __init__.py
│   ├── api
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── endpoints
│   │   │   ├── __init__.py
│   │   │   ├── database_endpoints.py
│   │   │   ├── dataset_endpoints.py
│   │   │   ├── endpoints.py
│   │   │   ├── repository_endpoints.py
│   │   │   ├── request.py
│   │   │   └── user_endpoints.py
│   │   ├── entities
│   │   │   ├── __init__.py
│   │   │   ├── api.py
│   │   │   ├── database.py
│   │   │   ├── dataset.py
│   │   │   ├── repository.py
│   │   │   └── user.py
│   │   └── types
│   │       ├── __init__.py
│   │       ├── database.py
│   │       ├── dataset.py
│   │       ├── repository.py
│   │       └── user.py
│   ├── comet
│   │   └── __init__.py
│   ├── droplet
│   │   ├── __init__.py
│   │   ├── _media.py
│   │   ├── attributes.py
│   │   ├── bounding_box.py
│   │   ├── class_annotation.py
│   │   ├── frame_annotation.py
│   │   ├── image.py
│   │   ├── image_annotation.py
│   │   ├── instance.py
│   │   ├── keypoint.py
│   │   ├── multi_instance.py
│   │   ├── segmentation.py
│   │   ├── video.py
│   │   └── video_annotation.py
│   ├── examples
│   │   ├── __init__.py
│   │   └── torch.ipynb
│   ├── geometry
│   │   ├── __init__.py
│   │   ├── mask.py
│   │   ├── point.py
│   │   ├── polygon.py
│   │   └── rectangle.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── _types.py
│   │   ├── confusion_matrix.py
│   │   ├── iou.py
│   │   └── precision_recall_curve.py
│   ├── py.typed
│   ├── template
│   │   ├── __init__.py
│   │   ├── class_annotation_template.py
│   │   ├── frame_annotation_template.py
│   │   ├── image_annotation_template.py
│   │   ├── instance_template.py
│   │   ├── multi_instance_template.py
│   │   └── video_annotation_template.py
│   ├── tf
│   │   ├── __init__.py
│   │   └── dataset.py
│   ├── torch
│   │   ├── __init__.py
│   │   ├── _patch_torch.py
│   │   ├── dataloader.py
│   │   ├── dataset.py
│   │   └── utils.py
│   └── utils
│       ├── __init__.py
│       ├── cache_generator.py
│       ├── environment.py
│       ├── helpers.py
│       ├── or_nullish.py
│       └── print_helpers.py
├── dev_requirements.txt
├── examples
│   └── coco_to_droplet.py
├── pyrightconfig.json
├── requirements.txt
├── requirements_image.txt
├── requirements_importers.txt
├── requirements_metrics.txt
├── requirements_tf.txt
├── requirements_torch.txt
├── setup.py
├── tests
│   ├── __init__.py
│   └── metrics
│       ├── __init__.py
│       ├── test_iou.py
│       └── test_precision_recall_curve.py
└── typings
    ├── PIL
    │   ├── Image.pyi
    │   └── __init__.pyi
    ├── boto3
    │   └── __init__.pyi
    ├── comet_ml
    │   ├── API.pyi
    │   ├── APIExperiment.pyi
    │   ├── ExistingExperiment.pyi
    │   ├── Experiment.pyi
    │   ├── __init__.pyi
    │   ├── exceptions.pyi
    │   └── query
    │       └── __init__.pyi
    ├── dask
    │   ├── __init__.pyi
    │   ├── bag.pyi
    │   └── delayed.pyi
    ├── fsspec
    │   └── __init__.pyi
    ├── matplotlib
    │   ├── __init__.py
    │   └── pyplot
    │       └── __init__.py
    ├── neo4j
    │   ├── __init__.pyi
    │   ├── driver.pyi
    │   ├── graph_database.pyi
    │   ├── record.pyi
    │   ├── result.pyi
    │   ├── session.pyi
    │   └── transaction.pyi
    ├── pycocotools
    │   ├── __init__.pyi
    │   └── mask.pyi
    ├── requests
    │   └── __init__.pyi
    ├── scipy
    │   ├── __init__.pyi
    │   └── optimize
    │       └── __init__.pyi
    ├── shapely
    │   ├── __init__.pyi
    │   └── geometry
    │       └── __init__.pyi
    ├── skimage
    │   ├── __init__.pyi
    │   └── measure.pyi
    ├── sortedcontainers
    │   └── __init__.pyi
    ├── tensorflow
    │   ├── __init__.pyi
    │   ├── data
    │   │   ├── __init__.pyi
    │   │   └── dataset.pyi
    │   ├── distribute
    │   │   ├── __init__.pyi
    │   │   ├── distributed_dataset.pyi
    │   │   ├── experimental
    │   │   │   ├── __init__.pyi
    │   │   │   └── strategy.pyi
    │   │   └── input_context.pyi
    │   ├── io
    │   │   ├── __init__.py
    │   │   └── decode_image.py
    │   ├── tensor.pyi
    │   └── types.pyi
    └── torchvision
        ├── __init__.py
        └── transforms
            ├── __init__.py
            └── functional.pyi
/.gitignore:
--------------------------------------------------------------------------------
1 | /out
2 | /test
3 | /dask-*
4 | /*.egg-info
5 | /tmp*
6 | .venv
7 | __pycache__
8 | build
9 | dist
10 | .vscode
11 | html
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | The visual data management platform from Zensors.
13 |
14 |
15 | ----------
16 |
17 |
18 | Join for free at app.datatap.dev.
19 |
20 |
21 |
22 | The dataTap Python library is the primary interface for using dataTap's rich data management tools. Create datasets, stream annotations, and analyze model performance all with one library.
23 |
24 | ----------
25 |
26 | ## Documentation
27 |
28 | Full documentation is available at [docs.datatap.dev](https://docs.datatap.dev/).
29 |
30 | ## Features
31 |
32 | - [x] ⚡ Begin training instantly
33 | - [x] 🔥 Works with all major ML frameworks (Pytorch, TensorFlow, etc.)
34 | - [x] 🛰️ Real-time streaming to avoid large dataset downloads
35 | - [x] 🌐 Universal data format for simple data exchange
36 | - [x] 🎨 Easily combine data from multiple sources into a single dataset
37 | - [x] 🧮 Rich ML utilities to compute PR-curves, confusion matrices, and accuracy metrics.
38 | - [x] 💽 Free access to a variety of open datasets.
39 |
40 | ## Getting Started (Platform)
41 |
42 | To begin, select a dataset from the dataTap repository.
43 |
44 |
45 |
46 |
47 |
48 | Then copy the starter code based on your library preference.
49 |
50 |
51 |
52 |
53 |
54 | Paste the starter code and start training.
55 |
56 |
57 |
58 |
59 |
60 | ## Getting Started (API)
61 |
62 | Install the client library.
63 |
64 | ```bash
65 | pip install datatap
66 | ```
67 |
68 | Register at [app.datatap.dev](https://app.datatap.dev). Then, go to `Settings > Api Keys` to find your personal API key.
69 |
70 | ```bash
71 | export DATATAP_API_KEY="XXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXX"
72 | ```
73 |
74 | Start using open datasets instantly.
75 |
76 | ```python
77 | from datatap import Api
78 |
79 | api = Api()
80 | coco = api.get_default_database().get_repository("_/coco")
81 | dataset = coco.get_dataset("latest")
82 | print("COCO: ", dataset)
83 | ```
84 |
85 |
86 |
87 | ## Data Streaming Example
88 |
89 | ```python
90 | import itertools
91 | from datatap import Api
92 |
93 | api = Api()
94 | dataset = (api
95 | .get_default_database()
96 | .get_repository("_/wider-person")
97 | .get_dataset("latest")
98 | )
99 |
100 | training_stream = dataset.stream_split("training")
101 | for annotation in itertools.islice(training_stream, 5):
102 | print("Received annotation:", annotation)
103 | ```
104 |
105 | ## More Examples
106 | - [Documented Sample](https://github.com/Zensors/datatap-python/tree/master/datatap/examples/streaming-sample.md)
107 | - [Pytorch Jupyter Notebook](https://github.com/Zensors/datatap-python/tree/master/datatap/examples/torch.ipynb)
108 |
109 |
110 | ## Support and FAQ
111 |
112 | **Q. How do I resolve a missing API Key?**
113 |
114 | If you see the error `Exception: No API key available. Either provide it or use the [DATATAP_API_KEY] environment variable`, then the dataTap library was not able to find your API key. You can find your API key on [app.datatap.dev](https://app.datatap.dev) under settings. You can either set it as an environment variable or as the first argument to the `Api` constructor.
115 |
116 | **Q. Can dataTap be used offline?**
117 |
118 | Some functionality can be used offline, such as the droplet utilities and metrics. However, repository access and dataset streaming require internet access, even for local databases.
119 |
120 | **Q. Is dataTap accepting contributions?**
121 |
122 | dataTap currently uses a separate code review system for managing contributions. The team is looking into switching that system to GitHub to allow public contributions. Until then, we will actively monitor the GitHub issue tracker to help accommodate the community's needs.
123 |
124 | **Q. How can I get help using dataTap?**
125 |
126 | You can post a question in the [issue tracker](https://github.com/zensors/datatap-python/issues). The dataTap team actively monitors the repository, and will try to get back to you as soon as possible.
127 |
--------------------------------------------------------------------------------
/buildkite.yml:
--------------------------------------------------------------------------------
1 | steps:
2 | - label: ":copyright: Typechecking Module"
3 | commands:
4 | - "echo '--- Setting Up'"
5 | - "mkdir -p /build"
6 | - "cp -R . /build"
7 | - "cd /build"
8 | - "echo '--- Installing Packages'"
9 | - "pip install -r dev_requirements.txt"
10 | - "pip install -r requirements_torch.txt"
11 | - "pip install -e '.[metrics,torch]'"
12 | - "yarn global add 'pyright@1.1.264'"
13 | - "echo '+++ Running Pyright'"
14 | - "pyright"
15 | if: build.message !~ /skip tests/
16 | plugins:
17 | - docker#v3.7.0:
18 | image: "nikolaik/python-nodejs:python3.8-nodejs12"
19 |
20 | - label: ":python: Testing Module"
21 | commands:
22 | - "echo '--- Setting Up'"
23 | - "mkdir -p /test"
24 | - "cp -R . /test"
25 | - "cd /test"
26 | - "echo '--- Installing Packages'"
27 | - "pip install -e '.[metrics]'"
28 | - "echo '+++ Running Tests'"
29 | - "python -m unittest discover tests"
30 | if: build.message !~ /skip tests/
31 | plugins:
32 | - docker#v3.7.0:
33 | image: "python:3.8"
34 |
35 | - wait
36 |
37 | - label: ":package: Building and Pushing Wheel"
38 | commands:
39 | - "python3.7 setup.py bdist_wheel"
40 | - "twine upload -r datatap dist/*"
41 | - "twine upload -r zensors dist/*"
42 | if: build.branch == "master"
43 |
44 | - label: ":package: Building and Pushing Documentation"
45 | commands:
46 | - "echo '--- Setting Up'"
47 | - "mkdir -p /build"
48 | - "cp -R . /build"
49 | - "cd /build"
50 | - "echo '--- Installing Packages'"
51 | - "pip install -r requirements_torch.txt"
52 | - "pip install -e '.[metrics,torch]'"
53 | - "pip install pdoc3 awscli comet_ml"
54 | - "echo '+++ Compiling Docs'"
55 | - "pdoc3 datatap --html"
56 | - "echo '+++ Uploading Docs'"
57 | - "cd html/datatap && aws s3 cp --recursive --acl public-read . s3://docs.datatap.dev/"
58 | plugins:
59 | - docker#v3.7.0:
60 | image: "python:3.8"
61 | volumes:
62 | - "/var/lib/buildkite-agent/.aws/:/root/.aws/"
63 | if: build.branch == "master"
64 |
65 | - label: ":github: Pushing to github"
66 | commands:
67 | - "git remote add gh git@github.com:Zensors/datatap-python.git || true"
68 | - "git push gh HEAD:${BUILDKITE_BRANCH}"
69 | if: "build.branch !~ /^refs/"
70 |
--------------------------------------------------------------------------------
/datatap/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module provides classes and methods for interacting with dataTap. This includes inspecting individual annotations,
3 | creating or importing new annotations, and creating or loading datasets for machine learning.
4 |
5 | .. include:: ../README.md
6 | """
7 |
8 | import sys as _sys
9 |
10 | if _sys.version_info < (3, 7):
11 | print("\x1b[38;5;1mUsing an unsupported python version. Please install Python 3.7 or greater\x1b[0m")
12 | raise Exception("Invalid python version")
13 |
14 | from .api.entities import Api
15 |
16 | __all__ = [
17 | "Api",
18 | "api",
19 | "droplet",
20 | "geometry",
21 | "template",
22 | "utils",
23 | ]
--------------------------------------------------------------------------------
/datatap/api/.gitignore:
--------------------------------------------------------------------------------
1 | test.py
--------------------------------------------------------------------------------
/datatap/api/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The `datatap.api` module provides two different interfaces for the API.
3 |
4 | The simplest of these is found in `endpoints`, and contains classes and methods
5 | for directly interfacing with the API using its HTTP/JSON protocol.
6 |
7 | The more powerful interface is the `entities` interface, which wraps these
8 | endpoints into python objects with convenience methods for accessing other
9 | entities.
10 | """
11 |
12 | from . import endpoints
13 | from . import entities
14 | from . import types
15 |
16 | __all__ = [
17 | "endpoints",
18 | "entities",
19 | "types",
20 | ]
--------------------------------------------------------------------------------
/datatap/api/endpoints/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Encapsulates all of the raw API requests.
3 |
4 | In most cases, it is preferable to interact with the api through the
5 | `datatap.api.entities` submodule. However, this module can be used
6 | as well.
7 |
8 | ```py
9 | from datatap.api.endpoints import ApiEndpoints
10 |
11 | api_endpoints = ApiEndpoints()
12 |
13 | print(api_endpoints.user.current())
14 | print(api_endpoints.database.list())
15 | ```
16 | """
17 |
18 | from .endpoints import ApiEndpoints
19 |
20 | __all__ = [
21 | "ApiEndpoints"
22 | ]
--------------------------------------------------------------------------------
/datatap/api/endpoints/database_endpoints.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from .request import ApiNamespace
4 | from ..types import JsonDatabase
5 |
6 | class Database(ApiNamespace):
7 | """
8 | Raw API for interacting with database endpoints.
9 | """
10 | def list(self) -> List[JsonDatabase]:
11 | """
12 | Returns a list of `JsonDatabase`s that the current user has access to.
13 | """
14 | return self.get[List[JsonDatabase]]("/database")
15 |
16 | def query_by_uid(self, database: str) -> JsonDatabase:
17 | """
18 | Returns a specific `JsonDatabase`, identified by UID.
19 | """
20 | return self.get[JsonDatabase](f"/database/{database}")
21 |
22 | def query_by_name(self, database_name: str) -> List[JsonDatabase]:
23 | """
24 | Returns a list of `JsonDatabase`s with the name `database_name`.
25 | """
26 | return self.post[List[JsonDatabase]]("/database/query", { "name": database_name })
--------------------------------------------------------------------------------
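The endpoints above can be exercised directly through `ApiEndpoints`; a minimal sketch (the database name queried below is hypothetical):

```python
from datatap.api.endpoints import ApiEndpoints

api_endpoints = ApiEndpoints()

# List every database the current user can access.
for json_database in api_endpoints.database.list():
    print(json_database["uid"], json_database["name"])

# Name lookups can match more than one database, so a list is returned.
matches = api_endpoints.database.query_by_name("Public")  # hypothetical name
```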
/datatap/api/endpoints/dataset_endpoints.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | from datatap.api.types.dataset import JsonDataset
3 |
4 | import tempfile
5 | import ctypes
6 | from typing import Generator
7 | from multiprocessing import Array, set_start_method
8 |
9 | from datatap.droplet import ImageAnnotationJson
10 | from datatap.utils import CacheGenerator
11 |
12 | from .request import ApiNamespace
13 |
14 | set_start_method("spawn", force = True)
15 | process_directory_value = Array(ctypes.c_char, tempfile.mkdtemp(prefix="datatap-").encode("ascii"))
16 | process_directory: str = process_directory_value.value.decode("ascii")
17 |
18 | class Dataset(ApiNamespace):
19 | """
20 | Raw API for interacting with dataset endpoints.
21 | """
22 |
23 | def query(self, database_uid: str, namespace: str, name: str, tag: str) -> JsonDataset:
24 | """
25 | Queries the database for a dataset with given `namespace`, `name`, and `tag`.
26 | Returns a `JsonDataset`.
27 | """
28 | return self.get[JsonDataset](f"/database/{database_uid}/repository/{namespace}/{name}/{tag}")
29 |
30 | def stream_split(
31 | self,
32 | *,
33 | database_uid: str,
34 | namespace: str,
35 | name: str,
36 | uid: str,
37 | split: str,
38 | chunk: int,
39 | nchunks: int
40 | ) -> Generator[ImageAnnotationJson, None, None]:
41 | """
42 | Streams a split of a dataset. Streaming requires the `database_uid`, the full path of the dataset, and the
43 | `split`. Additionally, since this endpoint automatically shards the split, you must provide a chunk number
44 | (`chunk`) and the total number of chunks the split is divided into (`nchunks`).
45 |
46 | The result is a generator of `ImageAnnotationJson`s.
47 | """
48 | if chunk < 0 or chunk >= nchunks:
49 | raise Exception(f"Invalid chunk specification. {chunk} must be in the range [0, {nchunks})")
50 |
51 | dir_name = f"{process_directory}/{namespace}-{name}-{uid}-{split}-{nchunks}"
52 | file_name = f"{dir_name}/chunk-{chunk}.jsonl"
53 |
54 | def create_stream():
55 | return self.stream[ImageAnnotationJson](
56 | f"/database/{database_uid}/repository/{namespace}/{name}/{uid}/split/{split}/stream",
57 | { "chunk": str(chunk), "nchunks": str(nchunks) }
58 | )
59 |
60 | return CacheGenerator(file_name, create_stream)
--------------------------------------------------------------------------------
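A minimal sketch of calling the raw streaming endpoint; the `database_uid` and `uid` values below are placeholders, since in practice they are resolved by the `datatap.api.entities` layer:

```python
from itertools import islice

from datatap.api.endpoints import ApiEndpoints

endpoints = ApiEndpoints()

stream = endpoints.dataset.stream_split(
    database_uid = "database-uid",  # placeholder
    namespace = "_",
    name = "wider-person",
    uid = "dataset-uid",            # placeholder
    split = "training",
    chunk = 0,
    nchunks = 1,
)

# The generator yields ImageAnnotationJson dictionaries, caching them on disk as it goes.
for annotation_json in islice(stream, 3):
    print(annotation_json)
```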
/datatap/api/endpoints/endpoints.py:
--------------------------------------------------------------------------------
1 | from datatap.api.endpoints.repository_endpoints import Repository
2 | from typing import Optional
3 |
4 | from .request import Request
5 | from .user_endpoints import User
6 | from .database_endpoints import Database
7 | from .dataset_endpoints import Dataset
8 |
9 | class ApiEndpoints:
10 | """
11 | Class for performing raw API requests.
12 | """
13 |
14 | user: User
15 | """
16 | User endpoints.
17 | """
18 |
19 | database: Database
20 | """
21 | Database endpoints.
22 | """
23 |
24 | repository: Repository
25 | """
26 | Repository endpoints.
27 | """
28 |
29 | dataset: Dataset
30 | """
31 | Dataset endpoints.
32 | """
33 |
34 | _request: Request
35 |
36 | def __init__(self, api_key: Optional[str] = None, uri: Optional[str] = None):
37 | self._request = Request(api_key, uri)
38 |
39 | self.user = User(self._request)
40 | self.database = Database(self._request)
41 | self.repository = Repository(self._request)
42 | self.dataset = Dataset(self._request)
--------------------------------------------------------------------------------
/datatap/api/endpoints/repository_endpoints.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import List
4 | from .request import ApiNamespace
5 | from ..types import JsonRepository
6 |
7 | class Repository(ApiNamespace):
8 | """
9 | Raw API for interacting with repository endpoints.
10 | """
11 | def list(self, database_uid: str) -> List[JsonRepository]:
12 | """
13 | Returns a list of `JsonRepository`s in the database specified by `database_uid`.
14 | """
15 | return self.get[List[JsonRepository]](f"/database/{database_uid}/repository")
16 |
17 | def query(self, database_uid: str, namespace: str, name: str) -> JsonRepository:
18 | """
19 | Queries the database for the repository with a given `namespace` and `name`, and
20 | returns the corresponding `JsonRepository`.
21 | """
22 | return self.get[JsonRepository](f"/database/{database_uid}/repository/{namespace}/{name}")
23 |
--------------------------------------------------------------------------------
/datatap/api/endpoints/request.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | from datatap.utils.environment import Environment
3 |
4 | import json
5 | from base64 import b64encode
6 | from urllib.parse import urljoin
7 | from typing import Generator, Optional, Dict, TypeVar, Generic, Type, Any, cast
8 |
9 | import requests
10 |
11 | _T = TypeVar("_T")
12 | _S = TypeVar("_S")
13 |
14 | class GetRequester(Generic[_T]):
15 | """
16 | A callable-class for performing typed `GET` requests to the API.
17 | """
18 | api_key: str
19 | uri: str
20 |
21 | def __init__(self, api_key: str, base_uri: str):
22 | self.api_key = api_key
23 | self.uri = base_uri
24 |
25 | def __getitem__(self, s: Type[_S]) -> GetRequester[_S]:
26 | return cast(GetRequester[_S], self)
27 |
28 | def __call__(self, endpoint: str, query_params: Optional[Dict[str, str]] = None) -> _T:
29 | qualified_uri = urljoin(self.uri, "/api/" + endpoint)
30 | encoded_api_key = b64encode(bytes(self.api_key, "ascii")).decode("ascii")
31 |
32 | response = requests.get(
33 | qualified_uri,
34 | params=query_params,
35 | headers={
36 | "Authorization": f"Bearer {encoded_api_key}"
37 | },
38 | )
39 |
40 | if not response.ok:
41 | error: str
42 | try:
43 | error = response.json()["error"]
44 | except:
45 | error = response.content.decode("ascii")
46 | raise Exception(error)
47 |
48 | return response.json()
49 |
50 | class PostRequester(Generic[_T]):
51 | """
52 | A callable-class for performing typed `POST` requests to the API.
53 | """
54 | api_key: str
55 | uri: str
56 |
57 | def __init__(self, api_key: str, base_uri: str):
58 | self.api_key = api_key
59 | self.uri = base_uri
60 |
61 | def __getitem__(self, s: Type[_S]) -> PostRequester[_S]:
62 | return cast(PostRequester[_S], self)
63 |
64 | def __call__(self, endpoint: str, body: Dict[str, Any], query_params: Optional[Dict[str, str]] = None) -> _T:
65 | qualified_uri = urljoin(self.uri, "/api/" + endpoint)
66 | encoded_api_key = b64encode(bytes(self.api_key, "ascii")).decode("ascii")
67 |
68 | response = requests.post(
69 | qualified_uri,
70 | params=query_params,
71 | headers={
72 | "Authorization": f"Bearer {encoded_api_key}"
73 | },
74 | json=body,
75 | )
76 |
77 | if not response.ok:
78 | error: str
79 | try:
80 | error = response.json()["error"]
81 | except:
82 | error = response.content.decode("ascii")
83 | raise Exception(error)
84 |
85 | return response.json()
86 |
87 | class StreamRequester(Generic[_T]):
88 | """
89 | A callable-class for performing typed stream requests to the API.
90 | """
91 | api_key: str
92 | uri: str
93 |
94 | def __init__(self, api_key: str, uri: str):
95 | self.api_key = api_key
96 | self.uri = uri
97 |
98 | def __getitem__(self, s: Type[_S]) -> StreamRequester[_S]:
99 | return cast(StreamRequester[_S], self)
100 |
101 | def __call__(self, endpoint: str, query_params: Optional[Dict[str, str]] = None) -> Generator[_T, None, None]:
102 | qualified_uri = urljoin(self.uri, "/api/" + endpoint)
103 | encoded_api_key = b64encode(bytes(self.api_key, "ascii")).decode("ascii")
104 |
105 | response = requests.get(
106 | qualified_uri,
107 | params=query_params,
108 | headers={
109 | "Authorization": f"Bearer {encoded_api_key}"
110 | },
111 | stream=True
112 | )
113 |
114 | if not response.ok:
115 | error: str
116 | try:
117 | error = response.json()["error"]
118 | except:
119 | error = response.content.decode("ascii")
120 | raise Exception(error)
121 |
122 | for line in response.iter_lines(decode_unicode=True):
123 | yield json.loads(line)
124 |
125 |
126 | class Request:
127 | """
128 | A helper class that encapsulates the logic for making requests to the
129 | dataTap server. It is passed an optional `api_key`, which defaults to
130 | the `DATATAP_API_KEY` environment variable. It can also be passed a base
131 | `uri` for connecting to a different dataTap server (such as through a
132 | proxy).
133 | """
134 |
135 | get: GetRequester[Any]
136 | """
137 | Function for typesafe `GET` requests.
138 | """
139 |
140 | post: PostRequester[Any]
141 | """
142 | Function for typesafe `POST` requests.
143 | """
144 |
145 | stream: StreamRequester[Any]
146 | """
147 | Function for typesafe streaming requests.
148 | """
149 |
150 | def __init__(self, api_key: Optional[str] = None, base_uri: Optional[str] = None):
151 | api_key = api_key or Environment.API_KEY
152 | base_uri = base_uri or Environment.BASE_URI
153 | if api_key is None:
154 | raise Exception("No API key available. Either provide it or use the [DATATAP_API_KEY] environment variable")
155 |
156 | self.get = GetRequester[Any](api_key, base_uri)
157 | self.post = PostRequester[Any](api_key, base_uri)
158 | self.stream = StreamRequester[Any](api_key, base_uri)
159 |
160 | class ApiNamespace:
161 | """
162 | Base class for API endpoints.
163 | """
164 | def __init__(self, request: Request):
165 | self.request = request
166 | self.get = request.get
167 | self.post = request.post
168 | self.stream = request.stream
169 |
--------------------------------------------------------------------------------
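For reference, a short sketch of how these typed requesters are used (mirroring `Database.list` above); the subscript only narrows the static return type and returns the same requester at runtime:

```python
from typing import List

from datatap.api.endpoints.request import Request
from datatap.api.types import JsonDatabase

# Falls back to the DATATAP_API_KEY environment variable when no key is passed.
request = Request()

databases = request.get[List[JsonDatabase]]("/database")
for database in databases:
    print(database["name"])
```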
/datatap/api/endpoints/user_endpoints.py:
--------------------------------------------------------------------------------
1 | from .request import ApiNamespace
2 | from ..types import JsonUser
3 |
4 | class User(ApiNamespace):
5 | """
6 | Raw API for interacting with user endpoints.
7 | """
8 | def current(self) -> JsonUser:
9 | """
10 | Returns a `JsonUser` representing the logged in user.
11 | """
12 | return self.get[JsonUser]("/user")
13 |
--------------------------------------------------------------------------------
/datatap/api/entities/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The `datatap.api.entities` submodule contains several entities
3 | that provide a user-friendly abstraction for the dataTap API.
4 | """
5 |
6 | from .api import Api
7 |
8 | from .user import User
9 | from .database import Database
10 | from .dataset import AnyDataset, Dataset
11 | from .repository import Repository, Tag, Split
12 |
13 | __all__ = [
14 | "Api",
15 | "User",
16 | "Database",
17 | "AnyDataset",
18 | "Dataset",
19 | "Repository",
20 | "Tag",
21 | "Split",
22 | ]
--------------------------------------------------------------------------------
/datatap/api/entities/api.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional, Union, overload
2 |
3 | from typing_extensions import Literal
4 |
5 | from datatap.utils.helpers import assert_one
6 |
7 | from .user import User
8 | from .database import Database
9 | from ..endpoints import ApiEndpoints
10 |
11 | class Api:
12 | """
13 | The `Api` object is the primary method of interacting with the dataTap API.
14 |
15 | The `Api` constructor takes two optional arguments.
16 |
17 | The first, `api_key`, should be the current user's personal API key. In
18 | order to encourage good secret practices, this class will use the value
19 | found in the `DATATAP_API_KEY` environment variable if no key is passed in. Consider using
20 | environment variables or another secret manager for your API keys.
21 |
22 | The second argument is `uri`. This should only be used if you would like
23 | to target a different API server than the default. For instance, if you
24 | are using a proxy to reach the API, you can use the `uri` argument to
25 | point toward your proxy.
26 |
27 | This object encapsulates most of the logic for interacting with the API.
28 | For instance, to get a list of all datasets that a user has access to,
29 | you can run
30 |
31 | ```py
32 | from datatap import Api
33 |
34 | api = Api()
35 | print([
36 | dataset
37 | for database in api.get_database_list()
38 | for dataset in database.get_dataset_list()
39 | ])
40 | ```
41 |
42 | For more details on the functionality provided by the Api object, take
43 | a look at its documentation.
44 | """
45 | def __init__(self, api_key: Optional[str] = None, uri: Optional[str] = None):
46 | self.endpoints = ApiEndpoints(api_key, uri)
47 |
48 | def get_current_user(self) -> User:
49 | """
50 | Returns the current logged-in user.
51 | """
52 | return User.from_json(self.endpoints, self.endpoints.user.current())
53 |
54 | def get_database_list(self) -> List[Database]:
55 | """
56 | Returns a list of all databases that the current user has access to.
57 | """
58 | return [
59 | Database.from_json(self.endpoints, json_db)
60 | for json_db in self.endpoints.database.list()
61 | ]
62 |
63 | def get_default_database(self) -> Database:
64 | """
65 | Returns the default database for the user (this defaults to the public
66 | database).
67 | """
68 |
69 | # TODO(zwade): Have a way of specifying a per-user default
70 | current_user = self.get_current_user()
71 | if current_user.default_database is None:
72 | raise Exception("Trying to find the default database, but none is specified")
73 |
74 | return self.get_database_by_uid(current_user.default_database)
75 |
76 | def get_database_by_uid(self, uid: str) -> Database:
77 | """
78 | Queries a database by its UID and returns it.
79 | """
80 | return Database.from_json(self.endpoints, self.endpoints.database.query_by_uid(uid))
81 |
82 |
83 | @overload
84 | def get_database_by_name(self, name: str, allow_multiple: Literal[True]) -> List[Database]: ...
85 | @overload
86 | def get_database_by_name(self, name: str, allow_multiple: Literal[False] = False) -> Database: ...
87 | def get_database_by_name(self, name: str, allow_multiple: bool = False) -> Union[Database, List[Database]]:
88 | """
89 | Queries a database by its name and returns it. If `allow_multiple` is true, it will return
90 | a list of databases.
91 | """
92 | database_list = [
93 | Database.from_json(self.endpoints, database)
94 | for database in self.endpoints.database.query_by_name(name)
95 | ]
96 |
97 | if allow_multiple:
98 | return database_list
99 | else:
100 | return assert_one(database_list)
101 |
--------------------------------------------------------------------------------
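A minimal sketch of constructing an `Api` with an explicit key (normally the `DATATAP_API_KEY` environment variable is preferred); the key and the database name below are placeholders:

```python
from datatap import Api

api = Api("XXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXX")  # placeholder key

print(api.get_current_user())

database = api.get_default_database()

# With allow_multiple=True the lookup returns a list instead of asserting a single match.
databases = api.get_database_by_name("Public", allow_multiple = True)  # placeholder name
```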
/datatap/api/entities/database.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | from datatap.api.entities.dataset import AnyDataset
3 | from typing import Any, List, overload
4 |
5 | from datatap.utils import basic_repr
6 |
7 | from .repository import Repository
8 | from ..endpoints import ApiEndpoints
9 | from ..types import JsonDatabase, JsonDatabaseOptions
10 |
11 | class Database:
12 | """
13 | Represents a database. This database could either be the public database,
14 | or a user's private database that they have connected to the dataTap
15 | platform.
16 |
17 | This class provides utilities for viewing and updating the database's
18 | configuration, as well as inspecting its contents.
19 | """
20 | _endpoints: ApiEndpoints
21 |
22 | uid: str
23 | """
24 | The UID of this database.
25 | """
26 |
27 | name: str
28 | """
29 | The name of this database.
30 | """
31 |
32 | connection_options: JsonDatabaseOptions
33 | """
34 | How this database is configured. Sensitive details, such as database
35 | credentials, are omitted.
36 | """
37 |
38 | @staticmethod
39 | def from_json(endpoints: ApiEndpoints, json: JsonDatabase) -> Database:
40 | """
41 | Creates a `Database` from a `JsonDatabase`.
42 | """
43 | return Database(
44 | endpoints,
45 | uid = json["uid"],
46 | name = json["name"],
47 | connection_options = json["connectionOptions"]
48 | )
49 |
50 | def __init__(self, endpoints: ApiEndpoints, uid: str, *, name: str, connection_options: JsonDatabaseOptions):
51 | self._endpoints = endpoints
52 | self.uid = uid
53 | self.name = name
54 | self.connection_options = connection_options
55 |
56 | def get_repository_list(self) -> List[Repository]:
57 | """
58 | Returns a list of all `Repository`s that are stored in this database.
59 | """
60 | return [
61 | Repository.from_json(self._endpoints, self.uid, repository_json)
62 | for repository_json in self._endpoints.repository.list(self.uid)
63 | ]
64 |
65 |
66 | @overload
67 | def get_repository(self, slug: str) -> Repository: ...
68 | @overload
69 | def get_repository(self, namespace: str, name: str) -> Repository: ...
70 | def get_repository(self, *args: str, **kwargs: Any) -> Repository:
71 | """
72 | Queries a `Repository` by its namespace and name, or via its slug (namespace/name).
73 | """
74 | if len(kwargs) > 0:
75 | raise ValueError("get_repository is positional-only")
76 | elif len(args) == 1:
77 | namespace, name = args[0].split("/")
78 | else:
79 | namespace, name = args
80 |
81 | return Repository.from_json(self._endpoints, self.uid, self._endpoints.repository.query(self.uid, namespace, name))
82 |
83 | @overload
84 | def get_dataset(self, slug: str) -> AnyDataset: ...
85 | @overload
86 | def get_dataset(self, namespace: str, name: str, tag: str) -> AnyDataset: ...
87 | def get_dataset(self, *args: str, **kwargs: Any) -> AnyDataset:
88 | """
89 | Queries a `Dataset` by its namespace, name, and tag, or via its slug (namespace/name:tag).
90 | """
91 | if len(kwargs) > 0:
92 | raise ValueError("get_dataset is positional-only")
93 | elif len(args) == 1:
94 | repo_slug, tag = args[0].split(":")
95 | repo = self.get_repository(repo_slug)
96 | else:
97 | namespace, name, tag = args
98 | repo = self.get_repository(namespace, name)
99 |
100 | return repo.get_dataset(tag)
101 |
102 | def __repr__(self):
103 | return basic_repr("Database", self.uid, name = self.name)
104 |
--------------------------------------------------------------------------------
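A short sketch of the lookup forms described above, using the public `_/coco` repository referenced in the README:

```python
from datatap import Api

database = Api().get_default_database()

# Both calls resolve the same repository.
repository = database.get_repository("_/coco")
repository = database.get_repository("_", "coco")

# The dataset slug form appends the tag after a colon.
dataset = database.get_dataset("_/coco:latest")
print(dataset)
```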
/datatap/api/entities/dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | from datatap.api.types.dataset import JsonDatasetRepository
3 |
4 | from typing import Generator, Generic, List, TypeVar, Union, overload
5 |
6 | from datatap.droplet import ImageAnnotation, VideoAnnotation
7 | from datatap.template import ImageAnnotationTemplate, VideoAnnotationTemplate
8 | from datatap.utils import basic_repr
9 |
10 | from ..endpoints import ApiEndpoints
11 | from ..types import JsonDataset
12 |
13 | T = TypeVar("T", ImageAnnotationTemplate, VideoAnnotationTemplate)
14 |
15 | class DatasetRepository:
16 | """
17 | An object representing the repository a dataset came from.
18 | """
19 |
20 | name: str
21 | """
22 | The name of the repository.
23 | """
24 |
25 | namespace: str
26 | """
27 | The namespace of the repository.
28 | """
29 |
30 | @staticmethod
31 | def from_json(json: JsonDatasetRepository) -> DatasetRepository:
32 | """
33 | Creates a new `DatasetRepository` from a `JsonDatasetRepository`.
34 | """
35 | return DatasetRepository(name = json["name"], namespace = json["namespace"])
36 |
37 | def __init__(self, *, name: str, namespace: str):
38 | self.name = name
39 | self.namespace = namespace
40 |
41 | class Dataset(Generic[T]):
42 | """
43 | Represents a concrete version of a dataset. Critically, `Dataset`s cannot be changed
44 | once they're created.
45 |
46 | For reproducible training, ensure that you store the specific `Dataset` used
47 | during training.
48 | """
49 | _endpoints: ApiEndpoints
50 |
51 | uid: str
52 | """
53 | The UID of this `Dataset`.
54 | """
55 |
56 | database: str
57 | """
58 | The UID of the database in which this dataset lives.
59 | """
60 |
61 | repository: DatasetRepository
62 | """
63 | The repository this dataset belongs to.
64 | """
65 |
66 | splits: List[str]
67 | """
68 | A list of all the splits that this dataset has. By default, this will be
69 | `["training", "validation"]`.
70 | """
71 |
72 | template: T
73 | """
74 | The template that all annotations in this dataset version adhere to.
75 | """
76 |
77 | @staticmethod
78 | def from_json(endpoints: ApiEndpoints, json: JsonDataset) -> AnyDataset:
79 | """
80 | Creates a new `Dataset` from a `JsonDataset`.
81 | """
82 | template_json = json["template"]
83 | template: Union[ImageAnnotationTemplate, VideoAnnotationTemplate]
84 |
85 | if template_json["kind"] == "ImageAnnotationTemplate":
86 | template = ImageAnnotationTemplate.from_json(template_json)
87 | elif template_json["kind"] == "VideoAnnotationTemplate":
88 | template = VideoAnnotationTemplate.from_json(template_json)
89 | else:
90 | raise ValueError(f"Unknown template kind: {template_json['kind']}")
91 |
92 | return Dataset(
93 | endpoints,
94 | uid = json["uid"],
95 | database = json["database"],
96 | repository = DatasetRepository.from_json(json["repository"]),
97 | splits = json["splits"],
98 | template = template
99 | )
100 |
101 | def __init__(
102 | self,
103 | endpoints: ApiEndpoints,
104 | uid: str,
105 | *,
106 | database: str,
107 | repository: DatasetRepository,
108 | splits: List[str],
109 | template: Union[ImageAnnotationTemplate, VideoAnnotationTemplate]
110 | ):
111 | self._endpoints = endpoints
112 | self.uid = uid
113 | self.database = database
114 | self.repository = repository
115 | self.splits = splits
116 | self.template = template
117 |
118 | @overload
119 | def stream_split(
120 | self: Dataset[ImageAnnotationTemplate],
121 | split: str
122 | ) -> Generator[ImageAnnotation, None, None]: ...
123 | @overload
124 | def stream_split(
125 | self: Dataset[ImageAnnotationTemplate],
126 | split: str,
127 | chunk: int,
128 | nchunks: int
129 | ) -> Generator[ImageAnnotation, None, None]: ...
130 | @overload
131 | def stream_split(
132 | self: Dataset[VideoAnnotationTemplate],
133 | split: str
134 | ) -> Generator[VideoAnnotation, None, None]: ...
135 | @overload
136 | def stream_split(
137 | self: Dataset[VideoAnnotationTemplate],
138 | split: str,
139 | chunk: int,
140 | nchunks: int
141 | ) -> Generator[VideoAnnotation, None, None]: ...
142 | def stream_split(
143 | self,
144 | split: str,
145 | chunk: int = 0,
146 | nchunks: int = 1
147 | ) -> Generator[Union[ImageAnnotation, VideoAnnotation], None, None]:
148 | """
149 | Streams a specific split of this dataset from the database. All yielded annotations will adhere to this
150 | dataset's annotation template.
151 |
152 | If `chunk` and `nchunks` are omitted, then the full split will be streamed. Otherwise, the split will be
153 | broken into `nchunks` pieces, and only the chunk identified by `chunk` will be streamed.
154 | """
155 | for droplet in self._endpoints.dataset.stream_split(
156 | database_uid = self.database,
157 | namespace = self.repository.namespace,
158 | name = self.repository.name,
159 | uid = self.uid,
160 | split = split,
161 | chunk = chunk,
162 | nchunks = nchunks,
163 | ):
164 | if isinstance(self.template, ImageAnnotationTemplate):
165 | yield ImageAnnotation.from_json(droplet)
166 | elif isinstance(self.template, VideoAnnotationTemplate): # type: ignore - isinstance is excessive
167 | yield VideoAnnotation.from_json(droplet)
168 | else:
169 | raise ValueError(f"Unknown template kind: {type(self.template)}")
170 |
171 | def get_stable_identifier(self) -> str:
172 | return f"{self.repository.namespace}/{self.repository.name}:{self.uid}"
173 |
174 | def __repr__(self) -> str:
175 | return basic_repr(
176 | "Dataset",
177 | self.get_stable_identifier(),
178 | database = self.database,
179 | splits = self.splits
180 | )
181 |
182 | AnyDataset = Union[Dataset[ImageAnnotationTemplate], Dataset[VideoAnnotationTemplate]]
183 |
--------------------------------------------------------------------------------
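A minimal sketch of chunked streaming with this entity, using the `_/wider-person` repository from the README; the chunk indices are illustrative:

```python
import itertools

from datatap import Api

dataset = (
    Api()
    .get_default_database()
    .get_repository("_/wider-person")
    .get_dataset("latest")
)

# Stream only the second of four shards of the training split.
for annotation in itertools.islice(dataset.stream_split("training", 1, 4), 5):
    print(annotation)

# Store the stable identifier (namespace/name:uid) for reproducible training runs.
print(dataset.get_stable_identifier())
```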
/datatap/api/entities/repository.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from datetime import datetime
4 | from typing import Sequence
5 |
6 | from datatap.utils import basic_repr
7 |
8 | from .dataset import AnyDataset, Dataset
9 | from ..types import JsonRepository, JsonSplit, JsonTag
10 | from ..endpoints import ApiEndpoints
11 |
12 | class Split:
13 | """
14 | Represents the splits available for a given dataset.
15 | """
16 |
17 | split: str
18 | """
19 | The kind of the split (e.g., "training" or "validation").
20 | """
21 |
22 | annotation_count: int
23 | """
24 | The number of annotations available in this split.
25 | """
26 |
27 | @staticmethod
28 | def from_json(json: JsonSplit) -> Split:
29 | """
30 | Creates a `Split` from a `JsonSplit`
31 | """
32 | return Split(json["split"], json["annotationCount"])
33 |
34 | def __init__(self, split: str, annotation_count: int):
35 | self.split = split
36 | self.annotation_count = annotation_count
37 |
38 | def __repr__(self) -> str:
39 | return basic_repr("Split", self.split, annotation_count = self.annotation_count)
40 |
41 | class Tag:
42 | """
43 | Represents a single tag that may be accessed in this repository.
44 | """
45 |
46 | tag: str
47 | """
48 | A slug representing this tag (such as "latest").
49 | """
50 |
51 | dataset: str
52 | """
53 | The uid of the dataset to which this tag points.
54 | """
55 |
56 | updated_at: datetime
57 | """
58 | When this tag was most recently updated.
59 | """
60 |
61 | splits: Sequence[Split]
62 | """
63 | A list of splits available on this tag.
64 | """
65 |
66 | @staticmethod
67 | def from_json(json: JsonTag) -> Tag:
68 | """
69 | Creates a `Tag` from a `JsonTag`.
70 | """
71 | return Tag(
72 | json["tag"],
73 | json["dataset"],
74 | datetime.fromtimestamp(json["updatedAt"] / 1000),
75 | [Split.from_json(split) for split in json["splits"]]
76 | )
77 |
78 | def __init__(self, tag: str, dataset: str, updated_at: datetime, splits: Sequence[Split]):
79 | self.tag = tag
80 | self.dataset = dataset
81 | self.updated_at = updated_at
82 | self.splits = splits
83 |
84 | def __repr__(self) -> str:
85 | return basic_repr("Tag", self.tag, dataset = self.dataset, splits = self.splits)
86 |
87 | class Repository:
88 | """
89 | Represents a repository that contains one or more datasets.
90 | """
91 | _endpoints: ApiEndpoints
92 | _database: str
93 |
94 | name: str
95 | """
96 | The name of this repository.
97 | """
98 |
99 | namespace: str
100 | """
101 | The namespace of this repository.
102 | """
103 |
104 | tags: Sequence[Tag]
105 | """
106 | The tags available for this repository.
107 | """
108 |
109 | @staticmethod
110 | def from_json(endpoints: ApiEndpoints, database: str, json: JsonRepository) -> Repository:
111 | """
112 | Creates a `Repository` from a `JsonRepository`.
113 | """
114 | return Repository(
115 | endpoints,
116 | database,
117 | name = json["name"],
118 | namespace = json["namespace"],
119 | tags = [Tag.from_json(tag) for tag in json["tags"]],
120 | )
121 |
122 | def __init__(self, endpoints: ApiEndpoints, database: str, *, name: str, namespace: str, tags: Sequence[Tag]):
123 | self._endpoints = endpoints
124 | self._database = database
125 | self.name = name
126 | self.namespace = namespace
127 | self.tags = tags
128 |
129 | def get_dataset(self, tag: str) -> AnyDataset:
130 | """
131 | Fetches a dataset by its tag (or UID).
132 | """
133 | return Dataset.from_json(
134 | self._endpoints,
135 | self._endpoints.dataset.query(self._database, self.namespace, self.name, tag)
136 | )
137 |
138 | def __repr__(self) -> str:
139 | return basic_repr("Repository", name = self.name, namespace = self.namespace, tags = [tag.tag for tag in self.tags])
140 |
--------------------------------------------------------------------------------
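A short sketch of inspecting a repository's tags before fetching a dataset:

```python
from datatap import Api

repository = Api().get_default_database().get_repository("_/coco")

# Each tag records the dataset it points to and the splits available on it.
for tag in repository.tags:
    print(tag.tag, tag.updated_at, [split.split for split in tag.splits])

dataset = repository.get_dataset("latest")
```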
/datatap/api/entities/user.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | from typing import Optional
3 |
4 | from datatap.utils import basic_repr
5 |
6 | from ..endpoints import ApiEndpoints
7 | from ..types import JsonUser
8 |
9 | class User:
10 | """
11 | Represents a user account in the dataTap platform.
12 | """
13 |
14 | _endpoints: ApiEndpoints
15 |
16 | uid: str
17 | """
18 | The user's UID.
19 | """
20 |
21 | username: str
22 | """
23 | The user's username.
24 | """
25 |
26 | email: str
27 | """
28 | The user's email address.
29 | """
30 |
31 | default_database: Optional[str]
32 | """
33 | The user's default database
34 | """
35 |
36 | @staticmethod
37 | def from_json(endpoints: ApiEndpoints, json: JsonUser) -> User:
38 | """
39 | Creates a `User` from a `JsonUser`.
40 | """
41 | return User(
42 | endpoints,
43 | json["uid"],
44 | username = json["username"],
45 | email = json["email"],
46 | default_database = json["defaultDatabase"]
47 | )
48 |
49 | def __init__(self, endpoints: ApiEndpoints, uid: str, *, username: str, email: str, default_database: Optional[str]):
50 | self._endpoints = endpoints
51 | self.uid = uid
52 | self.username = username
53 | self.email = email
54 | self.default_database = default_database
55 |
56 | def __repr__(self) -> str:
57 | return basic_repr("User", self.uid, username = self.username, email = self.email)
--------------------------------------------------------------------------------
/datatap/api/types/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The `datatap.api.types` library contains all of the types returned by the API.
3 | """
4 |
5 | from .database import JsonDatabaseOptions, JsonDatabase
6 | from .dataset import JsonDataset
7 | from .repository import JsonRepository, JsonTag, JsonSplit
8 | from .user import JsonUser
9 |
10 | __all__ = [
11 | "JsonDatabaseOptions",
12 | "JsonDatabase",
13 | "JsonDataset",
14 | "JsonRepository",
15 | "JsonTag",
16 | "JsonSplit",
17 | "JsonUser",
18 | ]
--------------------------------------------------------------------------------
/datatap/api/types/database.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 | from typing_extensions import Literal, TypedDict
3 |
4 | class JsonDatabaseOptionsDirect(TypedDict):
5 | """
6 | Configuration options for a database that the server connects to directly.
7 | """
8 | kind: Literal["direct"]
9 | protocol: Union[Literal["neo4j"], Literal["neo4j+s"]]
10 | host: str
11 | port: int
12 |
13 | JsonDatabaseOptions = JsonDatabaseOptionsDirect
14 |
15 | class JsonDatabase(TypedDict):
16 | """
17 | The API type of a database.
18 | """
19 | uid: str
20 | name: str
21 | connectionOptions: JsonDatabaseOptions
22 |
23 |
--------------------------------------------------------------------------------
/datatap/api/types/dataset.py:
--------------------------------------------------------------------------------
1 | from typing import List, Union
2 |
3 | from datatap.template.image_annotation_template import \
4 | ImageAnnotationTemplateJson
5 | from datatap.template.video_annotation_template import \
6 | VideoAnnotationTemplateJson
7 | from typing_extensions import TypedDict
8 |
9 |
10 | class JsonDatasetRepository(TypedDict):
11 | namespace: str
12 | name: str
13 |
14 | class JsonDataset(TypedDict):
15 | """
16 | The API type of a dataset.
17 | """
18 | uid: str
19 | database: str
20 | repository: JsonDatasetRepository
21 | template: Union[ImageAnnotationTemplateJson, VideoAnnotationTemplateJson]
22 | splits: List[str]
23 |
24 |
--------------------------------------------------------------------------------
/datatap/api/types/repository.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | from typing_extensions import TypedDict
3 |
4 | class JsonSplit(TypedDict):
5 | split: str
6 | annotationCount: int
7 |
8 | class JsonTag(TypedDict):
9 | tag: str
10 | dataset: str
11 | updatedAt: int
12 | splits: List[JsonSplit]
13 |
14 | class JsonRepository(TypedDict):
15 | """
16 | The API type of a repository.
17 | """
18 | namespace: str
19 | name: str
20 | tags: List[JsonTag]
21 |
22 |
--------------------------------------------------------------------------------
/datatap/api/types/user.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | from typing_extensions import TypedDict
3 |
4 | class JsonUser(TypedDict):
5 | """
6 | The API type of an individual user.
7 | """
8 | uid: str
9 | username: str
10 | email: str
11 | defaultDatabase: Optional[str]
12 |
13 |
--------------------------------------------------------------------------------
/datatap/comet/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional, Sequence
4 |
5 | try:
6 | from comet_ml import APIExperiment, Experiment
7 | from comet_ml.exceptions import NotFound
8 | except ImportError:
9 | from datatap.utils import pprint
10 | pprint("{yellow}Unable to import comet_ml.")
11 |
12 | from datatap.api.entities import AnyDataset
13 | from datatap.droplet.image_annotation import ImageAnnotation
14 |
15 |
16 | def init_experiment(experiment: Experiment, dataset: AnyDataset):
17 | """
18 | Initializes an experiment by logging the template and the validation set ground truths if they have not already
19 | been logged.
20 | """
21 | api_experiment = APIExperiment(previous_experiment = experiment.id)
22 |
23 | if get_dataset(experiment) is None:
24 | log_dataset(experiment, dataset)
25 |
26 | try:
27 | api_experiment.get_asset("datatap/template.json")
28 | except NotFound:
29 | experiment.log_asset_data(
30 | [annotation.to_json() for annotation in dataset.stream_split("validation")],
31 | name = "datatap/validation/ground_truth.json"
32 | )
33 |
34 | experiment.log_asset_data(
35 | dataset.template.to_json(),
36 | name = "datatap/template.json"
37 | )
38 |
39 | def log_dataset(experiment: Experiment, dataset: AnyDataset):
40 | experiment.log_other("datatap-dataset", dataset.get_stable_identifier())
41 |
42 | def get_dataset(experiment: Experiment) -> Optional[str]:
43 | api_experiment = APIExperiment(previous_experiment = experiment.id)
44 | others = api_experiment.get_others_summary()
45 | dataset_metrics = [other for other in others if other["name"] == "datatap-dataset"]
46 |
47 | if len(dataset_metrics) == 0:
48 | return None
49 |
50 | return dataset_metrics[0].get("valueCurrent", None)
51 |
52 | def log_validation_proposals(experiment: Experiment, proposals: Sequence[ImageAnnotation]):
53 | experiment.log_asset_data(
54 | [annotation.to_json() for annotation in proposals],
55 | name = "datatap/validation/proposals.json"
56 | )
57 |
--------------------------------------------------------------------------------
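A minimal sketch of attaching a dataset to a Comet experiment with this module; it assumes Comet credentials are configured in the environment, and the project name is hypothetical:

```python
from comet_ml import Experiment

from datatap import Api
from datatap.comet import init_experiment

dataset = Api().get_default_database().get_dataset("_/coco:latest")

experiment = Experiment(project_name = "my-project")  # hypothetical project

# Logs the template and validation ground truth if they have not been logged yet.
init_experiment(experiment, dataset)
```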
/datatap/droplet/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module provides classes for working with ML data. Specifically, it provides methods for creating new ML data
3 | objects, converting ML data objects to and from the JSON droplet format, and manipulating ML data objects.
4 | """
5 |
6 | from .bounding_box import BoundingBox, BoundingBoxJson
7 | from .class_annotation import ClassAnnotation, ClassAnnotationJson
8 | from .frame_annotation import FrameAnnotation, FrameAnnotationJson
9 | from .image import Image, ImageJson
10 | from .image_annotation import ImageAnnotation, ImageAnnotationJson
11 | from .instance import Instance, InstanceJson
12 | from .keypoint import Keypoint, KeypointJson
13 | from .multi_instance import MultiInstance, MultiInstanceJson
14 | from .segmentation import Segmentation, SegmentationJson
15 | from .video import Video, VideoJson
16 | from .video_annotation import VideoAnnotation, VideoAnnotationJson
17 |
18 | __all__ = [
19 | "BoundingBox",
20 | "BoundingBoxJson",
21 | "ClassAnnotation",
22 | "ClassAnnotationJson",
23 | "FrameAnnotation",
24 | "FrameAnnotationJson",
25 | "Image",
26 | "ImageJson",
27 | "ImageAnnotation",
28 | "ImageAnnotationJson",
29 | "Instance",
30 | "InstanceJson",
31 | "Keypoint",
32 | "KeypointJson",
33 | "MultiInstance",
34 | "MultiInstanceJson",
35 | "Segmentation",
36 | "SegmentationJson",
37 | "Video",
38 | "VideoJson",
39 | "VideoAnnotation",
40 | "VideoAnnotationJson",
41 | ]
42 |
--------------------------------------------------------------------------------
/datatap/droplet/_media.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | from io import BytesIO
5 | from typing import Sequence
6 |
7 | try:
8 | import boto3
9 | except ImportError:
10 | boto3 = None
11 |
12 | try:
13 | import requests
14 | except ImportError:
15 | requests = None
16 |
17 | from ..utils import basic_repr
18 |
19 | class Media:
20 | """
21 | The `Media` class acts as a base class for all loadable media.
22 | """
23 |
24 | paths: Sequence[str]
25 | """
26 | A sequence of URIs where the media can be found. The loader
27 | will try them in order until it finds one it can load.
28 |
29 | Supported schemes include `http(s):`, `s3:`, and `file:` (when `allow_local` is enabled).
30 | """
31 |
32 | def __init__(self, *, paths: Sequence[str]):
33 | self.paths = paths
34 |
35 | def __repr__(self) -> str:
36 | return basic_repr("Media", paths = self.paths)
37 |
38 | def __eq__(self, other: object) -> bool:
39 | if not isinstance(other, Media):
40 | return NotImplemented
41 | return self.paths == other.paths
42 |
43 | def load(self, quiet: bool = False, attempts: int = 3, allow_local: bool = False) -> BytesIO:
44 | """
45 | Attempts to load the media file specified by this reference.
46 | Resolution happens in this order:
47 |
48 | 1. Load from an internal cache (either from a previous load, or from `from_pil`)
49 | 2. Try loading every path in order, returning once one loads
50 |
51 | Warning! `load` may attempt to read from the local file system or from private
52 | networks. Please ensure that the annotation you are loading is trusted.
53 | """
54 | for path in self.paths:
55 | for i in range(attempts):
56 | try:
57 | scheme, file_name, *_ = path.split(":")
58 | if scheme.lower() == "s3" and boto3 is not None:
59 | bucket_name, *path_components = [
60 | component
61 | for component in file_name.split("/")
62 | if component != ""
63 | ]
64 | path_name = "/".join(path_components)
65 |
66 | s3 = boto3.resource("s3") # type: ignore
67 | file_obj = s3.Object(bucket_name, path_name) # type: ignore
68 | data: bytes = file_obj.get()["Body"].read() # type: ignore
69 | elif scheme.lower() in ["http", "https"] and requests is not None:
70 | response = requests.get(path)
71 | data = response.content
72 | elif scheme.lower() == "file" and allow_local:
73 | with open(file_name, "rb") as file_obj:
74 | data = file_obj.read()
75 | else:
76 | raise NotImplementedError(f"Unsupported scheme: {scheme}")
77 |
78 | return BytesIO(data)
79 | except Exception as e:
80 | if not quiet:
81 | print(f"Cannot load {type(self).__name__} {path}, with error {str(e)}, attempt ({i + 1}/{attempts})", file = sys.stderr)
82 |
83 | raise FileNotFoundError(f"All paths for {type(self).__name__} failed to load", self.paths)
84 |
--------------------------------------------------------------------------------
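A minimal sketch of the base loader (subclasses such as `Image` and `Video` are the usual entry points); the URIs below are placeholders:

```python
from datatap.droplet._media import Media

media = Media(paths = [
    "https://example.com/frame.jpg",  # placeholder URIs; tried in order
    "s3://some-bucket/frame.jpg",
])

# Returns a BytesIO with the raw bytes, or raises FileNotFoundError if every path fails.
data = media.load(quiet = True, attempts = 1)
print(len(data.getvalue()), "bytes")
```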
/datatap/droplet/attributes.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional, Sequence, Union
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 |
9 | class _AttributeValueOptional(TypedDict, total = False):
10 | confidence: float
11 |
12 | class AttributeValueJson(_AttributeValueOptional, TypedDict):
13 | """
14 | The serialized JSON representation of an attribute candidate value.
15 | """
16 | value: str
17 |
18 | AttributeValuesJson = Union[Sequence[AttributeValueJson], str]
19 |
20 | class AttributeValue:
21 | value: str
22 | confidence: Optional[float]
23 |
24 | def __init__(self, value: str, *, confidence: Optional[float] = None) -> None:
25 | self.value = value
26 | self.confidence = confidence
27 |
28 | def to_json(self) -> AttributeValueJson:
29 | json = AttributeValueJson(value=self.value)
30 | if self.confidence is not None:
31 | json["confidence"] = self.confidence
32 | return json
33 |
34 | @staticmethod
35 | def from_json(json: AttributeValueJson) -> AttributeValue:
36 | return AttributeValue(json["value"], confidence=json.get("confidence"))
37 |
38 | class AttributeValues:
39 | content: Sequence[AttributeValue]
40 |
41 | @staticmethod
42 | def from_json(json: AttributeValuesJson) -> AttributeValues:
43 | """
44 | Constructs a `AttributeValues` from a `AttributeValuesJson`.
45 | """
46 | if isinstance(json, str):
47 | return AttributeValues([AttributeValue(json)])
48 | return AttributeValues([AttributeValue.from_json(c) for c in json])
49 |
50 | def __init__(self, content: Sequence[AttributeValue]):
51 | self.content = content
52 |
53 | def __repr__(self) -> str:
54 | return basic_repr("AttributeValues", self.content)
55 |
56 | def to_json(self) -> Sequence[AttributeValueJson]:
57 | return [c.to_json() for c in self.content]
58 |
59 | def most_likely(self) -> Optional[AttributeValue]:
60 | """
61 | Returns the most likely value of this specific attribute
62 | """
63 | if len(self.content) == 0:
64 | return None
65 |
66 | return max(self.content, key=lambda c: c.confidence or 1.0)
67 |
--------------------------------------------------------------------------------
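A short sketch of building attribute values and selecting the most likely candidate:

```python
from datatap.droplet.attributes import AttributeValue, AttributeValues

# Candidate values for a single attribute (e.g. a color), with optional confidences.
values = AttributeValues([
    AttributeValue("red", confidence = 0.7),
    AttributeValue("blue", confidence = 0.2),
])

best = values.most_likely()
print(best.value if best is not None else None)  # -> "red"

# Round-trips through the serialized droplet representation.
values = AttributeValues.from_json(values.to_json())
```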
/datatap/droplet/bounding_box.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..geometry import Rectangle, RectangleJson
8 | from ..utils import basic_repr
9 |
10 | class _BoundingBoxJsonOptional(TypedDict, total = False):
11 | confidence: float
12 |
13 | class BoundingBoxJson(_BoundingBoxJsonOptional, TypedDict):
14 | """
15 | The serialized JSON representation of a bounding box.
16 | """
17 | rectangle: RectangleJson
18 |
19 | class BoundingBox:
20 | """
21 | A `BoundingBox` represents the area within an image taken up by a detection,
22 | specified as an axis-aligned rectangle.
23 | """
24 |
25 | rectangle: Rectangle
26 | """
27 | The area within the image where the corresponding detection appears.
28 | """
29 |
30 | confidence: Optional[float]
31 | """
32 | The confidence associated with this bounding box.
33 | """
34 |
35 | @staticmethod
36 | def from_json(json: BoundingBoxJson) -> BoundingBox:
37 | """
38 | Constructs a `BoundingBox` from a `BoundingBoxJson`.
39 | """
40 | return BoundingBox(
41 | Rectangle.from_json(json["rectangle"]),
42 | confidence = json.get("confidence")
43 | )
44 |
45 | def __init__(self, rectangle: Rectangle, *, confidence: Optional[float] = None):
46 | self.rectangle = rectangle
47 | self.confidence = confidence
48 |
49 | self.rectangle.assert_valid()
50 |
51 | def __repr__(self) -> str:
52 | return basic_repr("BoundingBox", self.rectangle, confidence = self.confidence)
53 |
54 | def __eq__(self, other: object) -> bool:
55 | if not isinstance(other, BoundingBox):
56 | return NotImplemented
57 | return self.rectangle == other.rectangle and self.confidence == other.confidence
58 |
59 | def to_json(self) -> BoundingBoxJson:
60 | """
61 | Serializes this `BoundingBox` to a `BoundingBoxJson`.
62 | """
63 | json: BoundingBoxJson = {
64 | "rectangle": self.rectangle.to_json()
65 | }
66 |
67 | if self.confidence is not None:
68 | json["confidence"] = self.confidence
69 |
70 | return json
71 |
72 | def meets_confidence_threshold(self, threshold: float) -> bool:
73 | """
74 | Returns `True` if and only if the confidence of this bounding box is
75 | either unset or it is at least the given `threshold`.
76 | """
77 | return self.confidence is None or self.confidence >= threshold
78 |
--------------------------------------------------------------------------------
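A short sketch of how `BoundingBox` is used, with the unit-plane coordinates that its `assert_valid` check expects (the specific coordinates and confidences are illustrative only):

```py
from datatap.droplet.bounding_box import BoundingBox
from datatap.geometry import Point, Rectangle

# Coordinates are scaled to the unit plane, as the constructor's
# assert_valid() call requires.
box = BoundingBox(Rectangle(Point(0.1, 0.2), Point(0.4, 0.5)), confidence = 0.8)

assert box.meets_confidence_threshold(0.5)       # 0.8 >= 0.5
assert not box.meets_confidence_threshold(0.9)   # 0.8 < 0.9

# A box with no confidence passes every threshold.
assert BoundingBox(box.rectangle).meets_confidence_threshold(0.99)

# Round-trips through the JSON form.
assert BoundingBox.from_json(box.to_json()) == box
```

--------------------------------------------------------------------------------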
/datatap/droplet/class_annotation.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Callable, Sequence
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 | from .instance import Instance, InstanceJson
9 | from .multi_instance import MultiInstance, MultiInstanceJson
10 |
11 | __pdoc__ = { "ClassAnnotation.__add__": True }
12 |
13 | class ClassAnnotationJson(TypedDict, total = False):
14 | """
15 | The serialized JSON representation of a class annotation.
16 | """
17 | instances: Sequence[InstanceJson]
18 | multiInstances: Sequence[MultiInstanceJson]
19 |
20 | class ClassAnnotation:
21 | """
22 | A `ClassAnnotation` represents the set of detections for a given
23 | class. These may either be individual instances, or "multi instances"
24 | that describe a visual clustering of the class.
25 | """
26 |
27 | instances: Sequence[Instance]
28 | """
29 | A sequence of individual instances of this class.
30 | """
31 |
32 | multi_instances: Sequence[MultiInstance]
33 | """
34 | A sequence of multi-instances of this class. An example of a
35 | multi instance would be a crowd of people (labeled as such).
36 | """
37 |
38 | @staticmethod
39 | def from_json(json: ClassAnnotationJson) -> ClassAnnotation:
40 | """
41 | Constructs a `ClassAnnotation` from a `ClassAnnotationJson`.
42 | """
43 | return ClassAnnotation(
44 | instances = [Instance.from_json(instance) for instance in json["instances"]] if "instances" in json else [],
45 | multi_instances = [MultiInstance.from_json(multi_instance) for multi_instance in json["multiInstances"]] if "multiInstances" in json else []
46 | )
47 |
48 | def __init__(self, *, instances: Sequence[Instance], multi_instances: Sequence[MultiInstance] = []):
49 | self.instances = instances
50 | self.multi_instances = multi_instances
51 |
52 | def filter_detections(
53 | self,
54 | *,
55 | instance_filter: Callable[[Instance], bool],
56 | multi_instance_filter: Callable[[MultiInstance], bool]
57 | ) -> ClassAnnotation:
58 | """
59 | Returns a new class annotation consisting only of the instances and
60 | multi-instances that meet the given constraints.
61 | """
62 | return ClassAnnotation(
63 | instances = [
64 | instance
65 | for instance in self.instances
66 | if instance_filter(instance)
67 | ],
68 | multi_instances = [
69 | multi_instance
70 | for multi_instance in self.multi_instances
71 | if multi_instance_filter(multi_instance)
72 | ]
73 | )
74 |
75 | def __repr__(self) -> str:
76 | return basic_repr("ClassAnnotation", instances = self.instances, multi_instances = self.multi_instances)
77 |
78 | def __eq__(self, other: object) -> bool:
79 | if not isinstance(other, ClassAnnotation):
80 | return NotImplemented
81 | return self.instances == other.instances and self.multi_instances == other.multi_instances
82 |
83 | def __add__(self, other: ClassAnnotation) -> ClassAnnotation:
84 | if not isinstance(other, ClassAnnotation): # type: ignore - pyright complains about the isinstance check being redundant
85 | return NotImplemented
86 |
87 | instances = list(self.instances) + list(other.instances)
88 | multi_instances = list(self.multi_instances) + list(other.multi_instances)
89 |
90 | return ClassAnnotation(
91 | instances = instances,
92 | multi_instances = multi_instances,
93 | )
94 |
95 | def to_json(self) -> ClassAnnotationJson:
96 | """
97 | Serializes this `ClassAnnotation` into a `ClassAnnotationJson`.
98 | """
99 |
100 | return {
101 | "instances": [instance.to_json() for instance in self.instances],
102 | "multiInstances": [multi_instance.to_json() for multi_instance in self.multi_instances]
103 | }
104 |
--------------------------------------------------------------------------------
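Since `ClassAnnotation.__add__` concatenates the instance and multi-instance lists, two annotations of the same class can be merged directly. A small illustration (empty `Instance` objects are used only to keep the sketch short):

```py
from datatap.droplet.class_annotation import ClassAnnotation
from datatap.droplet.instance import Instance

a = ClassAnnotation(instances = [Instance()])
b = ClassAnnotation(instances = [Instance(), Instance()])

merged = a + b
assert len(merged.instances) == 3
assert len(merged.multi_instances) == 0

# Serialization uses a camelCase key for the multi-instance list.
assert set(merged.to_json().keys()) == {"instances", "multiInstances"}
```

--------------------------------------------------------------------------------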
/datatap/droplet/frame_annotation.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Any, Callable, Dict, Mapping
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 | from .class_annotation import ClassAnnotation, ClassAnnotationJson
9 | from .instance import Instance
10 | from .multi_instance import MultiInstance
11 |
12 | class FrameAnnotationJson(TypedDict):
13 | """
14 | The serialized JSON representation of a frame annotation.
15 | """
16 |
17 | classes: Mapping[str, ClassAnnotationJson]
18 |
19 | class FrameAnnotation:
20 | """
21 | A collection of class annotations that annotate a given video frame.
22 | """
23 |
24 | classes: Mapping[str, ClassAnnotation]
25 | """
26 | A mapping from class name to the annotations of that class.
27 | """
28 |
29 | @staticmethod
30 | def from_json(json: Mapping[str, Any]) -> FrameAnnotation:
31 | """
32 | Constructs a `FrameAnnotation` from a `FrameAnnotationJson`.
33 | """
34 | return FrameAnnotation(
35 | classes = {
36 | class_name: ClassAnnotation.from_json(json["classes"][class_name])
37 | for class_name in json["classes"]
38 | }
39 | )
40 |
41 | def __init__(
42 | self,
43 | *,
44 | classes: Mapping[str, ClassAnnotation],
45 | ):
46 | self.classes = classes
47 |
48 | def filter_detections(
49 | self,
50 | *,
51 | instance_filter: Callable[[Instance], bool],
52 | multi_instance_filter: Callable[[MultiInstance], bool]
53 | ) -> FrameAnnotation:
54 | """
55 | Returns a new frame annotation consisting only of the instances and
56 | multi-instances that meet the given constraints.
57 | """
58 | return FrameAnnotation(
59 | classes = {
60 | class_name: class_annotation.filter_detections(
61 | instance_filter = instance_filter,
62 | multi_instance_filter = multi_instance_filter
63 | )
64 | for class_name, class_annotation in self.classes.items()
65 | }
66 | )
67 |
68 | def apply_bounding_box_confidence_threshold(self, threshold: float) -> FrameAnnotation:
69 | """
70 | Returns a new frame annotation consisting only of the instances and
71 | multi-instances that have bounding boxes which either do not have a
72 | confidence specified or which have a confidence meeting the given
73 | threshold.
74 | """
75 | return self.filter_detections(
76 | instance_filter = lambda instance: (
77 | instance.bounding_box is not None
78 | and instance.bounding_box.meets_confidence_threshold(threshold)
79 | ),
80 | multi_instance_filter = lambda multi_instance: (
81 | multi_instance.bounding_box is not None
82 | and multi_instance.bounding_box.meets_confidence_threshold(threshold)
83 | )
84 | )
85 |
86 | def apply_segmentation_confidence_threshold(self, threshold: float) -> FrameAnnotation:
87 | """
88 | Returns a new frame annotation consisting only of the instances and
89 | multi-instances that have segmentations which either do not have a
90 | confidence specified or which have a confidence meeting the given
91 | threshold.
92 | """
93 | return self.filter_detections(
94 | instance_filter = lambda instance: (
95 | instance.segmentation is not None
96 | and instance.segmentation.meets_confidence_threshold(threshold)
97 | ),
98 | multi_instance_filter = lambda multi_instance: (
99 | multi_instance.segmentation is not None
100 | and multi_instance.segmentation.meets_confidence_threshold(threshold)
101 | )
102 | )
103 |
104 | def __repr__(self) -> str:
105 | return basic_repr(
106 | "FrameAnnotation",
107 | classes = self.classes
108 | )
109 |
110 | def __eq__(self, other: object) -> bool:
111 | if not isinstance(other, FrameAnnotation):
112 | return NotImplemented
113 | return self.classes == other.classes
114 |
115 | def __add__(self, other: FrameAnnotation) -> FrameAnnotation:
116 | if not isinstance(other, FrameAnnotation): # type: ignore - pyright complains about the isinstance check being redundant
117 | return NotImplemented
118 |
119 | classes: Dict[str, ClassAnnotation] = {}
120 |
121 | for key, value in self.classes.items():
122 | classes[key] = value
123 |
124 | for key, value in other.classes.items():
125 | if key in classes:
126 | classes[key] += value
127 | else:
128 | classes[key] = value
129 |
130 | return FrameAnnotation(
131 | classes = classes
132 | )
133 |
134 | def to_json(self) -> FrameAnnotationJson:
135 | """
136 | Serializes this frame annotation into a `FrameAnnotationJson`.
137 | """
138 | json: FrameAnnotationJson = {
139 | "classes": {
140 | name: class_annotation.to_json()
141 | for name, class_annotation in self.classes.items()
142 | }
143 | }
144 |
145 | return json
146 |
--------------------------------------------------------------------------------
/datatap/droplet/image.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional, Sequence
4 |
5 | import PIL.Image
6 | from typing_extensions import TypedDict
7 |
8 | from ..utils import basic_repr
9 | from ._media import Media
10 |
11 |
12 | class _ImageJsonOptional(TypedDict, total = False):
13 | uid: str
14 |
15 | class ImageJson(_ImageJsonOptional, TypedDict):
16 | """
17 | The serialized JSON representation of an `Image`.
18 | """
19 | paths: Sequence[str]
20 |
21 | class Image(Media):
22 | """
23 | The `Image` class contains information about what image was
24 | labeled by a given annotation. It also includes utilities
25 | for loading and manipulating images.
26 | """
27 |
28 | uid: Optional[str]
29 | """
30 | A unique ID for this image.
31 | """
32 |
33 | _pil_image: Optional[PIL.Image.Image]
34 |
35 | @staticmethod
36 | def from_json(json: ImageJson) -> Image:
37 | """
38 | Creates an `Image` from an `ImageJson`.
39 | """
40 | return Image(uid = json.get("uid", None), paths = json["paths"])
41 |
42 | @staticmethod
43 | def from_pil(pil_image: PIL.Image.Image) -> Image:
44 | """
45 | Creates an `Image` from an existing PIL Image. Note that an
46 | image created this way will not have any `paths` set, but will
47 | still be able to load the image via `get_pil_image`.
48 | """
49 | image = Image(
50 | paths = [],
51 | )
52 | image._pil_image = pil_image
53 | return image
54 |
55 | def __init__(self, *, uid: Optional[str] = None, paths: Sequence[str]):
56 | super().__init__(paths = paths)
57 | self.uid = uid
58 | self._pil_image = None
59 |
60 | def __repr__(self) -> str:
61 | return basic_repr("Image", uid = self.uid, paths = self.paths)
62 |
63 | def __eq__(self, other: object) -> bool:
64 | if not isinstance(other, Image):
65 | return NotImplemented
66 | return self.paths == other.paths
67 |
68 | # TODO(mdsavage): consider using functools.cache here if we upgrade to Python >= 3.9
69 | def get_pil_image(self, quiet: bool = False, attempts: int = 3, allow_local: bool = False) -> PIL.Image.Image:
70 | """
71 | Attempts to load the image specified by this reference. Resolution happens in this order:
72 |
73 | 1. Load from an internal cache (either from a previous load, or from `from_pil`)
74 | 2. Try loading every path in order, returning once one loads
75 |
76 | Warning! `get_pil_image` may attempt to read from the local file system or from private
77 | networks. Please ensure that the annotation you are loading is trusted.
78 | """
79 | if self._pil_image is not None:
80 | return self._pil_image
81 |
82 | return PIL.Image.open(self.load(quiet, attempts, allow_local))
83 |
84 | def to_json(self) -> ImageJson:
85 | """
86 | Serializes this `Image` into an `ImageJson`.
87 | """
88 | json: ImageJson = {
89 | "paths": self.paths
90 | }
91 |
92 | if self.uid is not None:
93 | json["uid"] = self.uid
94 |
95 | return json
96 |
--------------------------------------------------------------------------------
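A brief sketch of the `from_pil` path described above: the wrapped PIL image is served from the internal cache, so `get_pil_image` never touches the file system or network (the 64x64 RGB image is arbitrary):

```py
import PIL.Image

from datatap.droplet.image import Image

pil_image = PIL.Image.new("RGB", (64, 64))
image = Image.from_pil(pil_image)

# Served from the cache set by from_pil; no paths are consulted.
assert image.get_pil_image() is pil_image

# An image created this way has no paths and no uid in its JSON form.
assert image.to_json() == {"paths": []}
```

--------------------------------------------------------------------------------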
/datatap/droplet/image_annotation.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import json
4 | from typing import Any, Callable, Dict, Mapping, Optional
5 | from urllib.parse import quote, urlencode
6 |
7 | from datatap.utils import Environment
8 | from typing_extensions import Literal, TypedDict
9 |
10 | from ..geometry import Mask, MaskJson
11 | from ..utils import basic_repr
12 | from .class_annotation import ClassAnnotation, ClassAnnotationJson
13 | from .image import Image, ImageJson
14 | from .instance import Instance
15 | from .multi_instance import MultiInstance
16 |
17 |
18 | class _ImageAnnotationJsonOptional(TypedDict, total = False):
19 | uid: str
20 | mask: MaskJson
21 | metadata: Mapping[str, Any]
22 |
23 | class ImageAnnotationJson(_ImageAnnotationJsonOptional, TypedDict):
24 | """
25 | The serialized JSON representation of an image annotation.
26 | """
27 |
28 | kind: Literal["ImageAnnotation"]
29 | image: ImageJson
30 | classes: Mapping[str, ClassAnnotationJson]
31 |
32 | class ImageAnnotation:
33 | """
34 | A collection of class annotations that annotate a given image.
35 | """
36 |
37 | image: Image
38 | """
39 | The image being annotated.
40 | """
41 |
42 | classes: Mapping[str, ClassAnnotation]
43 | """
44 | A mapping from class name to the annotations of that class.
45 | """
46 |
47 | uid: Optional[str]
48 | """
49 | A unique identifier for this image annotation.
50 | """
51 |
52 | mask: Optional[Mask]
53 | """
54 | An optional region-of-interest mask to indicate that only
55 | features within the mask have been annotated.
56 | """
57 |
58 | metadata: Optional[Mapping[str, Any]]
59 | """
60 | An optional field for storing metadata on the annotation.
61 | """
62 |
63 | @staticmethod
64 | def from_json(json: Mapping[str, Any]) -> ImageAnnotation:
65 | """
66 | Constructs an `ImageAnnotation` from an `ImageAnnotationJson`.
67 | """
68 | return ImageAnnotation(
69 | image = Image.from_json(json["image"]),
70 | classes = {
71 | class_name: ClassAnnotation.from_json(json["classes"][class_name])
72 | for class_name in json["classes"]
73 | },
74 | mask = Mask.from_json(json["mask"]) if "mask" in json else None,
75 | uid = json.get("uid"),
76 | metadata = json.get("metadata")
77 | )
78 |
79 | def __init__(
80 | self,
81 | *,
82 | image: Image,
83 | classes: Mapping[str, ClassAnnotation],
84 | mask: Optional[Mask] = None,
85 | uid: Optional[str] = None,
86 | metadata: Optional[Mapping[str, Any]] = None
87 | ):
88 | self.image = image
89 | self.classes = classes
90 | self.mask = mask
91 | self.uid = uid
92 | self.metadata = metadata
93 |
94 | def filter_detections(
95 | self,
96 | *,
97 | instance_filter: Callable[[Instance], bool],
98 | multi_instance_filter: Callable[[MultiInstance], bool]
99 | ) -> ImageAnnotation:
100 | """
101 | Returns a new image annotation consisting only of the instances and
102 | multi-instances that meet the given constraints.
103 | """
104 | return ImageAnnotation(
105 | image = self.image,
106 | mask = self.mask,
107 | classes = {
108 | class_name: class_annotation.filter_detections(
109 | instance_filter = instance_filter,
110 | multi_instance_filter = multi_instance_filter
111 | )
112 | for class_name, class_annotation in self.classes.items()
113 | },
114 | uid = self.uid,
115 | metadata = self.metadata
116 | )
117 |
118 | def apply_bounding_box_confidence_threshold(self, threshold: float) -> ImageAnnotation:
119 | """
120 | Returns a new image annotation consisting only of the instances and
121 | multi-instances that have bounding boxes which either do not have a
122 | confidence specified or which have a confidence meeting the given
123 | threshold.
124 | """
125 | return self.filter_detections(
126 | instance_filter = lambda instance: (
127 | instance.bounding_box is not None
128 | and instance.bounding_box.meets_confidence_threshold(threshold)
129 | ),
130 | multi_instance_filter = lambda multi_instance: (
131 | multi_instance.bounding_box is not None
132 | and multi_instance.bounding_box.meets_confidence_threshold(threshold)
133 | )
134 | )
135 |
136 | def apply_segmentation_confidence_threshold(self, threshold: float) -> ImageAnnotation:
137 | """
138 | Returns a new image annotation consisting only of the instances and
139 | multi-instances that have segmentations which either do not have a
140 | confidence specified or which have a confidence meeting the given
141 | threshold.
142 | """
143 | return self.filter_detections(
144 | instance_filter = lambda instance: (
145 | instance.segmentation is not None
146 | and instance.segmentation.meets_confidence_threshold(threshold)
147 | ),
148 | multi_instance_filter = lambda multi_instance: (
149 | multi_instance.segmentation is not None
150 | and multi_instance.segmentation.meets_confidence_threshold(threshold)
151 | )
152 | )
153 |
154 | def apply_metadata(self, metadata: Mapping[str, Any]) -> ImageAnnotation:
155 | """
156 | Returns a new image annotation with the supplied metadata.
157 | """
158 | return ImageAnnotation(
159 | image = self.image,
160 | mask = self.mask,
161 | classes = self.classes,
162 | uid = self.uid,
163 | metadata = metadata
164 | )
165 |
166 | def __repr__(self) -> str:
167 | return basic_repr(
168 | "ImageAnnotation",
169 | uid = self.uid,
170 | image = self.image,
171 | mask = self.mask,
172 | classes = self.classes,
173 | metadata = self.metadata
174 | )
175 |
176 | def __eq__(self, other: object) -> bool:
177 | if not isinstance(other, ImageAnnotation):
178 | return NotImplemented
179 | return self.image == other.image and self.classes == other.classes and self.mask == other.mask
180 |
181 | def __add__(self, other: ImageAnnotation) -> ImageAnnotation:
182 | if not isinstance(other, ImageAnnotation): # type: ignore - pyright complains about the isinstance check being redundant
183 | return NotImplemented
184 |
185 | classes: Dict[str, ClassAnnotation] = {}
186 |
187 | for key, value in self.classes.items():
188 | classes[key] = value
189 |
190 | for key, value in other.classes.items():
191 | if key in classes:
192 | classes[key] += value
193 | else:
194 | classes[key] = value
195 |
196 | return ImageAnnotation(
197 | image = self.image,
198 | classes = classes,
199 | mask = self.mask,
200 | uid = self.uid if self.uid is not None else other.uid,
201 | metadata = self.metadata
202 | )
203 |
204 | def to_json(self) -> ImageAnnotationJson:
205 | """
206 | Serializes this image annotation into an `ImageAnnotationJson`.
207 | """
208 | json: ImageAnnotationJson = {
209 | "kind": "ImageAnnotation",
210 | "image": self.image.to_json(),
211 | "classes": {
212 | name: class_annotation.to_json()
213 | for name, class_annotation in self.classes.items()
214 | }
215 | }
216 |
217 | if self.mask is not None:
218 | json["mask"] = self.mask.to_json()
219 |
220 | if self.uid is not None:
221 | json["uid"] = self.uid
222 |
223 | if self.metadata is not None:
224 | json["metadata"] = self.metadata
225 |
226 | return json
227 |
228 | def get_visualization_url(self) -> str:
229 | """
230 | Generates a URL on the dataTap platform that can be visited to view a
231 | visualization of this `ImageAnnotation`.
232 | """
233 | params = {
234 | "annotation": json.dumps(self.to_json(), separators = (",", ":"))
235 | }
236 |
237 | return f"{Environment.BASE_URI}/visualizer/single#{urlencode(params, quote_via = quote)}"
238 |
239 | def get_comparison_url(self, other: ImageAnnotation) -> str:
240 | """
241 | Generates a URL on the dataTap platform that can be visited to view a
242 | visual comparison of this `ImageAnnotation` (which is treated as the
243 | "ground truth") and the `other` argument (which is treated as the
244 | "proposal").
245 |
246 | This method does not check that the two annotations agree on what image
247 | they are annotating, and will always use this `ImageAnnotation`'s
248 | image.
249 | """
250 | params = {
251 | "groundTruth": json.dumps(self.to_json(), separators = (",", ":")),
252 | "proposal": json.dumps(other.to_json(), separators = (",", ":"))
253 | }
254 |
255 | return f"{Environment.BASE_URI}/visualizer/compare#{urlencode(params, quote_via = quote)}"
256 |
--------------------------------------------------------------------------------
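The confidence-threshold helpers above are thin wrappers around `filter_detections`. A minimal sketch of the behavior, using a hypothetical "person" class and illustrative coordinates and confidences:

```py
from datatap.droplet.bounding_box import BoundingBox
from datatap.droplet.class_annotation import ClassAnnotation
from datatap.droplet.image import Image
from datatap.droplet.image_annotation import ImageAnnotation
from datatap.droplet.instance import Instance
from datatap.geometry import Point, Rectangle

annotation = ImageAnnotation(
    image = Image(paths = []),
    classes = {
        "person": ClassAnnotation(instances = [
            Instance(bounding_box = BoundingBox(Rectangle(Point(0.1, 0.1), Point(0.3, 0.4)), confidence = 0.9)),
            Instance(bounding_box = BoundingBox(Rectangle(Point(0.5, 0.5), Point(0.7, 0.8)), confidence = 0.2)),
        ]),
    },
)

# Only the 0.9-confidence detection survives a 0.5 threshold.
filtered = annotation.apply_bounding_box_confidence_threshold(0.5)
assert len(filtered.classes["person"].instances) == 1
```

--------------------------------------------------------------------------------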
/datatap/droplet/instance.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Dict, Mapping, Optional
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 | from .attributes import AttributeValues, AttributeValuesJson
9 | from .bounding_box import BoundingBox, BoundingBoxJson
10 | from .keypoint import Keypoint, KeypointJson
11 | from .segmentation import Segmentation, SegmentationJson
12 |
13 |
14 | class InstanceJson(TypedDict, total = False):
15 | """
16 | The JSON serialization of an `Instance`.
17 | """
18 | id: str
19 | boundingBox: BoundingBoxJson
20 | segmentation: SegmentationJson
21 | keypoints: Mapping[str, Optional[KeypointJson]]
22 | attributes: Mapping[str, AttributeValuesJson]
23 |
24 | class Instance:
25 | """
26 | A single appearance of an object of a particular class within a given image.
27 | """
28 |
29 | id: Optional[str]
30 | """
31 | A unique id for this instance (within the context of its containing
32 | annotation). Multiple instances with the same id should be interpreted
33 | to be the same object.
34 | """
35 |
36 | bounding_box: Optional[BoundingBox]
37 | """
38 | The bounding box of this instance.
39 | """
40 |
41 | segmentation: Optional[Segmentation]
42 | """
43 | The segmentation of this instance.
44 | """
45 |
46 | keypoints: Optional[Mapping[str, Optional[Keypoint]]]
47 | """
48 | A mapping from keypoint name to the keypoint within this instance. If a key
49 | maps to `None`, then the annotation is reporting the _absence of_ that
50 | keypoint (i.e., that it is not visible in the image and does not have an
51 | inferrable position in the image).
52 | """
53 |
54 | attributes: Optional[Mapping[str, AttributeValues]]
55 | """
56 | A mapping from attribute name to value.
57 | """
58 |
59 | @staticmethod
60 | def from_json(json: InstanceJson) -> Instance:
61 | """
62 | Creates an `Instance` from an `InstanceJson`.
63 | """
64 | return Instance(
65 | id = json.get("id"),
66 | bounding_box = BoundingBox.from_json(json["boundingBox"]) if "boundingBox" in json else None,
67 | segmentation = Segmentation.from_json(json["segmentation"]) if "segmentation" in json else None,
68 | keypoints = {
69 | name: Keypoint.from_json(keypoint) if keypoint is not None else None
70 | for name, keypoint in json["keypoints"].items()
71 | } if "keypoints" in json else None,
72 | attributes = {
73 | k: AttributeValues.from_json(v) for k, v in json["attributes"].items()
74 | } if "attributes" in json else None
75 | )
76 |
77 | def __init__(
78 | self,
79 | *,
80 | id: Optional[str] = None,
81 | bounding_box: Optional[BoundingBox] = None,
82 | segmentation: Optional[Segmentation] = None,
83 | keypoints: Optional[Mapping[str, Optional[Keypoint]]] = None,
84 | attributes: Optional[Mapping[str, AttributeValues]] = None
85 | ):
86 | self.id = id
87 | self.bounding_box = bounding_box
88 | self.segmentation = segmentation
89 | self.keypoints = keypoints
90 | self.attributes = attributes
91 |
92 | def __repr__(self) -> str:
93 | return basic_repr(
94 | "Instance",
95 | id = self.id,
96 | bounding_box = self.bounding_box,
97 | segmentation = self.segmentation,
98 | keypoints = self.keypoints,
99 | attributes = self.attributes
100 | )
101 |
102 | def __eq__(self, other: object) -> bool:
103 | if not isinstance(other, Instance):
104 | return NotImplemented
105 | return (
106 | self.id == other.id
107 | and self.bounding_box == other.bounding_box
108 | and self.segmentation == other.segmentation
109 | and self.keypoints == other.keypoints
110 | and self.attributes == other.attributes
111 | )
112 |
113 | def to_json(self) -> InstanceJson:
114 | """
115 | Serializes an `Instance` into an `InstanceJson`.
116 | """
117 | json: InstanceJson = {}
118 |
119 | if self.id is not None:
120 | json["id"] = self.id
121 |
122 | if self.bounding_box is not None:
123 | json["boundingBox"] = self.bounding_box.to_json()
124 |
125 | if self.segmentation is not None:
126 | json["segmentation"] = self.segmentation.to_json()
127 |
128 | if self.keypoints is not None:
129 | keypoints: Dict[str, Optional[KeypointJson]] = {}
130 |
131 | for name, keypoint in self.keypoints.items():
132 | keypoints[name] = keypoint.to_json() if keypoint is not None else None
133 |
134 | json["keypoints"] = keypoints
135 |
136 | if self.attributes is not None:
137 | json["attributes"] = {
138 | k: v.to_json() for k, v in self.attributes.items()
139 | }
140 |
141 | return json
142 |
--------------------------------------------------------------------------------
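A quick sketch of the JSON round trip for an `Instance`; note the camelCase key used for the bounding box (the id and coordinates here are arbitrary):

```py
from datatap.droplet.bounding_box import BoundingBox
from datatap.droplet.instance import Instance
from datatap.geometry import Point, Rectangle

instance = Instance(
    id = "example-id",
    bounding_box = BoundingBox(Rectangle(Point(0.2, 0.2), Point(0.6, 0.9))),
)

json = instance.to_json()
assert "boundingBox" in json and "segmentation" not in json  # unset fields are omitted

assert Instance.from_json(json) == instance
```

--------------------------------------------------------------------------------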
/datatap/droplet/keypoint.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..geometry import Point, PointJson
8 | from ..utils import basic_repr
9 |
10 | class _KeypointJsonOptional(TypedDict, total = False):
11 | occluded: bool
12 | confidence: float
13 |
14 | class KeypointJson(_KeypointJsonOptional, TypedDict):
15 | """
16 | The JSON serialization of a `Keypoint`.
17 | """
18 | point: PointJson
19 |
20 | class Keypoint:
21 | """
22 | An object representing a specific keypoint in a particular instance.
23 | """
24 |
25 | point: Point
26 | """
27 | The point in the image where this keypoint appears.
28 | """
29 |
30 | occluded: Optional[bool]
31 | """
32 | Whether this keypoint is occluded.
33 |
34 | If `False`, the keypoint is visible within the image.
35 | If `True`, the keypoint is not visible in the image because it is blocked by some other object,
36 | but has an inferrable position that would lie within the frame of the image.
37 | If `None`, then the data source did not differentiate between occluded and unoccluded keypoints.
38 | """
39 |
40 | confidence: Optional[float]
41 | """
42 | The confidence associated with this keypoint.
43 | """
44 |
45 | @staticmethod
46 | def from_json(json: KeypointJson) -> Keypoint:
47 | """
48 | Creates a `Keypoint` from a `KeypointJson`.
49 | """
50 | return Keypoint(
51 | Point.from_json(json["point"]),
52 | occluded = json.get("occluded"),
53 | confidence = json.get("confidence")
54 | )
55 |
56 | def __init__(self, point: Point, *, occluded: Optional[bool] = None, confidence: Optional[float] = None):
57 | self.point = point
58 | self.occluded = occluded
59 | self.confidence = confidence
60 |
61 | self.point.assert_valid()
62 |
63 | def __repr__(self) -> str:
64 | return basic_repr("Keypoint", self.point, occluded = self.occluded, confidence = self.confidence)
65 |
66 | def __eq__(self, other: object) -> bool:
67 | if not isinstance(other, Keypoint):
68 | return NotImplemented
69 | return self.point == other.point and self.occluded == other.occluded and self.confidence == other.confidence
70 |
71 | def to_json(self) -> KeypointJson:
72 | """
73 | Serializes this object into a `KeypointJson`.
74 | """
75 | json: KeypointJson = {
76 | "point": self.point.to_json()
77 | }
78 |
79 | if self.occluded is not None:
80 | json["occluded"] = self.occluded
81 |
82 | if self.confidence is not None:
83 | json["confidence"] = self.confidence
84 |
85 | return json
86 |
--------------------------------------------------------------------------------
/datatap/droplet/multi_instance.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 | from .bounding_box import BoundingBox, BoundingBoxJson
9 | from .segmentation import Segmentation, SegmentationJson
10 |
11 |
12 | class MultiInstanceJson(TypedDict, total = False):
13 | """
14 | The JSON serialization of a `MultiInstance`.
15 | """
16 | boundingBox: BoundingBoxJson
17 | segmentation: SegmentationJson
18 | count: int
19 |
20 | class MultiInstance:
21 | """
22 | An appearance of a group of objects of a particular class in a particular image.
23 |
24 | There is not a strict definition as to when a group of instances should be categorized as a multi-instance.
25 | As such, when constructing a dataset, it is best to ensure that all of the `DataSource`s agree on what
26 | constitutes a `MultiInstance`. These are most often used in public datasets when the cost of annotating
27 | every instance would be too high.
28 | """
29 |
30 | bounding_box: Optional[BoundingBox]
31 | """
32 | The bounding box of this multi-instance.
33 | """
34 |
35 | segmentation: Optional[Segmentation]
36 | """
37 | The segmentation of this multi-instance.
38 | """
39 |
40 | count: Optional[int]
41 | """
42 | A count of how many true instances are encapsulated in this multi-instance.
43 | """
44 |
45 | @staticmethod
46 | def from_json(json: MultiInstanceJson) -> MultiInstance:
47 | """
48 | Creates a `MultiInstance` from a `MultiInstanceJson`.
49 | """
50 | return MultiInstance(
51 | bounding_box = BoundingBox.from_json(json["boundingBox"]) if "boundingBox" in json else None,
52 | segmentation = Segmentation.from_json(json["segmentation"]) if "segmentation" in json else None,
53 | count = json.get("count")
54 | )
55 |
56 | def __init__(
57 | self,
58 | *,
59 | bounding_box: Optional[BoundingBox] = None,
60 | segmentation: Optional[Segmentation] = None,
61 | count: Optional[int] = None
62 | ):
63 | self.bounding_box = bounding_box
64 | self.segmentation = segmentation
65 | self.count = count
66 |
67 | def __repr__(self) -> str:
68 | return basic_repr(
69 | "MultiInstance",
70 | bounding_box = self.bounding_box,
71 | segmentation = self.segmentation,
72 | count = self.count
73 | )
74 |
75 | def __eq__(self, other: object) -> bool:
76 | if not isinstance(other, MultiInstance):
77 | return NotImplemented
78 | return self.bounding_box == other.bounding_box and self.segmentation == other.segmentation and self.count == other.count
79 |
80 | def to_json(self) -> MultiInstanceJson:
81 | """
82 | Serializes this object as a `MultiInstanceJson`.
83 | """
84 | json: MultiInstanceJson = {}
85 |
86 | if self.bounding_box is not None:
87 | json["boundingBox"] = self.bounding_box.to_json()
88 |
89 | if self.segmentation is not None:
90 | json["segmentation"] = self.segmentation.to_json()
91 |
92 | if self.count is not None:
93 | json["count"] = self.count
94 |
95 | return json
96 |
--------------------------------------------------------------------------------
/datatap/droplet/segmentation.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..geometry import Mask, MaskJson
8 | from ..utils import basic_repr
9 |
10 | class _SegmentationJsonOptional(TypedDict, total = False):
11 | confidence: float
12 |
13 | class SegmentationJson(_SegmentationJsonOptional, TypedDict):
14 | """
15 | The serialized JSON representation of a segmentation.
16 | """
17 | mask: MaskJson
18 |
19 | class Segmentation:
20 | """
21 | A `Segmentation` represents the area within an image taken up by a
22 | detection, specified as a `Mask`.
23 | """
24 |
25 | mask: Mask
26 | """
27 | The area within the image where the corresponding detection appears.
28 | """
29 |
30 | confidence: Optional[float]
31 | """
32 | The confidence associated with this segmentation.
33 | """
34 |
35 | @staticmethod
36 | def from_json(json: SegmentationJson) -> Segmentation:
37 | """
38 | Constructs a `Segmentation` from a `SegmentationJson`.
39 | """
40 | return Segmentation(
41 | Mask.from_json(json["mask"]),
42 | confidence = json.get("confidence")
43 | )
44 |
45 | def __init__(self, mask: Mask, *, confidence: Optional[float] = None):
46 | self.mask = mask
47 | self.confidence = confidence
48 |
49 | self.mask.assert_valid()
50 |
51 | def __repr__(self) -> str:
52 | return basic_repr("Segmentation", self.mask, confidence = self.confidence)
53 |
54 | def __eq__(self, other: object) -> bool:
55 | if not isinstance(other, Segmentation):
56 | return NotImplemented
57 | return self.mask == other.mask and self.confidence == other.confidence
58 |
59 | def to_json(self) -> SegmentationJson:
60 | """
61 | Serializes this `Segmentation` to a `SegmentationJson`.
62 | """
63 | json: SegmentationJson = {
64 | "mask": self.mask.to_json()
65 | }
66 |
67 | if self.confidence is not None:
68 | json["confidence"] = self.confidence
69 |
70 | return json
71 |
72 | def meets_confidence_threshold(self, threshold: float) -> bool:
73 | """
74 | Returns `True` if and only if the confidence of this segmentation is
75 | either unset or is at least the given `threshold`.
76 | """
77 | return self.confidence is None or self.confidence >= threshold
78 |
--------------------------------------------------------------------------------
/datatap/droplet/video.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional, Sequence
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 | from .image import Image, ImageJson
9 |
10 |
11 | class VideoJson(TypedDict, total = False):
12 | """
13 | The serialized JSON representation of a `Video`.
14 | """
15 |
16 | uid: str
17 | paths: Sequence[str]
18 | frames: Sequence[ImageJson]
19 |
20 |
21 | class Video:
22 | """
23 | The `Video` class contains information about what video was
24 | labeled by a given annotation. It also includes utilities
25 | for loading and manipulating videos.
26 | """
27 |
28 | uid: Optional[str]
29 | """
30 | A unique ID for this video.
31 | """
32 |
33 | paths: Optional[Sequence[str]]
34 | """
35 | A sequence of URIs where the media can be found. The loader
36 | will try them in order until it finds one it can load.
37 |
38 | Supported schemes include `http(s):`, `s3:`
39 | """
40 |
41 | frames: Optional[Sequence[Image]]
42 | """
43 | A sequence of images representing the video.
44 | """
45 |
46 | @staticmethod
47 | def from_json(json: VideoJson) -> Video:
48 | """
49 | Creates a `Video` from a `VideoJson`.
50 | """
51 | return Video(
52 | uid = json.get("uid"),
53 | paths = json.get("paths"),
54 | frames = [Image.from_json(frame) for frame in json["frames"]] if "frames" in json else None
55 | )
56 |
57 | def __init__(
58 | self,
59 | *,
60 | uid: Optional[str] = None,
61 | paths: Optional[Sequence[str]] = None,
62 | frames: Optional[Sequence[Image]] = None
63 | ):
64 | self.uid = uid
65 | self.paths = paths
66 | self.frames = frames
67 |
68 | def __repr__(self) -> str:
69 | return basic_repr("Video", uid = self.uid, paths = self.paths, frames = self.frames)
70 |
71 | def __eq__(self, other: object) -> bool:
72 | if not isinstance(other, Video):
73 | return NotImplemented
74 | return self.paths == other.paths
75 |
76 | def to_json(self) -> VideoJson:
77 | """
78 | Serializes this `Video` into a `VideoJson`.
79 | """
80 | json: VideoJson = {}
81 |
82 | if self.uid is not None:
83 | json["uid"] = self.uid
84 |
85 | if self.paths is not None:
86 | json["paths"] = self.paths
87 |
88 | if self.frames is not None:
89 | json["frames"] = [frame.to_json() for frame in self.frames]
90 |
91 | return json
92 |
--------------------------------------------------------------------------------
/datatap/droplet/video_annotation.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Any, Callable, Mapping, Optional, Sequence
4 | from datatap.droplet.video import Video, VideoJson
5 |
6 | from typing_extensions import Literal, TypedDict
7 |
8 | from ..utils import basic_repr
9 | from .instance import Instance
10 | from .multi_instance import MultiInstance
11 | from .frame_annotation import FrameAnnotation, FrameAnnotationJson
12 |
13 |
14 | class _VideoAnnotationJsonOptional(TypedDict, total = False):
15 | uid: str
16 | metadata: Mapping[str, Any]
17 |
18 | class VideoAnnotationJson(_VideoAnnotationJsonOptional, TypedDict):
19 | """
20 | The serialized JSON representation of a video annotation.
21 | """
22 |
23 | kind: Literal["VideoAnnotation"]
24 | video: VideoJson
25 | frames: Sequence[FrameAnnotationJson]
26 |
27 | class VideoAnnotation:
28 | """
29 | A collection of frame annotations that annotate a given video.
30 | """
31 |
32 | video: Video
33 | """
34 | The video being annotated.
35 | """
36 |
37 | uid: Optional[str]
38 | """
39 | A unique identifier for this video annotation.
40 | """
41 |
42 | metadata: Optional[Mapping[str, Any]]
43 | """
44 | An optional field for storing metadata on the annotation.
45 | """
46 |
47 | @staticmethod
48 | def from_json(json: Mapping[str, Any]) -> VideoAnnotation:
49 | """
50 | Constructs a `VideoAnnotation` from a `VideoAnnotationJson`.
51 | """
52 | return VideoAnnotation(
53 | video = Video.from_json(json["video"]),
54 | frames = [FrameAnnotation.from_json(frame) for frame in json["frames"]],
55 | uid = json.get("uid"),
56 | metadata = json.get("metadata")
57 | )
58 |
59 | def __init__(
60 | self,
61 | *,
62 | video: Video,
63 | frames: Sequence[FrameAnnotation],
64 | uid: Optional[str] = None,
65 | metadata: Optional[Mapping[str, Any]] = None
66 | ):
67 | self.video = video
68 | self.frames = frames
69 | self.uid = uid
70 | self.metadata = metadata
71 |
72 | def filter_detections(
73 | self,
74 | *,
75 | instance_filter: Callable[[Instance], bool],
76 | multi_instance_filter: Callable[[MultiInstance], bool]
77 | ) -> VideoAnnotation:
78 | """
79 | Returns a new video annotation consisting only of the instances and
80 | multi-instances that meet the given constraints.
81 | """
82 | return VideoAnnotation(
83 | video = self.video,
84 | frames = [
85 | frame.filter_detections(
86 | instance_filter = instance_filter,
87 | multi_instance_filter = multi_instance_filter
88 | )
89 | for frame in self.frames
90 | ],
91 | uid = self.uid,
92 | metadata = self.metadata
93 | )
94 |
95 | def apply_bounding_box_confidence_threshold(self, threshold: float) -> VideoAnnotation:
96 | """
97 | Returns a new video annotation consisting only of the instances and
98 | multi-instances that have bounding boxes which either do not have a
99 | confidence specified or which have a confidence meeting the given
100 | threshold.
101 | """
102 | return self.filter_detections(
103 | instance_filter = lambda instance: (
104 | instance.bounding_box is not None
105 | and instance.bounding_box.meets_confidence_threshold(threshold)
106 | ),
107 | multi_instance_filter = lambda multi_instance: (
108 | multi_instance.bounding_box is not None
109 | and multi_instance.bounding_box.meets_confidence_threshold(threshold)
110 | )
111 | )
112 |
113 | def apply_segmentation_confidence_threshold(self, threshold: float) -> VideoAnnotation:
114 | """
115 | Returns a new video annotation consisting only of the instances and
116 | multi-instances that have segmentations which either do not have a
117 | confidence specified or which have a confidence meeting the given
118 | threshold.
119 | """
120 | return self.filter_detections(
121 | instance_filter = lambda instance: (
122 | instance.segmentation is not None
123 | and instance.segmentation.meets_confidence_threshold(threshold)
124 | ),
125 | multi_instance_filter = lambda multi_instance: (
126 | multi_instance.segmentation is not None
127 | and multi_instance.segmentation.meets_confidence_threshold(threshold)
128 | )
129 | )
130 |
131 | def apply_metadata(self, metadata: Mapping[str, Any]) -> VideoAnnotation:
132 | """
133 | Returns a new video annotation with the supplied metadata.
134 | """
135 | return VideoAnnotation(
136 | video = self.video,
137 | frames = self.frames,
138 | uid = self.uid,
139 | metadata = metadata
140 | )
141 |
142 | def __repr__(self) -> str:
143 | return basic_repr(
144 | "VideoAnnotation",
145 | uid = self.uid,
146 | video = self.video,
147 | frames = self.frames,
148 | metadata = self.metadata
149 | )
150 |
151 | def __eq__(self, other: object) -> bool:
152 | if not isinstance(other, VideoAnnotation):
153 | return NotImplemented
154 | return (
155 | self.video == other.video
156 | and self.frames == other.frames
157 | and self.uid == other.uid
158 | and self.metadata == other.metadata
159 | )
160 |
161 | def __add__(self, other: VideoAnnotation) -> VideoAnnotation:
162 | if not isinstance(other, VideoAnnotation): # type: ignore - pyright complains about the isinstance check being redundant
163 | return NotImplemented
164 |
165 | if len(self.frames) != len(other.frames):
166 | raise ValueError("Unable to merge VideoAnnotations with different number of frames")
167 |
168 | return VideoAnnotation(
169 | video = self.video,
170 | frames = [
171 | frame1 + frame2
172 | for frame1, frame2 in zip(self.frames, other.frames)
173 | ],
174 | uid = self.uid,
175 | metadata = self.metadata
176 | )
177 |
178 | def to_json(self) -> VideoAnnotationJson:
179 | """
180 | Serializes this video annotation into a `VideoAnnotationJson`.
181 | """
182 | json: VideoAnnotationJson = {
183 | "kind": "VideoAnnotation",
184 | "video": self.video.to_json(),
185 | "frames": [frame.to_json() for frame in self.frames]
186 | }
187 |
188 | if self.uid is not None:
189 | json["uid"] = self.uid
190 |
191 | if self.metadata is not None:
192 | json["metadata"] = self.metadata
193 |
194 | return json
195 |
--------------------------------------------------------------------------------
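Because `VideoAnnotation.__add__` zips the frames together, two annotations of the same video (for example, two labeling passes) can be merged frame by frame as long as the frame counts match. A minimal sketch with a single frame and a hypothetical "person" class:

```py
from datatap.droplet.class_annotation import ClassAnnotation
from datatap.droplet.frame_annotation import FrameAnnotation
from datatap.droplet.instance import Instance
from datatap.droplet.video import Video
from datatap.droplet.video_annotation import VideoAnnotation

video = Video(uid = "example-video")

def single_frame_annotation() -> VideoAnnotation:
    # One frame containing a single empty Instance, just to keep the sketch short.
    return VideoAnnotation(
        video = video,
        frames = [FrameAnnotation(classes = {"person": ClassAnnotation(instances = [Instance()])})],
    )

merged = single_frame_annotation() + single_frame_annotation()
assert len(merged.frames) == 1
assert len(merged.frames[0].classes["person"].instances) == 2
```

--------------------------------------------------------------------------------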
/datatap/examples/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Example code
3 | """
--------------------------------------------------------------------------------
/datatap/geometry/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module provides geometric primitives for storing or manipulating ML annotations.
3 |
4 | Generally speaking, a geometric object is considered "valid" in the droplet format when it lies entirely within the unit
5 | plane. This is because annotations in the droplet format are scaled to 0-to-1 along both axes so that they are
6 | resolution-independent. This can be checked by invoking `assert_valid` on any of the geometric objects (though this is
7 | done automatically when geometric constructs are used to create droplets).
8 | """
9 |
10 | from .mask import Mask, MaskJson
11 | from .point import Point, PointJson
12 | from .polygon import Polygon, PolygonJson
13 | from .rectangle import Rectangle, RectangleJson
14 |
15 | __all__ = [
16 | "Mask",
17 | "MaskJson",
18 | "Point",
19 | "PointJson",
20 | "Polygon",
21 | "PolygonJson",
22 | "Rectangle",
23 | "RectangleJson"
24 | ]
25 |
--------------------------------------------------------------------------------
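A small sketch of the unit-plane validity rule described in this docstring: in-range geometry passes `assert_valid`, out-of-range geometry raises, and `clip` pulls coordinates back into [0, 1] (the coordinates are illustrative):

```py
from datatap.geometry import Point, Rectangle

# Entirely within the unit plane, so this passes.
Rectangle(Point(0.1, 0.1), Point(0.9, 0.9)).assert_valid()

# Coordinates outside [0, 1] fail the assertion...
try:
    Point(1.5, 0.5).assert_valid()
    raise RuntimeError("expected an AssertionError")
except AssertionError:
    pass

# ...but can be clipped back onto the unit plane instead.
assert Point(1.5, 0.5).clip() == Point(1.0, 0.5)
```

--------------------------------------------------------------------------------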
/datatap/geometry/mask.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | from datatap.geometry.point import Point
3 |
4 | from typing import Generator, Sequence, Tuple, Union
5 |
6 | from .polygon import Polygon, PolygonJson
7 | from ..utils import basic_repr
8 |
9 | MaskJson = Sequence[PolygonJson]
10 |
11 | class Mask:
12 | """
13 | The shape resulting from XORing a set of polygons in 2D space.
14 |
15 | Generally, the expectation is that the polygons have no edge intersections; specifically, that for any pair of
16 | polygons in the mask, either they have no intersection or one completely contains the other. However, there is no
17 | assertion that this is the case, and generally speaking, the even-odd rule is used to determine if a particular
18 | point is contained by the mask.
19 | """
20 |
21 | polygons: Sequence[Polygon]
22 | """
23 | The constituent polygons of this `Mask`.
24 | """
25 |
26 | @staticmethod
27 | def from_json(json: MaskJson) -> Mask:
28 | """
29 | Creates a `Mask` from a `MaskJson`.
30 | """
31 | return Mask([Polygon.from_json(poly) for poly in json])
32 |
33 | def __init__(self, polygons: Sequence[Polygon]):
34 | self.polygons = polygons
35 |
36 | if len(self.polygons) < 1:
37 | raise ValueError(f"A mask must have at least one polygon; failed on mask {repr(self)}")
38 |
39 | def scale(self, factor: Union[float, int, Tuple[float, float], Point]) -> Mask:
40 | """
41 | Resizes the mask according to `factor`. The scaling factor can either be
42 | a scalar (`int` or `float`), in which case the mask will be scaled by
43 | the same factor on both axes, or a point-like (`Tuple[float, float]`
44 | or `Point`), in which case the mask will be scaled independently on each
45 | axis.
46 | """
47 | return Mask([p.scale(factor) for p in self.polygons])
48 |
49 | def to_json(self) -> MaskJson:
50 | """
51 | Serializes this object as a `MaskJson`.
52 | """
53 | return [polygon.to_json() for polygon in self.polygons]
54 |
55 | def assert_valid(self) -> None:
56 | """
57 | Asserts that this mask is valid on the unit plane.
58 | """
59 | for polygon in self.polygons:
60 | polygon.assert_valid()
61 | # TODO(mdsavage): check for invalid polygon intersections?
62 |
63 | def __repr__(self) -> str:
64 | return basic_repr("Mask", self.polygons)
65 |
66 | def __eq__(self, other: object) -> bool:
67 | # TODO(mdsavage): currently, this requires the polygons to be in the same order, not just represent the same mask
68 | if not isinstance(other, Mask):
69 | return NotImplemented
70 | return self.polygons == other.polygons
71 |
72 | def __iter__(self) -> Generator[Polygon, None, None]:
73 | yield from self.polygons
74 |
--------------------------------------------------------------------------------
/datatap/geometry/point.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Tuple, Union
4 |
5 | from ..utils import basic_repr
6 |
7 | PointJson = Tuple[float, float]
8 |
9 | class Point:
10 | """
11 | A point in 2D space. Also often used to represent a 2D vector.
12 | """
13 |
14 | x: float
15 | """
16 | The x-coordinate of the point.
17 | """
18 |
19 | y: float
20 | """
21 | The y-coordinate of the point.
22 | """
23 |
24 | @staticmethod
25 | def from_json(json: PointJson) -> Point:
26 | """
27 | Creates a `Point` from a `PointJson`.
28 | """
29 | return Point(json[0], json[1])
30 |
31 | def __init__(self, x: float, y: float, clip: bool = False):
32 | self.x = min(max(x, 0), 1) if clip else x
33 | self.y = min(max(y, 0), 1) if clip else y
34 |
35 | def to_json(self) -> PointJson:
36 | """
37 | Serializes this object as a `PointJson`.
38 | """
39 | return (self.x, self.y)
40 |
41 | def distance(self, other: Point) -> float:
42 | """
43 | Computes the scalar distance to another point.
44 | """
45 | return ((self.x - other.x) ** 2 + (self.y - other.y) ** 2) ** 0.5
46 |
47 | def assert_valid(self) -> None:
48 | """
49 | Asserts that this point is valid on the unit plane.
50 | """
51 | assert 0 <= self.x <= 1 and 0 <= self.y <= 1, f"Point coordinates must be between 0 and 1; failed on point {repr(self)}"
52 |
53 | def clip(self) -> Point:
54 | """
55 | Clips both coordinates of this point to the range [0, 1].
56 | """
57 | return Point(self.x, self.y, clip = True)
58 |
59 | def scale(self, factor: Union[float, int, Tuple[float, float], Point]) -> Point:
60 | """
61 | Resizes the point according to `factor`. The scaling factor can either
62 | be a scalar (`int` or `float`), in which case the point will be scaled
63 | by the same factor on both axes, or a point-like (`Tuple[float, float]`
64 | or `Point`), in which case the point will be scaled independently on
65 | each axis.
66 | """
67 | if isinstance(factor, (float, int)):
68 | return self * factor
69 | if isinstance(factor, tuple):
70 | return Point(self.x * factor[0], self.y * factor[1])
71 | return Point(self.x * factor.x, self.y * factor.y)
72 |
73 | def __add__(self, o: Point) -> Point:
74 | if isinstance(o, Point): # type: ignore - pyright complains about the isinstance check being redundant
75 | return Point(self.x + o.x, self.y + o.y)
76 | return NotImplemented
77 |
78 | def __sub__(self, o: Point) -> Point:
79 | if isinstance(o, Point): # type: ignore - pyright complains about the isinstance check being redundant
80 | return Point(self.x - o.x, self.y - o.y)
81 | return NotImplemented
82 |
83 | def __mul__(self, o: Union[int, float]) -> Point:
84 | if isinstance(o, (int, float)): # type: ignore - pyright complains about the isinstance check being redundant
85 | return Point(self.x * o, self.y * o)
86 | return NotImplemented
87 |
88 | def __truediv__(self, o: Union[int, float]) -> Point:
89 | if isinstance(o, (int, float)): # type: ignore - pyright complains about the isinstance check being redundant
90 | return Point(self.x / o, self.y / o)
91 | return NotImplemented
92 |
93 | def __repr__(self) -> str:
94 | return basic_repr("Point", self.x, self.y)
95 |
96 | def __hash__(self) -> int:
97 | return hash((self.x, self.y))
98 |
99 | def __eq__(self, other: object) -> bool:
100 | if isinstance(other, Point):
101 | return self.x == other.x and self.y == other.y
102 | return NotImplemented
103 |
--------------------------------------------------------------------------------
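A short sketch of the three factor types that `Point.scale` accepts (scalar, tuple, and `Point`); per-axis factors scale each coordinate independently (the values below are arbitrary):

```py
from datatap.geometry import Point

p = Point(0.5, 0.25)

assert p.scale(2) == Point(1.0, 0.5)                # scalar: same factor on both axes
assert p.scale((2.0, 4.0)) == Point(1.0, 1.0)       # tuple: per-axis factors
assert p.scale(Point(2.0, 4.0)) == Point(1.0, 1.0)  # point-like: per-axis factors
```

--------------------------------------------------------------------------------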
/datatap/geometry/polygon.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Generator, Sequence, Tuple, Union
4 |
5 | from .point import Point, PointJson
6 | from ..utils import basic_repr
7 |
8 | PolygonJson = Sequence[PointJson]
9 |
10 | class Polygon:
11 | """
12 | A polygon in 2D space.
13 | """
14 |
15 | points: Sequence[Point]
16 | """
17 | The vertices of this polygon.
18 | """
19 |
20 | @staticmethod
21 | def from_json(json: PolygonJson) -> Polygon:
22 | """
23 | Creates a `Polygon` from a `PolygonJson`.
24 | """
25 | return Polygon([Point.from_json(pt) for pt in json])
26 |
27 | def __init__(self, points: Sequence[Point]):
28 | self.points = points
29 |
30 | if len(self.points) < 3:
31 | raise ValueError(f"A polygon must have at least three points; failed on polygon {repr(self)}")
32 |
33 | def scale(self, factor: Union[float, int, Tuple[float, float], Point]) -> Polygon:
34 | """
35 | Resizes the polygon according to `factor`. The scaling factor can either
36 | be a scalar (`int` or `float`), in which case the polygon will be scaled
37 | by the same factor on both axes, or a point-like (`Tuple[float, float]`
38 | or `Point`), in which case the polygon will be scaled independently on
39 | each axis.
40 | """
41 | return Polygon([p.scale(factor) for p in self.points])
42 |
43 | def to_json(self) -> PolygonJson:
44 | """
45 | Serializes this object as a `PolygonJson`.
46 | """
47 | return [point.to_json() for point in self.points]
48 |
49 | def assert_valid(self) -> None:
50 | """
51 | Ensures that this polygon is valid on the unit plane.
52 | """
53 | for point in self.points:
54 | point.assert_valid()
55 | # TODO(mdsavage): check for self-intersection?
56 |
57 | def __repr__(self) -> str:
58 | return basic_repr("Polygon", self.points)
59 |
60 | def __eq__(self, other: object) -> bool:
61 | # TODO(mdsavage): currently, this requires the points to be in the same order, not just represent the same polygon
62 | if not isinstance(other, Polygon):
63 | return NotImplemented
64 | return self.points == other.points
65 |
66 | def __mul__(self, o: Union[int, float]) -> Polygon:
67 | if not isinstance(o, (int, float)): # type: ignore - pyright complains about the isinstance check being redundant
68 | return NotImplemented
69 | return Polygon([p * o for p in self.points])
70 |
71 | def __iter__(self) -> Generator[Point, None, None]:
72 | yield from self.points
--------------------------------------------------------------------------------
/datatap/geometry/rectangle.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from shapely.geometry import box, Polygon as ShapelyPolygon
4 | from typing import Sequence, Tuple, Union
5 |
6 | from .point import Point, PointJson
7 | from ..utils import basic_repr
8 |
9 | RectangleJson = Tuple[PointJson, PointJson]
10 |
11 | class Rectangle:
12 | """
13 | An axis-aligned rectangle in 2D space.
14 | """
15 |
16 | p1: Point
17 | """
18 | The top-left corner of the rectangle.
19 | """
20 |
21 | p2: Point
22 | """
23 | The bottom-right corner of the rectangle.
24 | """
25 |
26 | @staticmethod
27 | def from_json(json: RectangleJson) -> Rectangle:
28 | """
29 | Creates a `Rectangle` from a `RectangleJson`.
30 | """
31 | return Rectangle(Point.from_json(json[0]), Point.from_json(json[1]))
32 |
33 | @staticmethod
34 | def from_point_set(points: Sequence[Point]) -> Rectangle:
35 | """
36 | Creates the bounding rectangle of a set of points.
37 |
38 | Note, it is possible for this to create an invalid rectangle if all points
39 | are collinear and axis-aligned.
40 | """
41 | return Rectangle(
42 | Point(min(p.x for p in points), min(p.y for p in points)),
43 | Point(max(p.x for p in points), max(p.y for p in points))
44 | )
45 |
46 | def __init__(self, p1: Point, p2: Point, normalize: bool = False):
47 | if normalize:
48 | self.p1 = Point(min(p1.x, p2.x), min(p1.y, p2.y))
49 | self.p2 = Point(max(p1.x, p2.x), max(p1.y, p2.y))
50 | else:
51 | self.p1 = p1
52 | self.p2 = p2
53 |
54 | def assert_valid(self) -> None:
55 | """
56 | Ensures that this rectangle is valid on the unit plane.
57 | """
58 | self.p1.assert_valid()
59 | self.p2.assert_valid()
60 | assert self.p1.x < self.p2.x and self.p1.y < self.p2.y, f"Rectangle has non-positive area; failed on rectangle {repr(self)}"
61 |
62 | def to_json(self) -> RectangleJson:
63 | """
64 | Serializes this object as a `RectangleJson`.
65 | """
66 | return (self.p1.to_json(), self.p2.to_json())
67 |
68 | def to_shapely(self) -> ShapelyPolygon:
69 | """
70 | Converts this rectangle into a shapely `Polygon`.
71 | """
72 | return box(self.p1.x, self.p1.y, self.p2.x, self.p2.y)
73 |
74 | def to_xywh_tuple(self) -> Tuple[float, float, float, float]:
75 | """
76 | Converts this rectangle into a tuple of `(x_coordinate, y_coordinate, width, height)`.
77 | """
78 | w = self.p2.x - self.p1.x
79 | h = self.p2.y - self.p1.y
80 | return (self.p1.x, self.p1.y, w, h)
81 |
82 | def to_xyxy_tuple(self) -> Tuple[float, float, float, float]:
83 | """
84 | Converts this rectangle into a tuple of `(x_min, y_min, x_max, y_max)`.
85 | """
86 | return (self.p1.x, self.p1.y, self.p2.x, self.p2.y)
87 |
88 | def area(self) -> float:
89 | """
90 | Computes the area of this rectangle.
91 | """
92 | return abs(self.p1.x - self.p2.x) * abs(self.p1.y - self.p2.y)
93 |
94 | def iou(self, other: Rectangle) -> float:
95 | """
96 | Computes the iou (intersection-over-union) of this rectangle with another.
97 | """
98 | x1 = max(self.p1.x, other.p1.x)
99 | y1 = max(self.p1.y, other.p1.y)
100 | x2 = min(self.p2.x, other.p2.x)
101 | y2 = min(self.p2.y, other.p2.y)
102 | intersection_area = max(x2 - x1, 0) * max(y2 - y1, 0)
103 | union_area = self.area() + other.area() - intersection_area
104 | return intersection_area / union_area
105 |
106 | def diagonal(self) -> float:
107 | """
108 | Computes the diagonal length of this rectangle.
109 | """
110 | return self.p1.distance(self.p2)
111 |
112 | def scale(self, factor: Union[float, int, Tuple[float, float], Point]):
113 | """
114 | Resizes the rectangle according to `factor`. The scaling factor can
115 | either be a scalar (`int` or `float`), in which case the rectangle will
116 | be scaled by the same factor on both axes, or a point-like
117 | (`Tuple[float, float]` or `Point`), in which case the rectangle will be
118 | scaled independently on each axis.
119 | """
120 | return Rectangle(self.p1.scale(factor), self.p2.scale(factor))
121 |
122 | def center(self) -> Point:
123 | """
124 | Computes the center of this rectangle.
125 | """
126 | return Point((self.p1.x + self.p2.x) / 2, (self.p1.y + self.p2.y) / 2)
127 |
128 | def scale_from_center(self, factor: Union[float, int, Tuple[float, float], Point]) -> Rectangle:
129 | """
130 | Resizes the rectangle according to `factor`, though translates it so
131 | that its center does not move. The scaling factor can either be a scalar
132 | (`int` or `float`), in which case the rectangle will be scaled by the
133 | same factor on both axes, or a point-like (`Tuple[float, float]` or
134 | `Point`), in which case the rectangle will be scaled independently on
135 | each axis.
136 | """
137 | center = self.center()
138 | return Rectangle(
139 | (self.p1 - center).scale(factor) + center,
140 | (self.p2 - center).scale(factor) + center
141 | )
142 |
143 | def clip(self) -> Rectangle:
144 | """
145 |         Clips the rectangle to the unit plane.
146 | """
147 | return Rectangle(self.p1.clip(), self.p2.clip())
148 |
149 | def normalize(self) -> Rectangle:
150 | """
151 | Returns a new rectangle that is guaranteed to have `p1` be the top left
152 | corner and `p2` be the bottom right corner.
153 | """
154 | return Rectangle(self.p1, self.p2, True)
155 |
156 | def __repr__(self) -> str:
157 | return basic_repr("Rectangle", self.p1, self.p2)
158 |
159 | def __hash__(self) -> int:
160 | return hash((self.p1, self.p2))
161 |
162 | def __eq__(self, other: object) -> bool:
163 | if not isinstance(other, Rectangle):
164 | return NotImplemented
165 | return self.p1 == other.p1 and self.p2 == other.p2
166 |
167 | def __mul__(self, o: Union[int, float]) -> Rectangle:
168 | if isinstance(o, (int, float)): # type: ignore - pyright complains about the isinstance check being redundant
169 | return Rectangle(self.p1 * o, self.p2 * o)
170 | return NotImplemented
171 |
--------------------------------------------------------------------------------
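A minimal usage sketch for the `Rectangle` geometry above (the coordinate values are purely illustrative):

```py
from datatap.geometry import Point, Rectangle

# Two axis-aligned rectangles on the unit plane (values are illustrative only).
a = Rectangle(Point(0.1, 0.1), Point(0.5, 0.5))
b = Rectangle(Point(0.3, 0.3), Point(0.7, 0.7))

a.assert_valid()          # raises if the rectangle has non-positive area
print(a.area())           # ≈ 0.16
print(a.iou(b))           # intersection 0.04 / union 0.28 ≈ 0.1429
print(a.to_xywh_tuple())  # (0.1, 0.1, 0.4, 0.4)
```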
/datatap/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The metrics module provides a number of utilities for analyzing droplets in the context
3 | of a broader training or evaluation job.
4 |
5 | Here is an example of using the metrics module:
6 |
7 | ```py
8 | from datatap import Api, metrics
9 | from my_model import model
10 |
11 | api = Api()
12 | dataset = api.get_default_database().get_dataset_list()[0]
13 | latest_version = dataset.latest_version
14 |
15 | confusion_matrix = metrics.ConfusionMatrix(latest_version.template.classes.keys())
16 | pr_curve = metrics.PrecisionRecallCurve()
17 |
18 | for annotation in latest_version.stream_split("validation"):
19 | prediction = model(annotation)
20 | confusion_matrix.add_annotation(annotation, prediction, 0.5, 0.5)
21 | pr_curve.add_annotation(annotation, prediction, 0.5)
22 |
23 | print(confusion_matrix.matrix)
24 | print(pr_curve.maximize_f1())
25 | ```
26 | """
27 |
28 | from .confusion_matrix import ConfusionMatrix
29 | from .precision_recall_curve import PrecisionRecallCurve, MaximizeF1Result
30 | from .iou import generate_confusion_matrix, generate_pr_curve
31 |
32 | __all__ = [
33 | "ConfusionMatrix",
34 | "PrecisionRecallCurve",
35 | "MaximizeF1Result",
36 | "generate_confusion_matrix",
37 | "generate_pr_curve",
38 | ]
--------------------------------------------------------------------------------
/datatap/metrics/_types.py:
--------------------------------------------------------------------------------
1 | from typing import NamedTuple
2 |
3 | from datatap.geometry import Rectangle
4 |
5 | class PredictionBox(NamedTuple):
6 | confidence: float
7 | class_name: str
8 | box: Rectangle
9 |
10 | class GroundTruthBox(NamedTuple):
11 | class_name: str
12 | box: Rectangle
13 |
14 |
--------------------------------------------------------------------------------
/datatap/metrics/confusion_matrix.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | from collections import defaultdict
3 |
4 | from typing import DefaultDict, Iterable, Mapping, Optional, Sequence, cast
5 |
6 | import numpy as np
7 | from scipy.optimize import linear_sum_assignment
8 |
9 | from datatap.droplet import ImageAnnotation
10 |
11 | from ._types import GroundTruthBox, PredictionBox
12 |
13 | class ConfusionMatrix:
14 | """
15 | Represents a confusion matrix for a collection of annotations.
16 |     This class handles the matching of instances in a set of ground truth annotations
17 |     to instances in a corresponding set of prediction annotations.
18 | """
19 |
20 | # TODO(mdsavage): make this accept matching strategies other than bounding box IOU
21 |
22 | classes: Sequence[str]
23 | """
24 | A list of the classes that this confusion matrix is tracking.
25 | """
26 |
27 | matrix: np.ndarray
28 | """
29 | The current confusion matrix. Entry `(i, j)` represents the number of times that
30 | an instance of `self.classes[i]` was classified as an instance of `self.classes[j]`
31 | """
32 |
33 | _class_map: Mapping[str, int]
34 |
35 | def __init__(self, classes: Sequence[str], matrix: Optional[np.ndarray] = None):
36 | self.classes = ["__background__"] + list(classes)
37 | self._class_map = dict([(class_name, index) for index, class_name in enumerate(self.classes)])
38 | dim = len(self.classes)
39 | self.matrix = matrix if matrix is not None else np.zeros((dim, dim))
40 |
41 | def add_annotation(
42 | self: ConfusionMatrix,
43 | ground_truth: ImageAnnotation,
44 | prediction: ImageAnnotation,
45 | iou_threshold: float,
46 | confidence_threshold: float
47 | ) -> None:
48 | """
49 | Updates this confusion matrix for the given ground truth and prediction annotations evaluated with the given IOU
50 | threshold, only considering instances meeting the given confidence threshold.
51 |
52 | Note: this handles instances only; multi-instances are ignored.
53 | """
54 | ground_truth_boxes = [
55 | GroundTruthBox(class_name, instance.bounding_box.rectangle)
56 | for class_name in ground_truth.classes.keys()
57 | for instance in ground_truth.classes[class_name].instances
58 | if instance.bounding_box is not None
59 | ]
60 |
61 | prediction_boxes = sorted([
62 | PredictionBox(instance.bounding_box.confidence or 1, class_name, instance.bounding_box.rectangle)
63 | for class_name in prediction.classes.keys()
64 | for instance in prediction.classes[class_name].instances
65 | if instance.bounding_box is not None and instance.bounding_box.meets_confidence_threshold(confidence_threshold)
66 | ], reverse = True, key = lambda p: p.confidence)
67 |
68 | iou_matrix = np.array([
69 | [ground_truth_box.box.iou(prediction_box.box) for ground_truth_box in ground_truth_boxes]
70 | for prediction_box in prediction_boxes
71 | ], ndmin = 2)
72 |
73 | prediction_indices, ground_truth_indices = linear_sum_assignment(iou_matrix, maximize = True)
74 |
75 | unmatched_ground_truth_box_counts: DefaultDict[str, int] = defaultdict(lambda: 0)
76 | unmatched_prediction_box_counts: DefaultDict[str, int] = defaultdict(lambda: 0)
77 |
78 | for box in ground_truth_boxes:
79 | unmatched_ground_truth_box_counts[box.class_name] += 1
80 |
81 | for box in prediction_boxes:
82 | unmatched_prediction_box_counts[box.class_name] += 1
83 |
84 | for prediction_index, ground_truth_index in zip(cast(Iterable[int], prediction_indices), cast(Iterable[int], ground_truth_indices)):
85 | if iou_matrix[prediction_index, ground_truth_index] >= iou_threshold:
86 | ground_truth_box = ground_truth_boxes[ground_truth_index]
87 | prediction_box = prediction_boxes[prediction_index]
88 | self._add_detection(ground_truth_box.class_name, prediction_box.class_name)
89 | unmatched_ground_truth_box_counts[ground_truth_box.class_name] -= 1
90 | unmatched_prediction_box_counts[prediction_box.class_name] -= 1
91 |
92 | for class_name, count in unmatched_ground_truth_box_counts.items():
93 | if count > 0:
94 | self._add_false_negative(class_name, count = count)
95 |
96 | for class_name, count in unmatched_prediction_box_counts.items():
97 | if count > 0:
98 | self._add_false_positive(class_name, count = count)
99 |
100 | def batch_add_annotation(
101 | self: ConfusionMatrix,
102 | ground_truths: Sequence[ImageAnnotation],
103 | predictions: Sequence[ImageAnnotation],
104 | iou_threshold: float,
105 | confidence_threshold: float
106 | ) -> None:
107 | """
108 | Updates this confusion matrix with the values from several annotations simultaneously.
109 | """
110 | for ground_truth, prediction in zip(ground_truths, predictions):
111 | self.add_annotation(
112 | ground_truth,
113 | prediction,
114 | iou_threshold,
115 | confidence_threshold
116 | )
117 |
118 | def _add_detection(self, ground_truth_class: str, prediction_class: str, count: int = 1) -> None:
119 | r = self._class_map[ground_truth_class]
120 | c = self._class_map[prediction_class]
121 | self.matrix[r, c] += count
122 |
123 | def _add_false_negative(self, ground_truth_class: str, count: int = 1) -> None:
124 | self._add_detection(ground_truth_class, "__background__", count)
125 |
126 | def _add_false_positive(self, ground_truth_class: str, count: int = 1) -> None:
127 | self._add_detection("__background__", ground_truth_class, count)
128 |
129 |
130 | def __add__(self, other: ConfusionMatrix) -> ConfusionMatrix:
131 | if isinstance(other, ConfusionMatrix): # type: ignore - pyright complains about the isinstance check being redundant
132 |             return ConfusionMatrix(self.classes[1:], cast(np.ndarray, self.matrix + other.matrix)) # strip "__background__"; the constructor re-adds it
133 | return NotImplemented
134 |
--------------------------------------------------------------------------------
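Because `ConfusionMatrix` implements `__add__`, matrices accumulated on separate shards or workers can be reduced into one. A small sketch, where the class names are placeholders:

```py
from datatap.metrics import ConfusionMatrix

classes = ["person", "vehicle"]        # hypothetical class list
shard_a = ConfusionMatrix(classes)
shard_b = ConfusionMatrix(classes)

# ... each shard would normally be populated via add_annotation / batch_add_annotation ...

combined = shard_a + shard_b
# Row/column 0 is the implicit "__background__" class; entry (i, j) counts
# ground-truth instances of combined.classes[i] predicted as combined.classes[j].
print(combined.classes)        # ['__background__', 'person', 'vehicle']
print(combined.matrix.shape)   # (3, 3)
```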
/datatap/metrics/iou.py:
--------------------------------------------------------------------------------
1 | from datatap.metrics.confusion_matrix import ConfusionMatrix
2 | from typing import Sequence
3 |
4 | from ..droplet import ImageAnnotation
5 | from ..template import ImageAnnotationTemplate
6 | from .precision_recall_curve import PrecisionRecallCurve
7 |
8 |
9 | def generate_pr_curve(ground_truths: Sequence[ImageAnnotation], predictions: Sequence[ImageAnnotation], iou_threshold: float) -> PrecisionRecallCurve:
10 | """
11 | Returns a precision-recall curve for the given ground truth and prediction annotation lists evaluated with the given
12 | IOU threshold.
13 |
14 | Note: this handles instances only; multi-instances are ignored.
15 | """
16 | precision_recall_curve = PrecisionRecallCurve()
17 | precision_recall_curve.batch_add_annotation(ground_truths, predictions, iou_threshold)
18 | return precision_recall_curve
19 |
20 | def generate_confusion_matrix(
21 | template: ImageAnnotationTemplate,
22 | ground_truths: Sequence[ImageAnnotation],
23 | predictions: Sequence[ImageAnnotation],
24 | iou_threshold: float,
25 | confidence_threshold: float
26 | ) -> ConfusionMatrix:
27 | """
28 | Returns a confusion matrix for the given ground truth and prediction annotation lists evaluated with the given IOU
29 | threshold.
30 |
31 | Note: this handles instances only; multi-instances are ignored.
32 | """
33 | confusion_matrix = ConfusionMatrix(sorted(template.classes.keys()))
34 | confusion_matrix.batch_add_annotation(ground_truths, predictions, iou_threshold, confidence_threshold)
35 | return confusion_matrix
36 |
--------------------------------------------------------------------------------
/datatap/metrics/precision_recall_curve.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Iterable, Sequence, TYPE_CHECKING, List, NamedTuple, Optional, cast
4 |
5 | import numpy as np
6 | from scipy.optimize import linear_sum_assignment
7 | from sortedcontainers import SortedDict
8 |
9 | from datatap.droplet import ImageAnnotation
10 |
11 | from ._types import GroundTruthBox, PredictionBox
12 |
13 | if TYPE_CHECKING:
14 | import matplotlib.pyplot as plt
15 |
16 | class MaximizeF1Result(NamedTuple):
17 | """
18 | Represents the precision, recall, and f1 for a given `PrecisionRecallCurve`
19 | at the threshold that maximizes f1.
20 | """
21 | threshold: float
22 | precision: float
23 | recall: float
24 | f1: float
25 |
26 | class _PrecisionRecallPoint(NamedTuple):
27 | threshold: float
28 | precision: float
29 | recall: float
30 |
31 | class _DetectionEvent(NamedTuple):
32 | true_positive_delta: int
33 | false_positive_delta: int
34 |
35 | def __add__(self, other: _DetectionEvent) -> _DetectionEvent:
36 | if isinstance(other, _DetectionEvent): # type: ignore - pyright complains about the isinstance check being redundant
37 | return _DetectionEvent(self.true_positive_delta + other.true_positive_delta, self.false_positive_delta + other.false_positive_delta)
38 | return NotImplemented
39 |
40 |
41 | class PrecisionRecallCurve:
42 | """
43 | Represents a curve relating a chosen detection threshold to precision and recall. Internally, this is actually
44 | stored as a sorted list of detection events, which are used to compute metrics on the fly when needed.
45 | """
46 |
47 | # TODO(mdsavage): make this accept matching strategies other than bounding box IOU
48 |
49 | events: SortedDict[float, _DetectionEvent]
50 | ground_truth_positives: int
51 |
52 | def __init__(self, events: Optional[SortedDict[float, _DetectionEvent]] = None, ground_truth_positives: int = 0):
53 | self.events = SortedDict() if events is None else events
54 | self.ground_truth_positives = ground_truth_positives
55 |
56 | def clone(self) -> PrecisionRecallCurve:
57 | return PrecisionRecallCurve(self.events.copy(), self.ground_truth_positives)
58 |
59 | def maximize_f1(self) -> MaximizeF1Result:
60 | maximum = MaximizeF1Result(threshold = 1, precision = 0, recall = 0, f1 = 0)
61 |
62 | for threshold, precision, recall in self._compute_curve():
63 | f1 = 2 / ((1 / precision) + (1 / recall)) if precision > 0 and recall > 0 else 0
64 | if f1 >= maximum.f1:
65 | maximum = MaximizeF1Result(threshold = threshold, precision = precision, recall = recall, f1 = f1)
66 |
67 | return maximum
68 |
69 | def plot(self) -> plt.Figure:
70 | import matplotlib.pyplot as plt
71 | fig = plt.figure()
72 | curve = self._compute_curve()
73 | plt.plot([pt.recall for pt in curve], [pt.precision for pt in curve], "o-")
74 | plt.xlabel("Recall")
75 | plt.ylabel("Precision")
76 | return fig
77 |
78 | def add_annotation(
79 | self: PrecisionRecallCurve,
80 | ground_truth: ImageAnnotation,
81 | prediction: ImageAnnotation,
82 | iou_threshold: float
83 | ) -> None:
84 | """
85 |         Updates this precision-recall curve with the given ground truth and prediction annotations, evaluated with the
86 |         given IOU threshold.
87 |
88 | Note: this handles instances only; multi-instances are ignored.
89 | """
90 | ground_truth_boxes = [
91 | GroundTruthBox(class_name, instance.bounding_box.rectangle)
92 | for class_name in ground_truth.classes.keys()
93 | for instance in ground_truth.classes[class_name].instances
94 | if instance.bounding_box is not None
95 | ]
96 |
97 | prediction_boxes = sorted([
98 | PredictionBox(instance.bounding_box.confidence or 1, class_name, instance.bounding_box.rectangle)
99 | for class_name in prediction.classes.keys()
100 | for instance in prediction.classes[class_name].instances
101 | if instance.bounding_box is not None
102 | ], reverse = True, key = lambda p: p.confidence)
103 |
104 | iou_matrix = np.array([
105 | [ground_truth_box.box.iou(prediction_box.box) for ground_truth_box in ground_truth_boxes]
106 | for prediction_box in prediction_boxes
107 | ])
108 |
109 | self._add_ground_truth_positives(len(ground_truth_boxes))
110 |
111 | previous_true_positives = 0
112 | previous_false_positives = 0
113 |
114 | for i in range(len(prediction_boxes)):
115 | confidence_threshold = prediction_boxes[i].confidence
116 |
117 | if i < len(prediction_boxes) - 1 and prediction_boxes[i+1].confidence == confidence_threshold:
118 | continue
119 |
120 | prediction_indices, ground_truth_indices = linear_sum_assignment(iou_matrix[:i+1,], maximize = True)
121 |
122 | true_positives = 0
123 | false_positives = max(0, i + 1 - len(ground_truth_boxes))
124 |
125 | for prediction_index, ground_truth_index in zip(cast(Iterable[int], prediction_indices), cast(Iterable[int], ground_truth_indices)):
126 | if (
127 | iou_matrix[prediction_index, ground_truth_index] >= iou_threshold
128 | and prediction_boxes[prediction_index].class_name == ground_truth_boxes[ground_truth_index].class_name
129 | ):
130 | true_positives += 1
131 | else:
132 | false_positives += 1
133 |
134 | self._add_event(confidence_threshold, _DetectionEvent(
135 | true_positive_delta = true_positives - previous_true_positives,
136 | false_positive_delta = false_positives - previous_false_positives
137 | ))
138 |
139 | previous_true_positives = true_positives
140 | previous_false_positives = false_positives
141 |
142 | def batch_add_annotation(
143 | self: PrecisionRecallCurve,
144 | ground_truths: Sequence[ImageAnnotation],
145 | predictions: Sequence[ImageAnnotation],
146 | iou_threshold: float
147 | ) -> None:
148 | """
149 | Updates this precision-recall curve with the values from several annotations simultaneously.
150 | """
151 | for ground_truth, prediction in zip(ground_truths, predictions):
152 | self.add_annotation(ground_truth, prediction, iou_threshold)
153 |
154 | def _compute_curve(self) -> List[_PrecisionRecallPoint]:
155 | assert self.ground_truth_positives > 0
156 | precision_recall_points: List[_PrecisionRecallPoint] = []
157 |
158 | true_positives = 0
159 | detections = 0
160 |
161 | for threshold in reversed(self.events):
162 | true_positive_delta, false_positive_delta = self.events[threshold]
163 | true_positives += true_positive_delta
164 | detections += true_positive_delta + false_positive_delta
165 | assert detections > 0
166 |
167 | precision_recall_points.append(_PrecisionRecallPoint(
168 | threshold = threshold,
169 | precision = true_positives / detections,
170 | recall = true_positives / self.ground_truth_positives
171 | ))
172 |
173 | return precision_recall_points
174 |
175 | def _add_event(self, threshold: float, event: _DetectionEvent) -> None:
176 | if threshold not in self.events:
177 | self.events[threshold] = _DetectionEvent(0, 0)
178 | self.events[threshold] += event
179 |
180 | def _add_ground_truth_positives(self, count: int) -> None:
181 | self.ground_truth_positives += count
182 |
183 | def __add__(self, other: PrecisionRecallCurve) -> PrecisionRecallCurve:
184 | if isinstance(other, PrecisionRecallCurve): # type: ignore - pyright complains about the isinstance check being redundant
185 | ret = self.clone()
186 | ret._add_ground_truth_positives(other.ground_truth_positives)
187 |
188 | for threshold, event in other.events.items():
189 | ret._add_event(threshold, event)
190 |
191 | return ret
192 | return NotImplemented
193 |
194 |
--------------------------------------------------------------------------------
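To make `maximize_f1` concrete: each point on the curve contributes an F1 score equal to the harmonic mean `2 * precision * recall / (precision + recall)`, and the result is the point (and threshold) where that score peaks. Since `PrecisionRecallCurve` also implements `__add__`, per-worker curves can be merged before the maximization. A minimal sketch, in which the curves are empty placeholders that would normally be populated via `add_annotation`:

```py
from datatap.metrics import PrecisionRecallCurve

curve_a = PrecisionRecallCurve()
curve_b = PrecisionRecallCurve()

# ... each curve would normally be populated via add_annotation / batch_add_annotation ...

merged = curve_a + curve_b      # sums ground-truth counts and merges detection events

# maximize_f1 requires at least one ground-truth box to have been observed.
if merged.ground_truth_positives > 0:
    best = merged.maximize_f1()
    print(best.threshold, best.precision, best.recall, best.f1)
```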
/datatap/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/datatap/py.typed
--------------------------------------------------------------------------------
/datatap/template/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Templates are used to describe how a given annotation (or set of annotations) is structured.
3 |
4 | All `Dataset`s and `DatasetVersion`s will have templates attached to them. If you need to
5 | create your own template (for instance, in order to create a new dataset), you can
6 | instantiate them as such:
7 |
8 | ```py
9 | from datatap.template import ImageAnnotationTemplate, ClassAnnotationTemplate, InstanceTemplate
10 |
11 | ImageAnnotationTemplate(classes = {
12 | "person": ClassAnnotationTemplate(
13 | instances = InstanceTemplate(
14 | bounding_box = True,
15 | segmentation = False, # this could also be omitted, since False is the default
16 | keypoints = { "head", "left shoulder", "right shoulder" },
17 | attributes = { "face mask": { "present", "absent" } }
18 | )
19 | )
20 | })
21 | ```
22 | """
23 |
24 |
25 | from .class_annotation_template import ClassAnnotationTemplate
26 | from .frame_annotation_template import FrameAnnotationTemplate
27 | from .image_annotation_template import ImageAnnotationTemplate
28 | from .instance_template import InstanceTemplate
29 | from .multi_instance_template import MultiInstanceTemplate
30 | from .video_annotation_template import VideoAnnotationTemplate
31 |
32 | __all__ = [
33 | "ClassAnnotationTemplate",
34 | "FrameAnnotationTemplate",
35 | "ImageAnnotationTemplate",
36 | "InstanceTemplate",
37 | "MultiInstanceTemplate",
38 | "VideoAnnotationTemplate",
39 | ]
40 |
--------------------------------------------------------------------------------
/datatap/template/class_annotation_template.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Optional
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 | from .instance_template import InstanceTemplate, InstanceTemplateJson
9 | from .multi_instance_template import MultiInstanceTemplate, MultiInstanceTemplateJson
10 |
11 | class ClassAnnotationTemplateJson(TypedDict, total=False):
12 | """
13 | The serialized JSON representation of a class annotation template.
14 | """
15 |
16 | instances: InstanceTemplateJson
17 | multiInstances: MultiInstanceTemplateJson
18 |
19 | class ClassAnnotationTemplate():
20 | """
21 | A `ClassAnnotationTemplate` describes what each class should provide.
22 |
23 |     In practice, most of the specification is delegated to its constituent templates,
24 | `instances` and `multi_instances`.
25 | """
26 |
27 | instances: Optional[InstanceTemplate]
28 | """
29 | An `InstanceTemplate` that describes how instances are structured.
30 | """
31 |
32 | multi_instances: Optional[MultiInstanceTemplate]
33 | """
34 | A `MultiInstanceTemplate` that describes how multi instances are structured.
35 | """
36 |
37 | def __init__(
38 | self,
39 | *,
40 | instances: Optional[InstanceTemplate] = None,
41 | multi_instances: Optional[MultiInstanceTemplate] = None
42 | ):
43 | self.instances = instances
44 | self.multi_instances = multi_instances
45 |
46 | def to_json(self) -> ClassAnnotationTemplateJson:
47 | """
48 | Serializes this object into JSON.
49 | """
50 | json = ClassAnnotationTemplateJson()
51 | if self.instances is not None: json["instances"] = self.instances.to_json()
52 | if self.multi_instances is not None: json["multiInstances"] = self.multi_instances.to_json()
53 | return json
54 |
55 | @staticmethod
56 | def from_json(json: ClassAnnotationTemplateJson) -> ClassAnnotationTemplate:
57 | """
58 | Deserializes a JSON object into a `ClassAnnotationTemplate`.
59 | """
60 | instances = InstanceTemplate.from_json(json["instances"]) if "instances" in json else None
61 | multi_instances = MultiInstanceTemplate.from_json(json["multiInstances"]) if "multiInstances" in json else None
62 | return ClassAnnotationTemplate(instances=instances, multi_instances=multi_instances)
63 |
64 | def __repr__(self) -> str:
65 | return basic_repr(
66 | "ClassAnnotationTemplate",
67 | instances = self.instances,
68 | multi_instances = self.multi_instances
69 | )
70 |
--------------------------------------------------------------------------------
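A short sketch of the JSON round trip defined above; note that the serialized keys are camelCase (`multiInstances`, `boundingBox`) even though the Python attributes are snake_case:

```py
from datatap.template import ClassAnnotationTemplate, InstanceTemplate

template = ClassAnnotationTemplate(
    instances = InstanceTemplate(bounding_box = True, keypoints = { "head" })
)

json = template.to_json()
# e.g. {"instances": {"boundingBox": True, "keypoints": ["head"]}}

restored = ClassAnnotationTemplate.from_json(json)
assert restored.instances is not None and restored.instances.bounding_box
```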
/datatap/template/frame_annotation_template.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Dict, Mapping
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 | from .class_annotation_template import ClassAnnotationTemplate, ClassAnnotationTemplateJson
9 |
10 | class FrameAnnotationTemplateJson(TypedDict):
11 | """
12 | The serialized JSON representation of a frame annotation template.
13 | """
14 |
15 | classes: Dict[str, ClassAnnotationTemplateJson]
16 |
17 | class FrameAnnotationTemplate():
18 | """
19 | Describes how a `FrameAnnotation` is structured.
20 |
21 | For each of its classes, it provides a `ClassAnnotationTemplate`.
22 | """
23 |
24 | classes: Mapping[str, ClassAnnotationTemplate]
25 | """
26 | A mapping from class name to `ClassAnnotationTemplate`.
27 | """
28 |
29 | def __init__(self, *, classes: Mapping[str, ClassAnnotationTemplate]):
30 | self.classes = classes
31 |
32 | def to_json(self) -> FrameAnnotationTemplateJson:
33 | """
34 | Serializes this object to JSON.
35 | """
36 | return {
37 | "classes": {
38 | class_name: class_template.to_json()
39 | for class_name, class_template in self.classes.items()
40 | }
41 | }
42 |
43 | @staticmethod
44 | def from_json(json: FrameAnnotationTemplateJson) -> FrameAnnotationTemplate:
45 | """
46 | Deserializes a JSON object into a `FrameAnnotationTemplate`.
47 | """
48 | classes = {
49 | key: ClassAnnotationTemplate.from_json(value)
50 | for key, value in json.get("classes", {}).items()
51 | }
52 |
53 | return FrameAnnotationTemplate(classes=classes)
54 |
55 | def __repr__(self) -> str:
56 | return basic_repr(
57 | "FrameAnnotationTemplate",
58 | classes = self.classes
59 | )
60 |
--------------------------------------------------------------------------------
/datatap/template/image_annotation_template.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Dict, Mapping
4 |
5 | from typing_extensions import Literal, TypedDict
6 |
7 | from ..utils import basic_repr
8 | from .class_annotation_template import ClassAnnotationTemplate, ClassAnnotationTemplateJson
9 |
10 | class ImageAnnotationTemplateJson(TypedDict):
11 | """
12 | The serialized JSON representation of an image annotation template.
13 | """
14 |
15 | kind: Literal["ImageAnnotationTemplate"]
16 | classes: Dict[str, ClassAnnotationTemplateJson]
17 |
18 | class ImageAnnotationTemplate():
19 | """
20 | Describes how an `ImageAnnotation` is structured.
21 |
22 | For each of its classes, it provides a `ClassAnnotationTemplate`.
23 | """
24 |
25 | classes: Mapping[str, ClassAnnotationTemplate]
26 | """
27 | A mapping from class name to `ClassAnnotationTemplate`.
28 | """
29 |
30 | def __init__(self, *, classes: Mapping[str, ClassAnnotationTemplate]):
31 | self.classes = classes
32 |
33 | def to_json(self) -> ImageAnnotationTemplateJson:
34 | """
35 | Serializes this object to JSON.
36 | """
37 | return {
38 | "kind": "ImageAnnotationTemplate",
39 | "classes": {
40 | class_name: class_template.to_json()
41 | for class_name, class_template in self.classes.items()
42 | }
43 | }
44 |
45 | @staticmethod
46 | def from_json(json: ImageAnnotationTemplateJson) -> ImageAnnotationTemplate:
47 | """
48 | Deserializes a JSON object into an `ImageAnnotationTemplate`.
49 | """
50 | classes = {
51 | key: ClassAnnotationTemplate.from_json(value)
52 | for key, value in json.get("classes", {}).items()
53 | }
54 |
55 | return ImageAnnotationTemplate(classes=classes)
56 |
57 | def __repr__(self) -> str:
58 | return basic_repr(
59 | "ImageAnnotationTemplate",
60 | classes = self.classes
61 | )
62 |
--------------------------------------------------------------------------------
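For contrast with `FrameAnnotationTemplate`, the image-level template serializes with a `kind` discriminator, as this brief sketch shows:

```py
from datatap.template import ImageAnnotationTemplate, ClassAnnotationTemplate, InstanceTemplate

template = ImageAnnotationTemplate(classes = {
    "person": ClassAnnotationTemplate(instances = InstanceTemplate(bounding_box = True))
})

json = template.to_json()
print(json["kind"])                      # "ImageAnnotationTemplate"

restored = ImageAnnotationTemplate.from_json(json)
print(sorted(restored.classes.keys()))   # ['person']
```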
/datatap/template/instance_template.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import AbstractSet, Dict, List, Mapping
4 |
5 | from typing_extensions import TypedDict
6 |
7 | from ..utils import basic_repr
8 |
9 |
10 | class InstanceTemplateJson(TypedDict, total=False):
11 | """
12 | The serialized JSON representation of an instance template.
13 | """
14 |
15 | id: bool
16 | boundingBox: bool
17 | segmentation: bool
18 | keypoints: List[str]
19 | attributes: Dict[str, List[str]]
20 |
21 | class InstanceTemplate():
22 | """
23 | Describes how an individual instance is structured.
24 | """
25 |
26 | id: bool
27 | """
28 | If `id` is `True`, then all corresponding `Instance`s will have an ID
29 | that uniquely identifies the object represented by the instance in the
30 | context of the containing annotation.
31 | """
32 |
33 | bounding_box: bool
34 | """
35 | If `bounding_box` is `True`, then all corresponding `Instance`s will have a
36 | `BoundingBox` representing the bounds of their shape.
37 | """
38 |
39 | segmentation: bool
40 | """
41 | If `segmentation` is `True`, then all corresponding `Instance`s will have a
42 | `Segmentation` tightly representing their shape.
43 | """
44 |
45 | keypoints: AbstractSet[str]
46 | """
47 | For each keypoint name specified in `keypoints`, all corresponding instances
48 | will have a corresponding key in their `keypoints` field, the value of which
49 |     will contain the keypoint if it is present or has an inferrable position in
50 |     the image, or `None` if it is not in-frame.
51 | """
52 |
53 | attributes: Mapping[str, AbstractSet[str]]
54 | """
55 | For each attribute name specified in `attributes`, all corresponding
56 | `Instance`s will provide one of the given values.
57 | """
58 |
59 | def __init__(
60 | self,
61 | *,
62 | id: bool = False,
63 | bounding_box: bool = False,
64 | segmentation: bool = False,
65 | keypoints: AbstractSet[str] = set(),
66 | attributes: Mapping[str, AbstractSet[str]] = dict(),
67 | ):
68 | self.id = id
69 | self.bounding_box = bounding_box
70 | self.segmentation = segmentation
71 | self.keypoints = keypoints
72 | self.attributes = attributes
73 |
74 | def to_json(self) -> InstanceTemplateJson:
75 | """
76 | Serializes this object as JSON.
77 | """
78 | json = InstanceTemplateJson()
79 |
80 | if self.id: json["id"] = True
81 | if self.bounding_box: json["boundingBox"] = True
82 | if self.segmentation: json["segmentation"] = True
83 | if len(self.keypoints) > 0: json["keypoints"] = list(self.keypoints)
84 | if len(self.attributes) > 0: json["attributes"] = { key: list(values) for key, values in self.attributes.items() }
85 |
86 | return json
87 |
88 | @staticmethod
89 | def from_json(json: InstanceTemplateJson) -> InstanceTemplate:
90 | """
91 | Deserializes a JSON object as an `InstanceTemplate`.
92 | """
93 | id = json.get("id", False)
94 | bounding_box = json.get("boundingBox", False)
95 | segmentation = json.get("segmentation", False)
96 | keypoints = set(json.get("keypoints", []))
97 | attributes = {
98 | key: set(values)
99 | for key, values in json.get("attributes", {}).items()
100 | }
101 | return InstanceTemplate(
102 | id = id,
103 | bounding_box=bounding_box,
104 | segmentation=segmentation,
105 | keypoints=keypoints,
106 | attributes=attributes,
107 | )
108 |
109 | def __repr__(self) -> str:
110 | return basic_repr(
111 | "InstanceTemplate",
112 | id = self.id,
113 | bounding_box = self.bounding_box,
114 | segmentation = self.segmentation,
115 | keypoints = self.keypoints,
116 | attributes = self.attributes,
117 | )
118 |
--------------------------------------------------------------------------------
/datatap/template/multi_instance_template.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing_extensions import TypedDict
4 |
5 | from ..utils import basic_repr
6 |
7 | class MultiInstanceTemplateJson(TypedDict, total=False):
8 | """
9 | The serialized JSON representation of a multi instance template.
10 | """
11 |
12 | boundingBox: bool
13 | segmentation: bool
14 | count: bool
15 |
16 | class MultiInstanceTemplate():
17 | """
18 | Describes how an individual multi-instance is structured.
19 | """
20 |
21 | bounding_box: bool
22 | """
23 | If `bounding_box` is `True`, then all corresponding `MultiInstance`s will
24 | have a `BoundingBox` representing the bounds of their shape.
25 | """
26 |
27 | segmentation: bool
28 | """
29 | If `segmentation` is `True`, then all corresponding `MultiInstance`s will
30 | have a `Segmentation` tightly representing their shape.
31 | """
32 |
33 | count: bool
34 | """
35 | If `count` is `True`, then all corresponding `MultiInstance`s will have a
36 | count of how many true instances are present in the multi-instance.
37 | """
38 |
39 | def __init__(
40 | self,
41 | *,
42 | bounding_box: bool = False,
43 | segmentation: bool = False,
44 | count: bool = False
45 | ):
46 | self.bounding_box = bounding_box
47 | self.segmentation = segmentation
48 | self.count = count
49 |
50 | def to_json(self) -> MultiInstanceTemplateJson:
51 | json = MultiInstanceTemplateJson()
52 | if self.bounding_box: json["boundingBox"] = True
53 | if self.segmentation: json["segmentation"] = True
54 | if self.count: json["count"] = True
55 | return json
56 |
57 | @staticmethod
58 | def from_json(json: MultiInstanceTemplateJson) -> MultiInstanceTemplate:
59 | bounding_box = json.get("boundingBox", False)
60 | segmentation = json.get("segmentation", False)
61 | count = json.get("count", False)
62 | return MultiInstanceTemplate(
63 | bounding_box = bounding_box,
64 | segmentation = segmentation,
65 | count = count
66 | )
67 |
68 | def __repr__(self) -> str:
69 | return basic_repr(
70 | "MultiInstanceTemplate",
71 | bounding_box = self.bounding_box,
72 |             segmentation = self.segmentation,
73 |             count = self.count
74 |         )
--------------------------------------------------------------------------------
/datatap/template/video_annotation_template.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing_extensions import Literal, TypedDict
4 |
5 | from ..utils import basic_repr
6 | from .frame_annotation_template import (FrameAnnotationTemplate,
7 | FrameAnnotationTemplateJson)
8 |
9 |
10 | class VideoAnnotationTemplateJson(TypedDict):
11 | """
12 | The serialized JSON representation of a video annotation template.
13 | """
14 |
15 | kind: Literal["VideoAnnotationTemplate"]
16 | frames: FrameAnnotationTemplateJson
17 |
18 | class VideoAnnotationTemplate():
19 | """
20 | Describes how a `VideoAnnotation` is structured.
21 |
22 | It consists only of a `FrameAnnotationTemplate` that describes its frames.
23 | """
24 |
25 | frames: FrameAnnotationTemplate
26 | """
27 | A `FrameAnnotationTemplate` that describes how the frames are structured.
28 | """
29 |
30 | def __init__(self, *, frames: FrameAnnotationTemplate):
31 | self.frames = frames
32 |
33 | def to_json(self) -> VideoAnnotationTemplateJson:
34 | """
35 | Serializes this object to JSON.
36 | """
37 | return {
38 | "kind": "VideoAnnotationTemplate",
39 | "frames": self.frames.to_json()
40 | }
41 |
42 | @staticmethod
43 | def from_json(json: VideoAnnotationTemplateJson) -> VideoAnnotationTemplate:
44 | """
45 | Deserializes a JSON object into a `VideoAnnotationTemplate`.
46 | """
47 | return VideoAnnotationTemplate(
48 | frames = FrameAnnotationTemplate.from_json(json["frames"])
49 | )
50 |
51 | def __repr__(self) -> str:
52 | return basic_repr(
53 | "VideoAnnotationTemplate",
54 | frames = self.frames
55 | )
56 |
--------------------------------------------------------------------------------
/datatap/tf/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The `tf` module provides utilities for using dataTap with Tensorflow.
3 |
4 | Please note that if you want to be able to use this module, you will
5 | either need to install Tensorflow manually, or install dataTap with the
6 | tensorflow extra:
7 |
8 | ```bash
9 | pip install 'datatap[tf]'
10 | ```
11 |
12 | This module exports two helper functions for creating tensorflow datasets.
13 | Here is an example of the single-process one, `create_dataset`:
14 |
15 | ```py
16 | import itertools
17 | from datatap import Api
18 | from datatap.tf import create_dataset
19 |
20 | api = Api()
21 | dataset = api.get_default_database().get_dataset_list()[0]
22 | latest_version = dataset.latest_version
23 |
24 | dataset = create_dataset(latest_version, "training", num_workers = 4)
25 | for (_image, bounding_boxes, labels) in itertools.islice(dataset, 3):
26 | print(bounding_boxes, labels)
27 | ```
28 | """
29 |
30 | from .dataset import create_dataset, create_multi_worker_dataset
31 |
32 | __all__ = [
33 | "create_dataset",
34 | "create_multi_worker_dataset",
35 | ]
--------------------------------------------------------------------------------
/datatap/tf/dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 | from os import cpu_count
3 |
4 | import requests
5 | import functools
6 | from typing import Dict, Optional
7 |
8 | try:
9 | import tensorflow as tf
10 | except ImportError:
11 | tf = {}
12 |
13 | from datatap.api.entities import Dataset
14 |
15 | def _get_class_mapping(dataset: Dataset, class_mapping: Optional[Dict[str, int]] = None):
16 | classes_used = dataset.template.classes.keys()
17 | if class_mapping is not None:
18 | if set(class_mapping.keys()) != set(classes_used):
19 | print(
20 | "[WARNING]: Potentially invalid class mapping. Provided classes ",
21 | set(class_mapping.keys()),
22 | " but needed ",
23 | set(classes_used)
24 | )
25 | return class_mapping
26 | else:
27 | return {
28 | cls: i
29 | for i, cls in enumerate(sorted(classes_used))
30 | }
31 |
32 | def create_dataset(
33 | dataset: Dataset,
34 | split: str,
35 | input_class_mapping: Optional[Dict[str, int]] = None,
36 | num_workers: int = cpu_count() or 1,
37 | input_context: Optional[tf.distribute.InputContext] = None
38 | ):
39 | """
40 |     Creates a tensorflow `Dataset` object that will load the specified split of `dataset`.
41 |
42 | This function handles the necessary `Dataset` operations to parallelize the loading
43 | operation. Since image loading can be slow, it is recommended to have `num_workers`
44 | set to a value greater than 1. By default, it will try to load one image per CPU.
45 |
46 | If you intend to use the dataset across multiple processes or computers, consider
47 |     using `create_multi_worker_dataset` instead.
48 | """
49 | class_mapping = _get_class_mapping(dataset, input_class_mapping)
50 |
51 | def gen():
52 | worker_id = input_context.input_pipeline_id if input_context is not None else 0
53 | num_workers = input_context.num_input_pipelines if input_context is not None else 1
54 |
55 | for droplet in dataset.stream_split(split, worker_id, num_workers):
56 | image_url = tf.constant(droplet.image.paths[0])
57 |
58 | bounding_boxes = tf.stack([
59 | tf.constant(i.bounding_box.rectangle.to_xywh_tuple(), shape=(4,), dtype=tf.float64)
60 | for cls in droplet.classes.keys()
61 | for i in droplet.classes[cls].instances
62 | if i.bounding_box is not None
63 | ])
64 |
65 | labels = tf.stack([
66 | tf.constant(class_mapping[cls], dtype=tf.int32)
67 | for cls in droplet.classes.keys()
68 | for _ in droplet.classes[cls].instances
69 | ])
70 |
71 | yield (image_url, bounding_boxes, labels)
72 |
73 | def _load_img_fn(image_url: tf.Tensor):
74 | res = requests.get(image_url.numpy().decode("ascii"))
75 | img = tf.io.decode_jpeg(res.content, channels=3)
76 | return img
77 |
78 | def load_img_fn(image_url: tf.Tensor):
79 | return tf.py_function(_load_img_fn, inp=(image_url,), Tout=(tf.uint8,))
80 |
81 | def map_fn(image_url: tf.Tensor, boxes: tf.Tensor, labels: tf.Tensor):
82 | return (load_img_fn(image_url), boxes, labels)
83 |
84 | ds = tf.data.Dataset.from_generator(
85 | gen,
86 | (tf.string, tf.float64, tf.int32),
87 |         (tf.TensorShape(()), tf.TensorShape((None, 4)), tf.TensorShape((None,)))
88 | )
89 |
90 | return ds.map(map_fn, num_parallel_calls=num_workers)
91 |
92 |
93 | def create_multi_worker_dataset(
94 | strategy: tf.distribute.experimental.Strategy,
95 | dataset: Dataset,
96 | split: str,
97 | num_workers: int = cpu_count() or 1,
98 | input_class_mapping: Optional[Dict[str, int]] = None,
99 | ):
100 | """
101 | Creates a multi-worker sharded dataset. In addition to sharding the contents
102 | of the dataset across multiple machines, this function will also attempt to
103 | load the images across several workers.
104 |
105 | If you are running multiple workers on the same physical machine, consider lowering
106 | the value of `num_workers`, as by default each worker will try to use every CPU
107 | on the machine.
108 | """
109 | ds = strategy.experimental_distribute_datasets_from_function(
110 |         functools.partial(create_dataset, dataset, split, input_class_mapping, num_workers)
111 | )
112 | return ds
113 |
--------------------------------------------------------------------------------
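A hedged sketch of how `create_multi_worker_dataset` might be wired up. The dataset lookup mirrors the example in `datatap/tf/__init__.py`; the choice of `MultiWorkerMirroredStrategy` is an assumption, and any `tf.distribute` strategy exposing `experimental_distribute_datasets_from_function` should work:

```py
import tensorflow as tf
from datatap import Api
from datatap.tf import create_multi_worker_dataset

api = Api()
dataset = api.get_default_database().get_dataset_list()[0]
latest_version = dataset.latest_version

strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
distributed_ds = create_multi_worker_dataset(strategy, latest_version, "training", num_workers = 2)

# `distributed_ds` is a distributed dataset; consume it inside the strategy's
# scope (or pass batches to strategy.run) as usual for tf.distribute.
```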
/datatap/torch/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The `torch` module provides utilities for using dataTap with PyTorch.
3 |
4 | Please note that if you want to be able to use this module, you will
5 | either need to install PyTorch manually, or install dataTap with the
6 | PyTorch extra:
7 |
8 | ```bash
9 | pip install 'datatap[torch]'
10 | ```
11 |
12 | The `torch` module provides both a `torch.IterableDataset` implementation,
13 | and a convenience method to create a `torch.Dataloader` using it. Here is
14 | an example of how to use these:
15 |
16 | ```py
17 | import itertools
18 | from datatap import Api
19 | from datatap.torch import create_dataloader
20 |
21 | import torchvision.transforms as T
22 |
23 | api = Api()
24 | dataset = api.get_default_database().get_dataset_list()[0]
25 | latest_version = dataset.latest_version
26 |
27 | transforms = T.Compose([
28 | T.Resize((128, 128)),
29 | T.ColorJitter(hue=0.2),
30 | T.ToTensor(),
31 | ])
32 |
33 | dataloader = create_dataloader(latest_version, "training", batch_size = 4, image_transform = transforms)
34 | for batch in itertools.islice(dataloader, 3):
35 | print(batch.boxes, batch.labels)
36 | ```
37 |
38 | """
39 |
40 | from ._patch_torch import patch_all as _patch_all
41 | _patch_all()
42 |
43 | from .dataset import DatasetElement, DatasetBatch, IterableDataset
44 | from .dataloader import create_dataloader
45 | from .utils import torch_to_image_annotation
46 |
47 | __all__ = [
48 | "DatasetElement",
49 | "DatasetBatch",
50 | "IterableDataset",
51 | "create_dataloader",
52 | "torch_to_image_annotation",
53 | ]
--------------------------------------------------------------------------------
/datatap/torch/_patch_torch.py:
--------------------------------------------------------------------------------
1 | """
2 | This file handles monkey patching the PyTorch dataset/dataloader classes to
3 | allow them to be typed.
4 | """
5 |
6 | import functools
7 | from typing import Any, Type, TypeVar
8 |
9 | _T = TypeVar("_T")
10 |
11 | def allow_generic(cls: Type[_T], type: Any) -> Type[_T]:
12 | """
13 |     This function can be monkey patched onto any type to allow it
14 | to support generics (i.e. Cls[T]).
15 |
16 | If you are running into any issues with it, please file a bug
17 | report with dev@zensors.com.
18 | """
19 | return cls
20 |
21 | def patch_generic_class(cls: Type[Any]):
22 | setattr(cls, "__class_getitem__", functools.partial(allow_generic, cls))
23 |
24 |
25 | def patch_all():
26 | from torch.utils.data import IterableDataset, DataLoader
27 |
28 | patch_generic_class(IterableDataset)
29 | patch_generic_class(DataLoader)
--------------------------------------------------------------------------------
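In effect, the patch above lets the stock PyTorch classes be subscripted in annotations and at runtime, which is what `datatap.torch.dataset` and `datatap.torch.dataloader` rely on. A tiny sketch of what it enables (importing `datatap.torch` applies the patch automatically):

```py
import datatap.torch  # importing the package runs patch_all() as a side effect

from torch.utils.data import DataLoader

# On PyTorch builds where DataLoader is not generic, subscripting it would raise
# a TypeError; after patching, Cls[T] simply returns the class unchanged.
assert DataLoader[int] is DataLoader
```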
/datatap/torch/dataloader.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from os import cpu_count
4 | from typing import Callable, Dict, Generator, Optional, TypeVar, cast, TYPE_CHECKING
5 |
6 | import torch
7 | import PIL.Image
8 | import torchvision.transforms.functional as TF
9 | from torch.utils.data import DataLoader as TorchDataLoader
10 |
11 | from datatap.api.entities import Dataset
12 |
13 | from .dataset import IterableDataset, DatasetBatch, collate
14 |
15 | _T = TypeVar("_T")
16 |
17 | if TYPE_CHECKING:
18 | class DataLoader(TorchDataLoader[_T]):
19 | """
20 | This is an ambient redeclaration of the dataloader class that
21 | has properly typed iter methods.
22 | """
23 | def __iter__(self) -> Generator[_T, None, None]: ...
24 | else:
25 | DataLoader = TorchDataLoader
26 |
27 | def create_dataloader(
28 | dataset: Dataset,
29 | split: str,
30 | batch_size: int = 1,
31 | num_workers: int = cpu_count() or 0,
32 | *,
33 | image_transform: Callable[[PIL.Image.Image], torch.Tensor] = TF.to_tensor,
34 | class_mapping: Optional[Dict[str, int]] = None,
35 | device: torch.device = torch.device("cpu")
36 | ) -> DataLoader[DatasetBatch]:
37 | """
38 |     Creates a PyTorch `DataLoader` that yields batches of annotations.
39 |
40 |     This `DataLoader` uses `datatap.torch.IterableDataset` under the hood, so
41 | all of the same restrictions apply, most notably that the `image_transform`
42 | function must ultimately return a `torch.Tensor` of dimensionality
43 | `(..., H, W)`.
44 | """
45 | if torch.multiprocessing.get_start_method(allow_none = True) is None:
46 | torch.multiprocessing.set_start_method("spawn")
47 |
48 | torch_dataset = IterableDataset(dataset, split, image_transform = image_transform, class_mapping = class_mapping, device = device)
49 | dataloader = cast(
50 | DataLoader[DatasetBatch],
51 | DataLoader(
52 | torch_dataset,
53 | batch_size,
54 | collate_fn = collate, # type: ignore (Torch's types are off)
55 | num_workers = num_workers,
56 | )
57 | )
58 |
59 | return dataloader
60 |
--------------------------------------------------------------------------------
/datatap/torch/dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Any, Callable, Dict, Iterator, List, Optional, Union, overload
4 |
5 | import torch
6 | import PIL.Image
7 | import torchvision.transforms.functional as TF
8 | from torch.utils.data import IterableDataset as TorchIterableDataset, get_worker_info # type: ignore
9 |
10 | from datatap.droplet import ImageAnnotation
11 | from datatap.api.entities import Dataset
12 |
13 | class DatasetElement():
14 | """
15 | Represents a single element from the dataset.
16 | """
17 |
18 | original_annotation: ImageAnnotation
19 | """
20 | The original, untransformed annotation.
21 | """
22 |
23 | image: torch.Tensor
24 | """
25 | The image as transformed by the dataset.
26 | """
27 |
28 | boxes: torch.Tensor
29 | """
30 | The bounding boxes. They are specified in xyxy format `(min-x, min-y, max-x, max-y)`.
31 | """
32 |
33 | labels: torch.Tensor
34 | """
35 | The labels. They are a tensor of unsigned integers.
36 | """
37 |
38 | def __init__(self, original_annotation: ImageAnnotation, image: torch.Tensor, boxes: torch.Tensor, labels: torch.Tensor):
39 | self.original_annotation = original_annotation
40 | self.image = image
41 | self.boxes = boxes
42 | self.labels = labels
43 |
44 | class DatasetBatch():
45 | """
46 | Represents a batch of images as produced by a `DataLoader`.
47 | """
48 |
49 | original_annotations: List[ImageAnnotation]
50 | """
51 | The original annotations from this batch.
52 | """
53 |
54 | images: List[torch.Tensor]
55 | """
56 | A list of the images in this batch.
57 | """
58 |
59 | boxes: List[torch.Tensor]
60 | """
61 | A list of all the per-image bounding boxes in this batch.
62 | """
63 |
64 | labels: List[torch.Tensor]
65 | """
66 | A list of all the per-image labels in this batch.
67 | """
68 |
69 | def __init__(self, original_annotations: List[ImageAnnotation], images: List[torch.Tensor], boxes: List[torch.Tensor], labels: List[torch.Tensor]):
70 | self.original_annotations = original_annotations
71 | self.images = images
72 | self.boxes = boxes
73 | self.labels = labels
74 |
75 | @overload
76 | def collate(elt: DatasetElement) -> DatasetBatch: ...
77 | @overload
78 | def collate(elt: List[DatasetElement]) -> DatasetBatch: ...
79 | def collate(elt: Union[DatasetElement, List[DatasetElement]]) -> DatasetBatch:
80 | """
81 | A utility function that collates several `DatasetElement`s into one `DatasetBatch`.
82 | """
83 | if not isinstance(elt, List):
84 | elt = [elt]
85 |
86 | return DatasetBatch(
87 | [d.original_annotation for d in elt],
88 | [d.image for d in elt],
89 | [d.boxes for d in elt],
90 | [d.labels for d in elt],
91 | )
92 |
93 | class IterableDataset(TorchIterableDataset[DatasetElement]):
94 | """
95 |     A PyTorch `IterableDataset` that yields all of the annotations from a
96 |     given `Dataset`. Provides functionality for automatically applying
97 | transforms to images, and then scaling the annotations to the new dimensions.
98 |
99 |     Note: it is required that the transformation produce an image tensor of
100 | dimensionality `[..., H, W]`. One way of doing this is using
101 | `torchvision.transforms.functional.to_tensor` as the final step of the transform.
102 | """
103 |
104 | _dataset: Dataset
105 | _split: str
106 | _class_mapping: Dict[str, int]
107 | _class_names: Dict[int, str]
108 | _device: torch.device
109 |
110 | def __init__(
111 | self,
112 | dataset: Dataset,
113 | split: str,
114 | class_mapping: Optional[Dict[str, int]] = None,
115 | image_transform: Callable[[PIL.Image.Image], torch.Tensor] = TF.to_tensor,
116 | device: torch.device = torch.device("cpu")
117 | ):
118 | self._dataset = dataset
119 | self._split = split
120 | self._image_transform = image_transform
121 | self._device = device
122 |
123 | template_classes = dataset.template.classes.keys()
124 | if class_mapping is not None:
125 | if set(class_mapping.keys()) != set(template_classes):
126 | print(
127 | "[WARNING]: Potentially invalid class mapping. Provided classes ",
128 | set(class_mapping.keys()),
129 | " but needed ",
130 | set(template_classes)
131 | )
132 | self._class_mapping = class_mapping
133 | else:
134 | self._class_mapping = {
135 | cls: i
136 | for i, cls in enumerate(sorted(template_classes))
137 | }
138 |
139 | self._class_names = {
140 | i: cls
141 | for cls, i in self._class_mapping.items()
142 | }
143 |
144 | def _get_generator(self):
145 | worker_info: Optional[Any] = get_worker_info()
146 |
147 | if worker_info is None:
148 | return self._dataset.stream_split(self._split, 0, 1)
149 | else:
150 | num_workers: int = worker_info.num_workers
151 | worker_id: int = worker_info.id
152 |
153 | return self._dataset.stream_split(self._split, worker_id, num_workers)
154 |
155 | def __iter__(self) -> Iterator[DatasetElement]:
156 | for annotation in self._get_generator():
157 | img = annotation.image.get_pil_image(True).convert("RGB")
158 | transformed_img = self._image_transform(img).to(self._device)
159 | h, w = transformed_img.shape[-2:]
160 |
161 | instance_boxes = [
162 | (
163 | instance.bounding_box.rectangle.p1.x * w,
164 | instance.bounding_box.rectangle.p1.y * h,
165 | instance.bounding_box.rectangle.p2.x * w,
166 | instance.bounding_box.rectangle.p2.y * h,
167 | )
168 | for class_name in annotation.classes.keys()
169 | for instance in annotation.classes[class_name].instances
170 |                 if instance.bounding_box is not None and class_name in self._class_mapping
171 | ]
172 |
173 | instance_labels = [
174 | self._class_mapping[class_name]
175 | for class_name in annotation.classes.keys()
176 |                 for instance in annotation.classes[class_name].instances
177 |                 if instance.bounding_box is not None and class_name in self._class_mapping
178 | ]
179 |
180 | target = torch.tensor(instance_boxes).reshape((-1, 4)).to(self._device)
181 | labels = torch.tensor(instance_labels, dtype = torch.int64).to(self._device)
182 |
183 | element = DatasetElement(annotation, transformed_img, target, labels)
184 |
185 | yield element
186 |
--------------------------------------------------------------------------------
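Although `create_dataloader` is the usual entry point, the `IterableDataset` above can also be consumed directly. A hedged sketch, with the dataset lookup mirroring the example in `datatap/torch/__init__.py`:

```py
import itertools
from datatap import Api
from datatap.torch import IterableDataset

api = Api()
dataset = api.get_default_database().get_dataset_list()[0]
latest_version = dataset.latest_version

torch_dataset = IterableDataset(latest_version, "training")
for element in itertools.islice(torch_dataset, 3):
    # element.image is (..., H, W); element.boxes is (n, 4) in pixel xyxy coordinates.
    print(element.boxes.shape, element.labels.shape)
```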
/datatap/torch/utils.py:
--------------------------------------------------------------------------------
1 | from datatap.droplet.bounding_box import BoundingBox
2 | from typing import Dict, List, Optional
3 |
4 | import torch
5 | import torchvision.transforms.functional as TF
6 |
7 | from datatap.geometry import Point, Rectangle
8 | from datatap.droplet import Instance, ClassAnnotation, ImageAnnotation, Image
9 |
10 | def tensor_to_rectangle(tensor: torch.Tensor) -> Rectangle:
11 | """
12 | Expects a tensor of dimensionality `torch.Size([4])` in `xyxy` format
13 | """
14 | return Rectangle(
15 | Point(float(tensor[0]), float(tensor[1]), clip = True),
16 | Point(float(tensor[2]), float(tensor[3]), clip = True),
17 | )
18 |
19 | def torch_to_image_annotation(
20 | image: torch.Tensor,
21 | class_map: Dict[str, int],
22 | *,
23 | labels: torch.Tensor,
24 | boxes: torch.Tensor,
25 | scores: torch.Tensor,
26 | serialize_image: bool = False,
27 | uid: Optional[str] = None,
28 | ) -> ImageAnnotation:
29 | """
30 | Creates an `ImageAnnotation` from a canonical tensor representation.
31 |
32 | This function assumes the following,
33 |
34 | 1. Image is of dimensionality `(..., height, width)`
35 | 2. Labels are an `int`/`uint` tensor of size `[n]`
36 | 3. Scores are a `float` tensor of size `[n]`
37 |     4. Boxes are a `float` tensor of size `[n, 4]`
38 | """
39 | inverted_class_map = {
40 | i: cls
41 | for cls, i in class_map.items()
42 | }
43 |
44 | height, width = image.shape[-2:]
45 |
46 | # First construct the image. If we are asked to serialize it, then
47 | # use the tensor to construct a cached PIL image
48 | if serialize_image:
49 | pil_image = TF.to_pil_image(image, "RGB")
50 | droplet_image = Image.from_pil(pil_image)
51 | else:
52 | droplet_image = Image(paths = [])
53 |
54 | # Then, compute each of the class annotations
55 | class_annotations: Dict[str, List[Instance]] = {}
56 |
57 | boxes = boxes.cpu() / torch.tensor([width, height, width, height])
58 |
59 | for i, label in enumerate(labels.cpu()):
60 | class_name = inverted_class_map.get(int(label))
61 | if class_name is None:
62 | continue
63 |
64 | if class_name not in class_annotations:
65 | class_annotations[class_name] = []
66 |
67 | class_annotations[class_name].append(
68 | Instance(
69 | bounding_box = BoundingBox(
70 | tensor_to_rectangle(boxes[i]),
71 | confidence = float(scores[i]),
72 | )
73 | )
74 | )
75 |
76 | # Finally, construct the image annotation
77 |
78 | return ImageAnnotation(
79 | uid = uid,
80 | image = droplet_image,
81 | classes = {
82 | cls: ClassAnnotation(instances = instances, multi_instances = [])
83 | for cls, instances in class_annotations.items()
84 | }
85 | )
--------------------------------------------------------------------------------
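A self-contained sketch of `torch_to_image_annotation`, using fixed tensors in place of real model output (the class map, image size, and box values are all hypothetical):

```py
import torch
from datatap.torch import torch_to_image_annotation

class_map = { "person": 0, "vehicle": 1 }        # hypothetical label mapping

image = torch.zeros((3, 480, 640))               # (..., height, width)
labels = torch.tensor([0, 1])
scores = torch.tensor([0.90, 0.75])
boxes = torch.tensor([                           # xyxy, in pixel coordinates
    [ 10.0,  20.0, 100.0, 200.0],
    [300.0, 150.0, 400.0, 300.0],
])

annotation = torch_to_image_annotation(
    image, class_map, labels = labels, boxes = boxes, scores = scores
)
# Boxes are normalized by the image width/height, so the resulting droplet lives
# on the unit plane.
print(sorted(annotation.classes.keys()))         # ['person', 'vehicle']
```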
/datatap/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | A collection of primarily internal-use utilities.
3 | """
4 |
5 | from .environment import Environment
6 | from .helpers import assert_one, timer, DeletableGenerator
7 | from .cache_generator import CacheGenerator
8 | from .or_nullish import OrNullish
9 | from .print_helpers import basic_repr, color_repr, force_pretty_print, pprint, pprints
10 |
11 | __all__ = [
12 | "Environment",
13 | "assert_one",
14 | "timer",
15 | "DeletableGenerator",
16 | "CacheGenerator",
17 | "OrNullish",
18 | "basic_repr",
19 | "color_repr",
20 | "force_pretty_print",
21 | "pprint",
22 | "pprints"
23 | ]
24 |
--------------------------------------------------------------------------------
/datatap/utils/cache_generator.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import os
4 | import json
5 | import time
6 | from threading import Semaphore, Thread
7 | from queue import Queue
8 | from os import path
9 | from typing import Any, Callable, Generator, TypeVar, Optional
10 |
11 | from .helpers import DeletableGenerator
12 |
13 | _T = TypeVar("_T")
14 |
15 | def CacheGenerator(file_name: str, create_stream: Callable[[], Generator[_T, Any, Any]]) -> Generator[_T, None, None]:
16 | # We can't just naively stream from the server, unfortunately. Due to the sheer
17 | # volume of data, and the fact that training can be such a slow process, if we
18 | # try to stream the data directly from server to training process, we will end
19 | # up filling the OS' buffer, causing significant backpressure for the server.
20 | #
21 | # Likewise, we cannot necessarily stream from the server into a local buffer,
22 | # as a large dataset could be greater than our available RAM.
23 | #
24 |     # As a result, this method streams data from the server to a temp file in a
25 |     # background thread. The main process then streams from that tempfile to the consumer
26 | # of the stream. Finally, once all data has been read, the main process stores
27 | # the stream file as an authoritative cache file for this particular stream.
28 | # Subsequent calls to this function with the same arguments will then pull from
29 | # that file.
30 | #
31 | # Please note that as a result of file-system accesses, streaming in this manner
32 | # incurs a non-trivial performance cost. For production training jobs, it is
33 | # recommended that this function be used with a data-loader capable of running
34 | # on multiple threads.
35 |
36 | # TODO(zwade): change this to UID once we have an endpoint for fetching it
37 | dir_name = path.dirname(file_name)
38 | tmp_file_name = f"{file_name}.stream"
39 | os.makedirs(dir_name, exist_ok=True)
40 |
41 | EOF = "EOF"
42 |
43 | # Checks for an authoritative cache, using it if it exists.
44 | if path.exists(file_name):
45 | def cache_generator():
46 | with open(file_name, "r") as f:
47 | for line in f.readlines():
48 | line = line.strip()
49 | if line == "" or line == EOF:
50 | continue
51 | yield json.loads(line)
52 | return
53 | return cache_generator()
54 |
55 |
56 | # `sync_queue` is used to synchronize startup and termination of the
57 | # subprocess, optionally propagating any errors that arise.
58 | sync_queue: Queue[Optional[Exception]] = Queue()
59 |
60 | # `available_annotations` counts how many lines have been written to
61 | # the stream file that have not yet been consumed.
62 | available_annotations = Semaphore()
63 |
64 | # `dead` is a flag that allows us to terminate our stream early
65 | dead = False
66 |
67 | def stream_target():
68 | stream = create_stream()
69 |
70 | with open(tmp_file_name, "a+") as f:
71 | sync_queue.put(None)
72 | try:
73 | for element in stream:
74 | if dead:
75 | raise Exception("Premature termination")
76 |
77 | # We want to prioritize reading quickly, so after we write, we
78 | # flush to the disk.
79 | #
80 | # (Note that we do not synchronize, as `fsync` incurs a 10x
81 | # slowdown)
82 | f.write(json.dumps(element) + "\n")
83 | f.flush()
84 | # We then "release" our semaphore to indicate that we've made a
85 | # new asset available to the consumer
86 | available_annotations.release()
87 |
88 | sync_queue.put(None)
89 | except Exception as e:
90 | sync_queue.put(e)
91 | finally:
92 | # We explicitly write "EOF" at the end of the stream, since we
93 | # otherwise would not be able to distinguish between the actual
94 | # EOF and an incomplete write.
95 | f.write(EOF + "\n")
96 | f.flush()
97 | available_annotations.release()
98 |
99 | thread = Thread(target = stream_target)
100 | thread.start()
101 |
102 | def generator():
103 | sync_queue.get()
104 | with open(tmp_file_name, "r") as f:
105 | while True:
106 | available_annotations.acquire()
107 |
108 | line = ""
109 | c = 0
110 | while line == "" or line[-1] != "\n":
111 | # Busy loop to wait for the file write.
112 | #
113 | # If we're eagerly fetching a large portion of the stream
114 | # we may become bottlenecked by file synchronization. In
115 | # this case, we implement a simple backoff to avoid
116 | # unnecessarily hammering the file system.
117 | line += f.readline()
118 | c += 1
119 | if c > 10:
120 | time.sleep(0.005)
121 |
122 | data = line.strip()
123 | if data == EOF:
124 | break
125 |
126 | yield json.loads(data)
127 |
128 | thread.join()
129 |
130 | error = sync_queue.get()
131 | if error is not None:
132 | 			# This error came from the data loading thread
133 | raise error
134 |
135 | def stop_processing():
136 | # This is a rather gross way of killing it, but unlike `Process`, `Thread`
137 | # has no `terminate` method.
138 | nonlocal dead
139 | dead = True
140 |
141 | return DeletableGenerator(generator(), stop_processing)
--------------------------------------------------------------------------------
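The design comment at the top of `CacheGenerator` describes the stream-to-tempfile caching scheme; the following is a minimal usage sketch (the cache path and the stream contents are placeholders, not part of the library):

```py
from datatap.utils import CacheGenerator

def create_stream():
    # Placeholder upstream source; in practice this would be a generator of
    # JSON-serializable annotation droplets streamed from the server.
    for i in range(100):
        yield {"index": i}

# If an authoritative cache file already exists at this path it is read
# directly; otherwise the stream is written to "<path>.stream" by a
# background thread while this generator consumes it.
stream = CacheGenerator("/tmp/datatap-cache/example.jsonl", create_stream)

for droplet in stream:
    print(droplet)
```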
/datatap/utils/environment.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | class Environment:
4 | """
5 | A class providing static access to parameters related to the execution
6 | environment of the module.
7 | """
8 |
9 | API_KEY = os.getenv("DATATAP_API_KEY")
10 | """
11 | The default API key used for API calls.
12 | """
13 |
14 | BASE_URI = os.getenv("DATATAP_BASE_URI", "https://app.datatap.dev")
15 | """
16 | The base URI used for referencing the dataTap application, e.g. for API
17 | calls. One might change this to use an HTTP proxy, for example.
18 | """
19 |
--------------------------------------------------------------------------------
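Since `Environment` reads these variables when the module is first imported, they have to be present in the process environment beforehand; a small sketch (the key value is a placeholder):

```py
import os

# Must be set before the first `datatap` import, because Environment reads
# os.getenv at class-definition time.
os.environ["DATATAP_API_KEY"] = "<your-api-key>"

from datatap.utils import Environment

print(Environment.BASE_URI)  # "https://app.datatap.dev" unless DATATAP_BASE_URI overrides it
```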
/datatap/utils/helpers.py:
--------------------------------------------------------------------------------
1 | import time
2 | from types import TracebackType
3 | from typing import Dict, List, Callable, Generator, Optional, Tuple, TypeVar
4 | from contextlib import contextmanager
5 |
6 | from .print_helpers import pprint
7 |
8 | _T = TypeVar("_T")
9 | _U = TypeVar("_U")
10 | _V = TypeVar("_V")
11 |
12 | class DeletableGenerator(Generator[_T, _U, _V]):
13 | """
14 | A deletable generator wraps an existing generator with a deletion
15 | function to allow cleanup.
16 | """
17 |
18 | _gen: Generator[_T, _U, _V]
19 | _delete: Callable[[], None]
20 |
21 | def __init__(self, gen: Generator[_T, _U, _V], delete_thunk: Callable[[], None]):
22 | self._gen = gen
23 | self._delete = delete_thunk
24 |
25 | def __next__(self):
26 | return next(self._gen)
27 |
28 | def send(self, value: _U):
29 | return self._gen.send(value)
30 |
31 | def throw(self, excn: BaseException, val: None, tb: Optional[TracebackType]):
32 | return self._gen.throw(excn, val, tb)
33 |
34 | def __del__(self):
35 | self._delete()
36 | pass
37 |
38 |
39 | def assert_one(item_list: List[_T]) -> _T:
40 | """
41 | Given a list of items, asserts that the list is a singleton,
42 | and returns its value.
43 | """
44 | if len(item_list) != 1:
45 | raise AssertionError(f"Expected one item in list, but found {len(item_list)}", item_list)
46 |
47 | return item_list[0]
48 |
49 |
50 | _timer_state: Dict[str, Tuple[float, int]] = {}
51 | @contextmanager
52 | def timer(name: str):
53 | start = time.time()
54 | yield None
55 | end = time.time()
56 |
57 | value = end - start
58 | avg, count = _timer_state.get(name, (0.0, 0))
59 | count += 1
60 | avg += (value - avg) / count
61 | _timer_state[name] = (avg, count)
62 |
63 | pprint(
64 | "{blue}{name} took {yellow}{value:1.3f}s{blue} for an average of {yellow}{avg:1.3f}s",
65 | name = name,
66 | value = value,
67 | avg = avg,
68 | )
--------------------------------------------------------------------------------
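For reference, a short sketch of `assert_one` and the `timer` context manager (the data and the workload inside the timed block are placeholders):

```py
from datatap.utils import assert_one, timer

matches = [user for user in ["alice", "bob"] if user == "alice"]
only_match = assert_one(matches)  # raises AssertionError unless the list has exactly one element

# Each exit of the block prints the elapsed time and the running average
# recorded under the name "work".
with timer("work"):
    sum(range(1_000_000))  # placeholder workload
```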
/datatap/utils/or_nullish.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, TypeVar, Callable
2 |
3 | _T = TypeVar("_T")
4 | _S = TypeVar("_S")
5 |
6 | class OrNullish:
7 | """
8 | A helper class to represent the monad `α OrNullish = α | None`.
9 | """
10 |
11 | @staticmethod
12 | def bind(val: Optional[_T], fn: Callable[[_T], Optional[_S]]) -> Optional[_S]:
13 | """
14 | Monadically binds `fn` to the value of `val`.
15 | """
16 | if val is None:
17 | return None
18 | else:
19 | return fn(val)
--------------------------------------------------------------------------------
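A quick illustration of `OrNullish.bind` chaining two optional lookups (the data here is made up):

```py
from typing import Dict, Optional

from datatap.utils import OrNullish

users: Dict[str, Dict[str, str]] = {"alice": {"email": "alice@example.com"}}

def find_user(name: str) -> Optional[Dict[str, str]]:
    return users.get(name)

def get_email(user: Dict[str, str]) -> Optional[str]:
    return user.get("email")

# Each bind short-circuits to None if the previous step produced None.
print(OrNullish.bind(find_user("alice"), get_email))  # "alice@example.com"
print(OrNullish.bind(find_user("bob"), get_email))    # None
```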
/datatap/utils/print_helpers.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import sys
4 | from typing import Any, Dict, List, Tuple, Union, cast
5 |
6 | _ansi = {
7 | "gray": "\033[30m",
8 | "red": "\033[31m",
9 | "green": "\033[32m",
10 | "yellow": "\033[33m",
11 | "blue": "\033[34m",
12 | "purple": "\033[35m",
13 | "cyan": "\033[36m",
14 | "white": "\033[37m",
15 | "black": "\033[38m",
16 |
17 | "orange": "\033[38;5;209m", # TODO(zwade): This is a bit closer to "salmon"
18 |
19 | "clear": "\033[0m",
20 |
21 | "prev": "\033[F",
22 | "start": "\033[G",
23 | }
24 |
25 | IS_INTERACTIVE = sys.stdout.isatty()
26 | pretty_print: bool = False
27 |
28 | def force_pretty_print():
29 | """
30 | By default, this library only uses pretty-printing when it's in an
31 | interactive environment (terminal, python shell, etc.). However, there are a
32 | few cases when pretty-printing is desired in a non-interactive environment,
33 | such as when running under Jupyter. Calling this function once will ensure
34 | all future prints will be pretty.
35 | """
36 | global pretty_print
37 | pretty_print = True
38 |
39 | def pprint(fmt: str, *args: Any, print_args: Dict[str, Any] = {}, **kwargs: Any) -> None:
40 | """
41 | Pretty printer. The first argument is a format string, and the remaining
42 | arguments are the values for the string. Additionally, the format string
43 | can access a number of ansi escape codes such as colors, `clear`, `prev`,
44 | and `start`.
45 |
46 | ```py
47 | 	pprint("{prev}Progress: {orange}{i}{clear}/{total}", i = i, total = total)
48 | ```
49 | """
50 | print((fmt + "{clear}").format(*args, **{**kwargs, **_ansi}), **print_args)
51 | sys.stdout.flush()
52 |
53 | def pprints(fmt: str, *args: Any, **kwargs: Any) -> str:
54 | """
55 | Pretty prints to a string.
56 |
57 | See `datatap.utils.pprint`.
58 | """
59 | return (fmt + "{clear}").format(*args, **{**kwargs, **_ansi})
60 |
61 | def color_repr(entity: Any) -> str:
62 | """
63 | A dynamic pretty-printer that will syntax highlight different python
64 | entities.
65 |
66 | Rarely used on its own, see `datatap.utils.basic_repr`.
67 | """
68 | if entity is None:
69 | return f"{_ansi['orange']}None{_ansi['clear']}"
70 | if isinstance(entity, str):
71 | return f"{_ansi['cyan']}\"{_ansi['green']}{entity}{_ansi['clear']}{_ansi['cyan']}\"{_ansi['clear']}"
72 | if isinstance(entity, (int, float)):
73 | return f"{_ansi['orange']}{entity}{_ansi['clear']}"
74 | if isinstance(entity, (list, tuple)):
75 | entity_list = cast(Union[List[Any], Tuple[Any]], entity)
76 | return (
77 | f"{_ansi['cyan']}{'[' if type(entity_list) == list else '('}" +
78 | f"{_ansi['cyan']},{_ansi['clear']} ".join([color_repr(e) for e in entity_list]) +
79 | f"{_ansi['cyan']}{']' if type(entity_list) == list else ')'}"
80 | )
81 | if isinstance(entity, dict):
82 | entity_dict = cast(Dict[Any, Any], entity)
83 | return (
84 | f"{_ansi['cyan']}{{" +
85 | f"{_ansi['cyan']},{_ansi['clear']} ".join([
86 | f"{color_repr(key)}{_ansi['cyan']}: {color_repr(value)}"
87 | for key, value in entity_dict.items()
88 | ]) +
89 | f"{_ansi['cyan']}}}"
90 | )
91 | return repr(entity)
92 |
93 | def basic_repr(class_name: str, *args: Any, **kwargs: Any) -> str:
94 | """
95 | A function to be used for defining a class's `__repr__` method.
96 | When possible, will pretty-print the object in a way that is both easy
97 | to read, and useful for testing.
98 |
99 | ```py
100 | from datatap.utils import basic_repr
101 |
102 | class Person:
103 | 		name: str
104 | age: int
105 | height: int
106 |
107 | def __repr__(self):
108 | 			return basic_repr("Person", self.name, age = self.age, height = self.height)
109 | ```
110 | """
111 | if not IS_INTERACTIVE and not pretty_print:
112 | positional_properties = [repr(value) for value in args]
113 | named_properties = [f"{key} = {repr(value)}" for key, value in kwargs.items() if value is not None]
114 | properties = ", ".join(positional_properties + named_properties)
115 | return f"{class_name}({properties})"
116 | else:
117 | positional_properties = [
118 | f"{_ansi['green']}{color_repr(value)}{_ansi['clear']}"
119 | for value in args
120 | ]
121 | named_properties = [
122 | f"{_ansi['red']}{key} {_ansi['purple']}= {color_repr(value)}"
123 | for key, value in kwargs.items()
124 | if value is not None
125 | ]
126 | properties = f"{_ansi['cyan']},{_ansi['clear']} ".join(positional_properties + named_properties)
127 | return f"{_ansi['yellow']}{class_name}{_ansi['cyan']}({_ansi['clear']}{properties}{_ansi['cyan']}){_ansi['clear']}"
128 |
--------------------------------------------------------------------------------
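A small sketch tying the printing helpers together; `force_pretty_print` is only needed outside a TTY (e.g. under Jupyter), and the class below is purely illustrative:

```py
from datatap.utils import basic_repr, force_pretty_print, pprint

force_pretty_print()  # opt in to ANSI colors even in a non-interactive environment

class Person:
    def __init__(self, name: str, age: int):
        self.name = name
        self.age = age

    def __repr__(self) -> str:
        return basic_repr("Person", self.name, age = self.age)

pprint("{green}Loaded {yellow}{count}{green} people", count = 2)
print(Person("Ada", 36))
```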
/dev_requirements.txt:
--------------------------------------------------------------------------------
1 | boto3-stubs
--------------------------------------------------------------------------------
/pyrightconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "typeCheckingMode": "strict",
3 | "reportMissingTypeStubs": "information",
4 | "reportUnusedImport": "warning",
5 | "reportUnusedVariable": "information",
6 | "stubPath": "./typings",
7 | "include": [
8 | "datatap/api",
9 | "datatap/comet",
10 | "datatap/dataset",
11 | "datatap/droplet",
12 | "datatap/geometry",
13 | // "datatap/metrics",
14 | "datatap/template",
15 | "datatap/utils",
16 | // "examples",
17 | // "tests"
18 | ]
19 | }
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3>=1.15.14
2 | fastjsonschema==2.14.2
3 | toolz>=0.10.0
4 | simplejson>=3.17.0
5 | jsonmerge==1.7.0
6 | neo4j==4.0.1
7 | Shapely
8 | requests>=2.23.0
9 | typing-extensions
10 |
--------------------------------------------------------------------------------
/requirements_image.txt:
--------------------------------------------------------------------------------
1 | palettable>=3.3.0
2 | Flask>=1.1.1
3 | Pillow>=7.1.0
4 | Shapely>=1.7.0
5 | scipy>=1.4.1
6 | importlib-resources>=1.4.0
7 | boto3>=1.15.14
--------------------------------------------------------------------------------
/requirements_importers.txt:
--------------------------------------------------------------------------------
1 | Shapely==1.7.0
2 | scikit-image==0.17.2
3 | Unidecode==1.1.1
4 |
--------------------------------------------------------------------------------
/requirements_metrics.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.2
2 | sortedcontainers>=0.8.1
3 | matplotlib>=3.3.2
4 | scipy>=1.5.2
5 |
--------------------------------------------------------------------------------
/requirements_tf.txt:
--------------------------------------------------------------------------------
1 | tensorflow>=2.0.0
--------------------------------------------------------------------------------
/requirements_torch.txt:
--------------------------------------------------------------------------------
1 | torch>=1.8.0+cpu
2 | torchvision>=0.9.0+cpu
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import setuptools
3 |
4 | with open("README.md", "r") as f:
5 | long_description = f.read()
6 |
7 | with open("requirements.txt", "r") as f:
8 | requirements = f.read().strip().split("\n")
9 |
10 | extras_require = {}
11 | for path in glob.glob("requirements_*.txt"):
12 | extra = path.split("_")[-1].split(".")[0]
13 | with open(path, "r") as f:
14 | extras_require[extra] = [
15 | dep
16 | for dep in map(str.strip, f.readlines())
17 | if dep != "" and not dep.startswith("-")
18 | ]
19 |
20 |
21 | setuptools.setup(
22 | name = "datatap",
23 | version = "0.3.0",
24 | author = "Zensors' Dev Team",
25 | author_email = "dev-team@zensors.com",
26 | description = "Client library for dataTap",
27 | long_description = long_description,
28 | long_description_content_type = "text/markdown",
29 | url = "https://github.com/zensors/datatap-python", # pypi will add extra information if the url is the repo
30 | packages = setuptools.find_packages(),
31 | package_data = { "": ["image/assets/*"], "datatap": ["py.typed"] },
32 | classifiers = [
33 | "Programming Language :: Python :: 3",
34 | "Operating System :: OS Independent",
35 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
36 | ],
37 | python_requires = ">=3.7",
38 | install_requires = requirements,
39 | extras_require = extras_require,
40 | dependency_links = [
41 | "https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html"
42 | ],
43 | )
44 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/tests/__init__.py
--------------------------------------------------------------------------------
/tests/metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/tests/metrics/__init__.py
--------------------------------------------------------------------------------
/tests/metrics/test_iou.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 | from datatap.droplet import (BoundingBox, ClassAnnotation, Image,
5 | ImageAnnotation, Instance)
6 | from datatap.geometry import Point, Rectangle
7 | from datatap.metrics.confusion_matrix import ConfusionMatrix
8 | from datatap.metrics.iou import (generate_confusion_matrix,
9 | generate_pr_curve)
10 | from datatap.metrics.precision_recall_curve import (_DetectionEvent as DetectionEvent,
11 | PrecisionRecallCurve)
12 | from datatap.template import (ClassAnnotationTemplate,
13 | ImageAnnotationTemplate, InstanceTemplate)
14 |
15 | tpl = ImageAnnotationTemplate(
16 | classes = {
17 | "a": ClassAnnotationTemplate(
18 | instances = InstanceTemplate(bounding_box = True)
19 | ),
20 | "b": ClassAnnotationTemplate(
21 | instances = InstanceTemplate(bounding_box = True)
22 | )
23 | }
24 | )
25 |
26 | im = Image(paths = [])
27 |
28 | gt1 = ImageAnnotation(
29 | image = im,
30 | classes = {
31 | "a": ClassAnnotation(
32 | instances = [
33 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.5, 0.5), Point(0.7, 0.7)))),
34 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.1, 0.1), Point(0.2, 0.2))))
35 | ]
36 | ),
37 | "b": ClassAnnotation(
38 | instances = [
39 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.6, 0.5), Point(0.7, 0.7))))
40 | ]
41 | )
42 | }
43 | )
44 |
45 | pred1 = ImageAnnotation(
46 | image = im,
47 | classes = {
48 | "a": ClassAnnotation(
49 | instances = [
50 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.58, 0.5), Point(0.7, 0.7)), confidence = 0.7)),
51 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.1, 0.1), Point(0.18, 0.19)), confidence = 0.9))
52 | ]
53 | ),
54 | "b": ClassAnnotation(
55 | instances = [
56 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.6, 0.5), Point(0.7, 0.7)), confidence = 0.6)),
57 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.1, 0.8), Point(0.2, 0.9)), confidence = 0.2))
58 | ]
59 | )
60 | }
61 | )
62 |
63 | gt2 = ImageAnnotation(
64 | image = im,
65 | classes = {
66 | "a": ClassAnnotation(
67 | instances = [
68 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.1, 0.1), Point(0.4, 0.4)))),
69 | ]
70 | ),
71 | "b": ClassAnnotation(
72 | instances = []
73 | )
74 | }
75 | )
76 |
77 | pred2 = ImageAnnotation(
78 | image = im,
79 | classes = {
80 | "a": ClassAnnotation(
81 | instances = [
82 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.1, 0.12), Point(0.37, 0.4)), confidence = 0.8)),
83 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.09, 0.08), Point(0.39, 0.4)), confidence = 0.6))
84 | ]
85 | ),
86 | "b": ClassAnnotation(
87 | instances = [
88 | Instance(bounding_box = BoundingBox(Rectangle(Point(0.6, 0), Point(0.8, 0.2)), confidence = 0.3))
89 | ]
90 | )
91 | }
92 | )
93 |
94 | class TestIou(unittest.TestCase):
95 | def test_add_annotation_to_pr_curve_1(self):
96 | pr = PrecisionRecallCurve()
97 | pr.add_annotation(ground_truth = gt1, prediction = pred1, iou_threshold = 0.3)
98 | self.assertEqual(pr.events, {
99 | 0.2: DetectionEvent(0, 1),
100 | 0.6: DetectionEvent(2, -1),
101 | 0.7: DetectionEvent(0, 1),
102 | 0.9: DetectionEvent(1, 0)
103 | })
104 |
105 | def test_add_annotation_to_pr_curve_2(self):
106 | pr = PrecisionRecallCurve()
107 | pr.add_annotation(ground_truth = gt2, prediction = pred2, iou_threshold = 0.3)
108 | self.assertEqual(pr.events, {
109 | 0.3: DetectionEvent(0, 1),
110 | 0.6: DetectionEvent(0, 1),
111 | 0.8: DetectionEvent(1, 0)
112 | })
113 |
114 | def test_generate_pr_curve(self):
115 | pr = generate_pr_curve(ground_truths = [gt1, gt2], predictions = [pred1, pred2], iou_threshold = 0.3)
116 | self.assertEqual(pr.events, {
117 | 0.2: DetectionEvent(0, 1),
118 | 0.3: DetectionEvent(0, 1),
119 | 0.6: DetectionEvent(2, 0),
120 | 0.7: DetectionEvent(0, 1),
121 | 0.8: DetectionEvent(1, 0),
122 | 0.9: DetectionEvent(1, 0)
123 | })
124 |
125 | def test_add_annotation_to_confusion_matrix_1a(self):
126 | cm = ConfusionMatrix(sorted(tpl.classes.keys()))
127 | cm.add_annotation(
128 | ground_truth = gt1,
129 | prediction = pred1,
130 | iou_threshold = 0.3,
131 | confidence_threshold = 0.1
132 | )
133 | self.assertTrue(
134 | np.array_equal(
135 | cm.matrix,
136 | np.array([
137 | [0, 0, 1],
138 | [0, 2, 0],
139 | [0, 0, 1]
140 | ])
141 | )
142 | )
143 |
144 | def test_add_annotation_to_confusion_matrix_1b(self):
145 | cm = ConfusionMatrix(sorted(tpl.classes.keys()))
146 | cm.add_annotation(
147 | ground_truth = gt1,
148 | prediction = pred1,
149 | iou_threshold = 0.3,
150 | confidence_threshold = 0.6
151 | )
152 | self.assertTrue(
153 | np.array_equal(
154 | cm.matrix,
155 | np.array([
156 | [0, 0, 0],
157 | [0, 2, 0],
158 | [0, 0, 1]
159 | ])
160 | )
161 | )
162 |
163 | def test_add_annotation_to_confusion_matrix_1c(self):
164 | cm = ConfusionMatrix(sorted(tpl.classes.keys()))
165 | cm.add_annotation(
166 | ground_truth = gt1,
167 | prediction = pred1,
168 | iou_threshold = 0.3,
169 | confidence_threshold = 0.61
170 | )
171 | self.assertTrue(
172 | np.array_equal(
173 | cm.matrix,
174 | np.array([
175 | [0, 0, 0],
176 | [1, 1, 0],
177 | [0, 1, 0]
178 | ])
179 | )
180 | )
181 |
182 | def test_add_annotation_to_confusion_matrix_2(self):
183 | cm = ConfusionMatrix(sorted(tpl.classes.keys()))
184 | cm.add_annotation(
185 | ground_truth = gt2,
186 | prediction = pred2,
187 | iou_threshold = 0.3,
188 | confidence_threshold = 0.1
189 | )
190 | self.assertTrue(
191 | np.array_equal(
192 | cm.matrix,
193 | np.array([
194 | [0, 1, 1],
195 | [0, 1, 0],
196 | [0, 0, 0]
197 | ])
198 | )
199 | )
200 |
201 | def test_generate_confusion_matrix(self):
202 | cm = generate_confusion_matrix(
203 | template = tpl,
204 | ground_truths = [gt1, gt2],
205 | predictions = [pred1, pred2],
206 | iou_threshold = 0.3,
207 | confidence_threshold = 0.1
208 | )
209 | self.assertTrue(
210 | np.array_equal(
211 | cm.matrix,
212 | np.array([
213 | [0, 1, 2],
214 | [0, 3, 0],
215 | [0, 0, 1]
216 | ])
217 | )
218 | )
219 |
220 | if __name__ == "__main__":
221 | unittest.main()
222 |
--------------------------------------------------------------------------------
/tests/metrics/test_precision_recall_curve.py:
--------------------------------------------------------------------------------
1 | # pyright: reportPrivateUsage=false
2 |
3 | from datatap.metrics.precision_recall_curve import _DetectionEvent as DetectionEvent, MaximizeF1Result, PrecisionRecallCurve
4 |
5 | import unittest
6 |
7 | class TestPrecisionRecallCurve(unittest.TestCase):
8 | def test_add(self):
9 | a = PrecisionRecallCurve()
10 | a._add_event(0.1, DetectionEvent(0, 1))
11 | a._add_event(0.8, DetectionEvent(1, -1))
12 | a._add_ground_truth_positives(3)
13 |
14 | b = PrecisionRecallCurve()
15 | b._add_event(0.25, DetectionEvent(1, 0))
16 | b._add_event(0.6, DetectionEvent(0, -1))
17 | b._add_ground_truth_positives(2)
18 |
19 | c = a + b
20 | self.assertEqual(c.ground_truth_positives, 5)
21 | self.assertEqual(c.events, {
22 | 0.1: DetectionEvent(0, 1),
23 | 0.25: DetectionEvent(1, 0),
24 | 0.6: DetectionEvent(0, -1),
25 | 0.8: DetectionEvent(1, -1)
26 | })
27 |
28 | def test_maximize_f1(self):
29 | pr = PrecisionRecallCurve()
30 | pr._add_ground_truth_positives(5)
31 | pr._add_event(0.1, DetectionEvent(0, 1)) # p = 4/6, r = 4/5, f1 = 8/11
32 | pr._add_event(0.25, DetectionEvent(1, 0)) # p = 4/5, r = 4/5, f1 = 4/5
33 | pr._add_event(0.6, DetectionEvent(2, -1)) # p = 3/4, r = 3/5, f1 = 2/3
34 | pr._add_event(0.72, DetectionEvent(0, 1)) # p = 1/3, r = 1/5, f1 = 1/4
35 | pr._add_event(0.8, DetectionEvent(1, 0)) # p = 1/2, r = 1/5, f1 = 2/7
36 | pr._add_event(0.9, DetectionEvent(0, 1)) # p = 0/1, r = 0/5, f1 = 0
37 | self.assertEqual(pr.maximize_f1(), MaximizeF1Result(threshold = 0.25, precision = 0.8, recall = 0.8, f1 = 0.8))
38 |
39 | if __name__ == "__main__":
40 | unittest.main()
41 |
--------------------------------------------------------------------------------
/typings/PIL/Image.pyi:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from typing import Optional, Sequence, SupportsBytes, Tuple
3 |
4 |
5 | class Image:
6 | size: Tuple[int, int]
7 |
8 | def convert(self, mode: str) -> Image: ...
9 | def resize(self, size: Tuple[int, int], resample: Optional[int]) -> Image: ...
10 | def getdata(self) -> Sequence[int]: ...
11 |
12 | def open(data: BytesIO) -> Image: ...
13 | def fromarray(buffer: SupportsBytes, mode: Optional[str]) -> Image: ...
14 |
15 | BOX: int
16 |
--------------------------------------------------------------------------------
/typings/PIL/__init__.pyi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/typings/PIL/__init__.pyi
--------------------------------------------------------------------------------
/typings/boto3/__init__.pyi:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 | from typing import Literal, TypedDict
3 |
4 |
5 | class S3Resource:
6 | def Object(self, bucket_name: str, path_name: str) -> S3Object: ...
7 |
8 | class S3Object:
9 | def get(self) -> S3ObjectDict: ...
10 |
11 | class S3ObjectDict(TypedDict):
12 | Body: BytesIO
13 |
14 | def resource(type: Literal["s3"]) -> S3Resource: ...
15 |
--------------------------------------------------------------------------------
/typings/comet_ml/API.pyi:
--------------------------------------------------------------------------------
1 | from typing import List, Optional, overload
2 |
3 | from typing_extensions import TypedDict
4 |
5 | from .APIExperiment import APIAsset, APIExperiment
6 | from .query import QueryExpression
7 |
8 | class APIProject(TypedDict):
9 | projectId: str
10 | projectName: str
11 | ownerUserName: str
12 | projectDescription: str
13 | workspaceName: str
14 | numberOfExperiments: int
15 | lastUpdated: int
16 | public: bool
17 |
18 | class APIRegistryExperimentModel(TypedDict):
19 | experimentModelId: str
20 | experimentModelName: str
21 | experimentKey: str
22 |
23 | class APIRegistryVersion(TypedDict):
24 | registryModelItemId: str
25 | experimentModel: APIRegistryExperimentModel
26 | version: str
27 | comment: str
28 | stages: List[str]
29 | userName: str
30 | createdAt: int
31 | lastUpdated: int
32 | assets: List[APIAsset]
33 | restApiUrl: str
34 |
35 | class APIRegistryModel(TypedDict):
36 | registryModelId: str
37 | modelName: str
38 | description: str
39 | isPublic: bool
40 | createdAt: int
41 | lastUpdate: int
42 | userName: str
43 | versions: List[APIRegistryVersion]
44 |
45 | class API:
46 | 	def __init__(self, api_key: Optional[str] = ...) -> None: ...
47 |
48 | def query(
49 | self,
50 | workspace: str,
51 | project_name: str,
52 | query: QueryExpression,
53 | archived: bool = ...
54 | ) -> List[APIExperiment]: ...
55 |
56 | # From a type perspective, these could be collapsed into two overloads
57 | 	# but each one is doing something different, so I preferred to make it
58 | # explicit
59 | @overload
60 | def get(self) -> List[str]: ...
61 | @overload
62 | def get(self, workspace: str = ...) -> List[str]: ...
63 | @overload
64 | def get(self, workspace: str = ..., project_name: str = ...) -> List[str]: ...
65 | @overload
66 | def get(self, workspace: str = ..., project_name: str = ..., experiment: str = ...) -> APIExperiment: ...
67 |
68 | def get_experiment(self, workspace: str = ..., project_name: str = ..., experiment: str = ...) -> APIExperiment: ...
69 | def get_project(self, workspace: str, project_name: str) -> APIProject: ...
70 | def get_projects(self, workspace: str) -> List[APIProject]: ...
71 |
72 | def update_registry_model_version(
73 | self,
74 | workspace: str,
75 | registry_name: str,
76 | version: str,
77 | comment: Optional[str] = ...,
78 | stages: Optional[List[str]] = ...
79 | ) -> None: ...
80 | def get_registry_model_details(
81 | self,
82 | workspace: str,
83 | registry_name: str,
84 | version: Optional[str] = ...
85 | ) -> APIRegistryModel: ...
86 | def get_registry_model_names(self, workspace: str) -> List[str]: ...
--------------------------------------------------------------------------------
/typings/comet_ml/APIExperiment.pyi:
--------------------------------------------------------------------------------
1 | from typing import Any, List, Optional, overload
2 |
3 | from typing_extensions import Literal, TypedDict
4 |
5 | class APIAsset(TypedDict):
6 | fileName: str
7 | fileSize: int
8 | runContext: Optional[str]
9 | step: Optional[int]
10 | link: str
11 | createdAt: int
12 | dir: str
13 | canView: bool
14 | audio: bool
15 | video: bool
16 | histogram: bool
17 | image: bool
18 | type: str
19 | metadata: Any
20 | assetId: str
21 |
22 | class APIMetrics(TypedDict):
23 | metricName: str
24 | metricValue: str
25 | timestamp: int
26 | step: int
27 | epoch: Optional[int]
28 | runContext: Optional[str]
29 |
30 | class APIMetricsSummary(TypedDict):
31 | name: str
32 | valueMax: str
33 | valueMin: str
34 | timestampMax: int
35 | timestampMin: int
36 | timestampCurrent: int
37 | stepMax: int
38 | stepMin: int
39 | stepCurrent: int
40 | valueCurrent: str
41 |
42 | class APIMetadata(TypedDict):
43 | archived: bool
44 | durationMillis: int
45 | endTimeMillis: int
46 | experimentKey: str
47 | experimentName: Optional[str]
48 | fileName: Optional[str]
49 | 	filePath: Optional[str]
50 | projectId: str
51 | projectName: str
52 | running: bool
53 | startTimeMillis: int
54 | throttle: bool
55 | workspaceName: str
56 |
57 |
58 | class APIExperiment:
59 | id: str
60 | url: str
61 | name: str
62 | start_server_timestamp: int
63 |
64 | 	def __init__(self, *args: Any, **kwargs: Any) -> None: ...
65 |
66 | def add_tags(self, tags: List[str]) -> None: ...
67 | @overload
68 | def get_asset(self, asset_id: str, return_type: Literal["binary"] = ...) -> bytes: ...
69 | @overload
70 | def get_asset(self, asset_id: str, return_type: Literal["text"]) -> str: ...
71 | @overload
72 | def get_asset(self, asset_id: str, return_type: str) -> str | bytes: ...
73 | def get_asset_list(self, asset_type: str = ...) -> List[APIAsset]: ...
74 | def get_model_asset_list(self, model_name: str) -> List[APIAsset]: ...
75 | def download_model(self, name: str, output_path: str = ..., expand: bool = ...) -> None: ...
76 | def register_model(
77 | self,
78 | model_name: str,
79 | version: str = ...,
80 | workspace: Optional[str] = ...,
81 | registry_name: Optional[str] = ...,
82 | description: Optional[str] = ...,
83 | comment: Optional[str] = ...,
84 | stages: Optional[List[str]] = ...
85 | ) -> None: ...
86 | def get_metrics(self, metric: Optional[str] = ...) -> List[APIMetrics]: ...
87 | def log_other(self, key: str, value: Any, timestamp: Optional[int] = ...) -> None: ...
88 | def get_tags(self) -> List[str]: ...
89 | def get_metadata(self) -> APIMetadata: ...
90 | @overload
91 | def get_metrics_summary(self) -> List[APIMetricsSummary]: ...
92 | @overload
93 | def get_metrics_summary(self, metric: str) -> APIMetricsSummary: ...
94 | @overload
95 | def get_others_summary(self) -> List[APIMetricsSummary]: ...
96 | @overload
97 | def get_others_summary(self, other: str) -> List[str]: ...
98 |
--------------------------------------------------------------------------------
/typings/comet_ml/ExistingExperiment.pyi:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from .Experiment import Experiment
4 |
5 |
6 | class ExistingExperiment(Experiment):
7 |
8 | def __init__(
9 | self,
10 | api_key: Optional[str] = ...,
11 | previous_experiment: Optional[str] = ...,
12 | project_name: Optional[str] = ...,
13 | workspace: Optional[str] = ...,
14 | log_code: bool = ...,
15 | log_graph: bool = ...,
16 | auto_param_logging: bool = ...,
17 | auto_metric_logging: bool = ...,
18 | auto_weight_logging: bool = ...,
19 | auto_output_logging: bool = ...,
20 | auto_log_co2: bool = ...,
21 | parse_args: bool = ...,
22 | log_env_details: bool = ...,
23 | log_env_gpu: bool = ...,
24 | log_env_cpu: bool = ...,
25 | log_env_host: bool = ...,
26 | log_git_metadata: bool = ...,
27 | log_git_patch: bool = ...,
28 | display_summary_level: int = ...,
29 | disabled: bool = ...
30 | 	) -> None: ...
31 |
--------------------------------------------------------------------------------
/typings/comet_ml/Experiment.pyi:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Union, Mapping
3 |
4 | from io import BufferedReader
5 |
6 | from typing_extensions import Literal
7 |
8 | class Experiment:
9 | id: str
10 | context: str
11 |
12 | def __init__(
13 | self,
14 | api_key: Optional[str] = ...,
15 | project_name: Optional[str] = ...,
16 | workspace: Optional[str] = ...,
17 | log_code: bool = ...,
18 | log_graph: bool = ...,
19 | auto_param_logging: bool = ...,
20 | auto_metric_logging: bool = ...,
21 | auto_weight_logging: bool = ...,
22 | auto_output_logging: bool = ...,
23 | auto_log_co2: bool = ...,
24 | parse_args: bool = ...,
25 | log_env_details: bool = ...,
26 | log_env_gpu: bool = ...,
27 | log_env_cpu: bool = ...,
28 | log_env_host: bool = ...,
29 | log_git_metadata: bool = ...,
30 | log_git_patch: bool = ...,
31 | display_summary_level: int = ...,
32 | disabled: bool = ...
33 | 	) -> None: ...
34 |
35 | def set_step(self, step: int) -> None: ...
36 | def log_asset(
37 | self,
38 | file_data: str | BufferedReader,
39 | file_name: Optional[str] = ...,
40 | overwrite: bool = ...,
41 | copy_to_tmp: bool = ...,
42 | step: Optional[int] = ...
43 | ) -> None: ...
44 | def log_asset_data(
45 | self,
46 | data: Any,
47 | name: Optional[str] = ...,
48 | overwrite: bool = ...,
49 | step: Optional[int] = ...,
50 | metadata: Optional[Any] = ...,
51 | file_name: Optional[str] = ...
52 | ) -> None: ...
53 | def log_metric(
54 | self,
55 | name: str,
56 | value: str,
57 | step: Optional[int] = ...,
58 | epoch: Optional[int] = ...,
59 | include_context: bool = ...
60 | ) -> None: ...
61 | def log_metrics(
62 | self,
63 | dict: Dict[str, str],
64 | prefix: Optional[str] = ...,
65 | step: Optional[int] = ...,
66 | epoch: Optional[int] = ...
67 | ) -> None: ...
68 | def log_table(
69 | self,
70 | filename: str,
71 | tabular_data: Optional[Sequence[Sequence[Any]]] = ...,
72 | headers: Literal[False] | Sequence[str] = ...
73 | ) -> None: ...
74 | def log_model(
75 | self,
76 | name: str,
77 | file_or_folder: str,
78 | file_name: Optional[str] = ...,
79 | overwrite: bool = ...,
80 | metadata: Any = ...,
81 | copy_to_tmp: bool = ...
82 | ) -> None: ...
83 | def log_confusion_matrix(
84 | self,
85 | y_true: Optional[List[int]] = ...,
86 | y_predicted: Optional[Sequence[int]] = ...,
87 | matrix: Optional[List[List[Any]]] = ...,
88 | labels: Optional[List[str]] = ...,
89 | title: str = ...,
90 | row_label: str = ...,
91 | column_label: str = ...,
92 | max_examples_per_cell: int = ...,
93 | max_categories: int = ...,
94 | winner_function: Optional[Callable[[List[List[int]]], int]] = ...,
95 | index_to_example_function: Optional[Callable[[int], str | int | Dict[str, str]]] = ...,
96 | cache: bool = ...,
97 | file_name: str = ...,
98 | overwrite: bool = ...,
99 | step: Optional[int] = ...
100 | ) -> None: ...
101 | def log_parameter(
102 | self,
103 | name: str,
104 | value: Union[float, int, bool, str, List[Any]],
105 | step: Optional[int] = ...,
106 | ) -> None: ...
107 | def log_parameters(
108 | self,
109 | parameters: Mapping[str, Any],
110 | prefix: Optional[str] = ...,
111 | step: Optional[int] = ...,
112 | ) -> None: ...
113 | def log_other(
114 | self,
115 | key: str,
116 | value: str,
117 | ) -> None: ...
118 | def get_other(
119 | self,
120 | key: str
121 | ) -> str: ...
122 |
123 | @contextmanager
124 | def context_manager(self, name: str) -> Iterator[Experiment]: ...
--------------------------------------------------------------------------------
/typings/comet_ml/__init__.pyi:
--------------------------------------------------------------------------------
1 | from .API import API, APIProject, APIRegistryModel, APIRegistryVersion
2 | from .APIExperiment import APIExperiment
3 | from .ExistingExperiment import ExistingExperiment
4 | from .Experiment import Experiment
5 |
6 | __all__ = [
7 | "API",
8 | "APIExperiment",
9 | "ExistingExperiment",
10 | "Experiment",
11 | ]
--------------------------------------------------------------------------------
/typings/comet_ml/exceptions.pyi:
--------------------------------------------------------------------------------
1 | class NotFound(Exception): ...
--------------------------------------------------------------------------------
/typings/comet_ml/query/__init__.pyi:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 |
4 |
5 |
6 | class QueryExpression:
7 | def startswith(self, prefix: str) -> QueryExpression: ...
8 |
9 | class Tag(QueryExpression):
10 | 	def __init__(self, tag: str) -> None: ...
11 |
12 | class Metadata(QueryExpression):
13 | 	def __init__(self, metadata: str) -> None: ...
--------------------------------------------------------------------------------
/typings/dask/__init__.pyi:
--------------------------------------------------------------------------------
1 | from typing import Callable, TypeVar, overload
2 | from .delayed import Delayed
3 |
4 | A = TypeVar("A")
5 | B = TypeVar("B")
6 | C = TypeVar("C")
7 | D = TypeVar("D")
8 | E = TypeVar("E")
9 |
10 | T = TypeVar("T")
11 |
12 | @overload
13 | def delayed(fn: Callable[[A], T]) -> Callable[[A], Delayed[T]]: ...
14 | @overload
15 | def delayed(fn: Callable[[A, B], T]) -> Callable[[A, B], Delayed[T]]: ...
16 | @overload
17 | def delayed(fn: Callable[[A, B, C], T]) -> Callable[[A, B, C], Delayed[T]]: ...
18 | @overload
19 | def delayed(fn: Callable[[A, B, C, D], T]) -> Callable[[A, B, C, D], Delayed[T]]: ...
20 | @overload
21 | def delayed(fn: Callable[[A, B, C, D, E], T]) -> Callable[[A, B, C, D, E], Delayed[T]]: ...
22 | def delayed(fn: Callable[..., T]) -> Callable[..., Delayed[T]]: ...
23 |
--------------------------------------------------------------------------------
/typings/dask/bag.pyi:
--------------------------------------------------------------------------------
1 | from typing import Generic, Iterable, TypeVar, List
2 |
3 | from .delayed import Delayed
4 |
5 | T = TypeVar("T")
6 |
7 | class Bag(Generic[T]):
8 | def to_delayed(self) -> List[Delayed[List[T]]]: ...
9 | def take(self, count: int) -> List[T]: ...
10 |
11 | def from_delayed(delayed: Iterable[Delayed[Iterable[T]]]) -> Bag[T]: ...
--------------------------------------------------------------------------------
/typings/dask/delayed.pyi:
--------------------------------------------------------------------------------
1 | from typing import Generic, TypeVar
2 |
3 | T = TypeVar("T")
4 |
5 | class Delayed(Generic[T]):
6 | def compute(self) -> T: ...
7 |
--------------------------------------------------------------------------------
/typings/fsspec/__init__.pyi:
--------------------------------------------------------------------------------
1 | from io import BufferedReader
2 |
3 |
4 | class OpenFile(BufferedReader): # this isn't true but it's close enough for how we're using fsspec
5 | pass
6 |
7 | def open(uri: str) -> OpenFile: ...
8 |
9 |
--------------------------------------------------------------------------------
/typings/matplotlib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/typings/matplotlib/__init__.py
--------------------------------------------------------------------------------
/typings/matplotlib/pyplot/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | class Figure:
4 | pass
5 |
6 | def figure() -> Figure: ...
7 | def plot(*args: Any) -> None: ...
8 | def xlabel(label: str) -> None: ...
9 | def ylabel(label: str) -> None: ...
10 |
--------------------------------------------------------------------------------
/typings/neo4j/__init__.pyi:
--------------------------------------------------------------------------------
1 | from .graph_database import GraphDatabase
2 | from .transaction import Transaction
3 |
4 | __all__ = [
5 | "GraphDatabase",
6 | "Transaction",
7 | ]
8 |
--------------------------------------------------------------------------------
/typings/neo4j/driver.pyi:
--------------------------------------------------------------------------------
1 | from typing import ContextManager
2 |
3 | from .session import Session
4 |
5 | class Driver:
6 | def session(self) -> ContextManager[Session]: ...
7 |
--------------------------------------------------------------------------------
/typings/neo4j/graph_database.pyi:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | from .driver import Driver
3 |
4 | class GraphDatabase:
5 | @staticmethod
6 | def driver(url: str, *, auth: Tuple[str, str]) -> Driver: ...
7 |
--------------------------------------------------------------------------------
/typings/neo4j/record.pyi:
--------------------------------------------------------------------------------
1 | from typing import Generic, Iterable, TypeVar
2 |
3 | T = TypeVar("T")
4 | V = TypeVar("V")
5 | K = TypeVar("K", str, int)
6 | R = TypeVar("R")
7 |
8 | class Record(Generic[T], Iterable[V]):
9 | def __getitem__(self, key: K) -> R: ...
10 |
11 |
--------------------------------------------------------------------------------
/typings/neo4j/result.pyi:
--------------------------------------------------------------------------------
1 | from typing import Generic, Iterator, TypeVar
2 |
3 | from .record import Record
4 |
5 | T = TypeVar("T")
6 |
7 | class Result(Generic[T]):
8 | def __iter__(self) -> Iterator[Record[T]]: ...
9 | def single(self) -> Record[T]: ...
10 |
--------------------------------------------------------------------------------
/typings/neo4j/session.pyi:
--------------------------------------------------------------------------------
1 | from typing import Any, ContextManager, Dict, TypeVar, Callable, overload
2 |
3 | from neo4j.result import Result
4 |
5 | from .transaction import Transaction
6 |
7 |
8 | _F = TypeVar("_F")
9 |
10 | class Session:
11 | def begin_transaction(self) -> ContextManager[Transaction]: ...
12 |
13 | def write_transaction(self, fn: Callable[[Transaction], _F]) -> _F: ...
14 |
15 | @overload
16 | def run(self, query: str, args: Dict[str, Any]) -> Result[_F]: ...
17 | @overload
18 | def run(self, query: str, **kwargs: Any) -> Result[_F]: ...
19 |
--------------------------------------------------------------------------------
/typings/neo4j/transaction.pyi:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, TypeVar, overload
2 | from .result import Result
3 |
4 | T = TypeVar("T")
5 |
6 | class Transaction:
7 | @overload
8 | def run(self, query: str, args: Dict[str, Any]) -> Result[T]: ...
9 | @overload
10 | def run(self, query: str, **kwargs: Any) -> Result[T]: ...
11 |
--------------------------------------------------------------------------------
/typings/pycocotools/__init__.pyi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/typings/pycocotools/__init__.pyi
--------------------------------------------------------------------------------
/typings/pycocotools/mask.pyi:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from typing import Sequence, Tuple
4 |
5 | import numpy as np
6 | from typing_extensions import TypedDict
7 |
8 |
9 | class CocoRleJson(TypedDict):
10 | counts: Sequence[int]
11 | size: Tuple[int, int]
12 |
13 | class CocoRle:
14 | pass
15 |
16 | def frPyObjects(json: CocoRleJson, height: int, width: int) -> CocoRle: ...
17 | def decode(rle: CocoRle) -> np.ndarray: ...
18 |
--------------------------------------------------------------------------------
/typings/requests/__init__.pyi:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Generator, Optional, overload
2 |
3 | from typing_extensions import Literal
4 |
5 | class Response:
6 | content: bytes
7 | ok: bool
8 |
9 | def json(self) -> Any: ...
10 | @overload
11 | def iter_lines(self, *, decode_unicode: Literal[True], chunk_size: int = ...) -> Generator[str, None, None]: ...
12 | @overload
13 | def iter_lines(self, *, decode_unicode: Optional[Literal[False]] = ..., chunk_size: int = ...) -> Generator[bytes, None, None]: ...
14 |
15 | def get(
16 | url: str,
17 | params: Dict[str, str] | None = ...,
18 | headers: Dict[str, str | None] | None = ...,
19 | stream: bool = ...,
20 | ) -> Response: ...
21 |
22 | def post(
23 | url: str,
24 | params: Dict[str, str] | None = ...,
25 | headers: Dict[str, str | None] | None = ...,
26 | stream: bool = ...,
27 | json: Any = ...
28 | ) -> Response: ...
--------------------------------------------------------------------------------
/typings/scipy/__init__.pyi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/typings/scipy/__init__.pyi
--------------------------------------------------------------------------------
/typings/scipy/optimize/__init__.pyi:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | from numpy import ndarray
3 |
4 | def linear_sum_assignment(cost_matrix: ndarray, maximize: bool = ...) -> Tuple[ndarray, ndarray]: ...
5 |
--------------------------------------------------------------------------------
/typings/shapely/__init__.pyi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/typings/shapely/__init__.pyi
--------------------------------------------------------------------------------
/typings/shapely/geometry/__init__.pyi:
--------------------------------------------------------------------------------
1 | class Polygon:
2 | pass
3 |
4 | def box(minx: float, miny: float, maxx: float, maxy: float, ccw: bool = ...) -> Polygon: ...
5 |
--------------------------------------------------------------------------------
/typings/skimage/__init__.pyi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/typings/skimage/__init__.pyi
--------------------------------------------------------------------------------
/typings/skimage/measure.pyi:
--------------------------------------------------------------------------------
1 | from typing import Optional, Sequence
2 |
3 | import numpy as np
4 |
5 |
6 | Polygon = np.ndarray
7 |
8 | def approximate_polygon(polygon: Polygon, tolerance: float) -> Polygon: ...
9 | def find_contours(image: np.ndarray, level: Optional[float]) -> Sequence[Polygon]: ...
10 |
--------------------------------------------------------------------------------
/typings/sortedcontainers/__init__.pyi:
--------------------------------------------------------------------------------
1 | from typing import Generic, Iterator, MutableMapping, TypeVar
2 |
3 |
4 | K = TypeVar("K")
5 | V = TypeVar("V")
6 |
7 | class SortedDict(Generic[K, V], MutableMapping[K, V]):
8 | def __reversed__(self) -> Iterator[K]: ...
9 | def copy(self) -> SortedDict[K, V]: ...
10 |
--------------------------------------------------------------------------------
/typings/tensorflow/__init__.pyi:
--------------------------------------------------------------------------------
1 | from . import distribute
2 | from . import data
3 | from . import io
4 | from .types import DType, int8, uint8, int32, int64, float32, float64, string, TensorShape
5 | from .tensor import Tensor, constant, stack, py_function
6 |
7 | __all__ = [
8 | "distribute",
9 | "data",
10 | "io",
11 | "DType",
12 | "int8",
13 | "uint8",
14 | "int32",
15 | "int64",
16 | "float32",
17 | "float64",
18 | "string",
19 | "TensorShape",
20 | "Tensor",
21 | "constant",
22 | "stack",
23 | "py_function",
24 | ]
--------------------------------------------------------------------------------
/typings/tensorflow/data/__init__.pyi:
--------------------------------------------------------------------------------
1 | from .dataset import Dataset
2 |
3 | __all__ = [
4 | "Dataset"
5 | ]
--------------------------------------------------------------------------------
/typings/tensorflow/data/dataset.pyi:
--------------------------------------------------------------------------------
1 | from typing import Callable, Generator, Iterable, List, TypeVar, NewType
2 |
3 | import tensorflow as tf
4 |
5 | _T = TypeVar("_T")
6 |
7 | DTypeDeepIterable = tf.DType | Iterable["DTypeDeepIterable"]
8 | TensorShapeDeepIterable = tf.TensorShape | Iterable["TensorShapeDeepIterable"]
9 |
10 | _V = TypeVar("_V")
11 |
12 | class Dataset:
13 | @staticmethod
14 | def from_generator(
15 | gen: Callable[[_T], Generator[Iterable[tf.Tensor], None, None]] | Callable[[], Generator[Iterable[tf.Tensor], None, None]],
16 | output_type: DTypeDeepIterable = ...,
17 | output_shapes: TensorShapeDeepIterable = ...,
18 | args: _T = ...
19 | ) -> Dataset: ...
20 |
21 | def __iter__(self) -> Generator[Iterable[tf.Tensor], None, None]: ...
22 |
23 | def map(self, map_fn: Callable[..., Iterable[tf.Tensor]], num_parallel_calls: int = ..., deterministic: bool = ...) -> Dataset: ...
24 | def prefetch(self, buffer_size: int) -> Dataset: ...
--------------------------------------------------------------------------------
/typings/tensorflow/distribute/__init__.pyi:
--------------------------------------------------------------------------------
1 | from . import experimental
2 | from .input_context import InputContext
3 | from .distributed_dataset import DistributedDataset
4 |
5 | __all__ = [
6 | "experimental",
7 | "InputContext",
8 | "DistributedDataset",
9 | ]
--------------------------------------------------------------------------------
/typings/tensorflow/distribute/distributed_dataset.pyi:
--------------------------------------------------------------------------------
1 | from typing import Generator, Iterable
2 |
3 | import tensorflow as tf
4 |
5 | class DistributedDataset:
6 |
7 | def __iter__(self) -> Generator[Iterable[tf.Tensor], None, None]: ...
--------------------------------------------------------------------------------
/typings/tensorflow/distribute/experimental/__init__.pyi:
--------------------------------------------------------------------------------
1 | from .strategy import Strategy, MultiWorkerMirroredStrategy
2 |
3 | __all__ = [
4 | "Strategy",
5 | "MultiWorkerMirroredStrategy",
6 | ]
--------------------------------------------------------------------------------
/typings/tensorflow/distribute/experimental/strategy.pyi:
--------------------------------------------------------------------------------
1 | from typing import Callable, Generator
2 | import tensorflow as tf
3 |
4 | class Strategy:
5 | def experimental_distribute_datasets_from_function(
6 | self,
7 | fn: Callable[[tf.distribute.InputContext], tf.data.Dataset]
8 | ) -> tf.distribute.DistributedDataset: ...
9 |
10 | class MultiWorkerMirroredStrategy(Strategy):
11 | pass
--------------------------------------------------------------------------------
/typings/tensorflow/distribute/input_context.pyi:
--------------------------------------------------------------------------------
1 | class InputContext:
2 | input_pipeline_id: int
3 | num_input_pipelines: int
--------------------------------------------------------------------------------
/typings/tensorflow/io/__init__.py:
--------------------------------------------------------------------------------
1 | from .decode_image import decode_jpeg
2 |
3 | __all__ = [
4 | "decode_jpeg",
5 | ]
--------------------------------------------------------------------------------
/typings/tensorflow/io/decode_image.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 | import tensorflow as tf
3 |
4 | def decode_jpeg(img: bytes, channels: Optional[int] = ..., dtype: tf.DType = ..., name: str = ..., expand_animations: bool = ...) -> tf.Tensor: ...
--------------------------------------------------------------------------------
/typings/tensorflow/tensor.pyi:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, Iterable
2 |
3 | import tensorflow as tf
4 |
5 |
6 | class Tensor:
7 | def numpy(self) -> Any: ...
8 |
9 |
10 | def constant(arraylike: Any, dtype: tf.DType = ..., shape: Iterable[int] | tf.TensorShape = ...) -> Tensor: ...
11 | def stack(tensors: Iterable[Tensor]) -> Tensor: ...
12 | def py_function(func: Callable[[Tensor], Tensor], inp: Any, Tout: tf.DType | Iterable[tf.DType], name: str = ...) -> Tensor: ...
--------------------------------------------------------------------------------
/typings/tensorflow/types.pyi:
--------------------------------------------------------------------------------
1 | from typing import Iterable, NewType, Optional
2 |
3 | DType = NewType("DType", str)
4 |
5 | uint8: DType
6 | int8: DType
7 | int32: DType
8 | int64: DType
9 | float32: DType
10 | float64: DType
11 | string: DType
12 |
13 | class TensorShape:
14 | dims: Optional[Iterable[int]]
15 | ndims: Optional[int]
16 | rank: Optional[int]
17 |
18 | 	def __init__(self, dims: Optional[Iterable[Optional[int]]]) -> None: ...
--------------------------------------------------------------------------------
/typings/torchvision/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/typings/torchvision/__init__.py
--------------------------------------------------------------------------------
/typings/torchvision/transforms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zensors/datatap-python/1a05b354f20bbbc24b9bad76e919e0cc9542538c/typings/torchvision/transforms/__init__.py
--------------------------------------------------------------------------------
/typings/torchvision/transforms/functional.pyi:
--------------------------------------------------------------------------------
1 | import PIL.Image
2 | import torch
3 |
4 | def to_tensor(img: PIL.Image.Image) -> torch.Tensor: ...
5 | def to_pil_image(tensor: torch.Tensor, mode: str = ...) -> PIL.Image.Image: ...
--------------------------------------------------------------------------------