├── .gitignore
├── .gitmodules
├── README.md
├── annotators.py
├── assets
│   ├── img.jpg
│   ├── img_canny.jpg
│   ├── img_color_shuffler.jpg
│   ├── img_content_shuffler.jpg
│   ├── img_hed.jpg
│   ├── img_kitchen_sink.jpg
│   ├── img_lineart.jpg
│   ├── img_lineart_anime.jpg
│   ├── img_midas.jpg
│   ├── img_midas_ade20k.jpg
│   ├── img_mlsd.jpg
│   ├── img_normalbae.jpg
│   ├── img_normalbae_ade20k.jpg
│   ├── img_oneformer_ade20k.jpg
│   ├── img_oneformer_coco.jpg
│   ├── img_openpose.jpg
│   ├── img_pidi.jpg
│   ├── img_uniformer.jpg
│   └── img_zoe.jpg
├── main.py
├── requirements.txt
└── test.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

/env.sh
/models
/custom/*
!/custom/.gitkeep
/.tmp
/venv.bkp
/venv.*
/config/*
!/config/examples
!/config/_PUT_YOUR_CONFIGS_HERE).txt

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "repositories/controlnet"]
    path = repositories/controlnet
    url = https://github.com/lllyasviel/ControlNet-v1-1-nightly.git

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Batch Annotator

This is a simple tool to annotate images in batches using various models.

### Installation

```bash
git submodule update --init --recursive
pip install -r requirements.txt
```

### Usage

```bash
python main.py <input_dir> <output_dir> <annotator> [--res 512] [--gpu 0]
```

### Annotators

| Annotator        | Image                                                |
|------------------|------------------------------------------------------|
|                  | ![img](assets/img.jpg)                               |
| canny            | ![canny](assets/img_canny.jpg)                       |
| color_shuffler   | ![color_shuffler](assets/img_color_shuffler.jpg)     |
| content_shuffler | ![content_shuffler](assets/img_content_shuffler.jpg) |
| hed              | ![hed](assets/img_hed.jpg)                           |
| lineart          | ![lineart](assets/img_lineart.jpg)                   |
| lineart_anime    | ![lineart_anime](assets/img_lineart_anime.jpg)       |
| midas            | ![midas](assets/img_midas.jpg)                       |
| mlsd             | ![mlsd](assets/img_mlsd.jpg)                         |
| normalbae        | ![normalbae](assets/img_normalbae.jpg)               |
| oneformer_ade20k | ![oneformer_ade20k](assets/img_oneformer_ade20k.jpg) |
| oneformer_coco   | ![oneformer_coco](assets/img_oneformer_coco.jpg)     |
| openpose         | ![openpose](assets/img_openpose.jpg)                 |
| pidi             | ![pidi](assets/img_pidi.jpg)                         |
| uniformer        | ![uniformer](assets/img_uniformer.jpg)               |
| zoe              | ![zoe](assets/img_zoe.jpg)                           |
| midas_ade20k     | ![midas_ade20k](assets/img_midas_ade20k.jpg)         |
| normalbae_ade20k | ![normalbae_ade20k](assets/img_normalbae_ade20k.jpg) |
| kitchen_sink     | ![kitchen_sink](assets/img_kitchen_sink.jpg)         |
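
### Examples

To write Canny edge maps for every image in a folder (the `input/` and `output/` paths below are placeholders; use your own):

```bash
python main.py input/ output/ canny --canny_low_threshold 100 --canny_high_threshold 200
```

Annotator-specific flags such as `--canny_low_threshold` are generated from each annotator's `additional_args`, so `python main.py --help` lists everything available. To preview all annotators on a single image (one output file is written next to the input per annotator):

```bash
python test.py assets/img.jpg
```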

--------------------------------------------------------------------------------
/annotators.py:
--------------------------------------------------------------------------------
import os
import sys
import cv2
import numpy as np
import torch
import importlib

# make the ControlNet submodule importable so we can reuse its annotator package
CONTROL_NET_ROOT = os.path.join(os.path.dirname(__file__), 'repositories', 'controlnet')
sys.path.append(CONTROL_NET_ROOT)

from annotator.util import resize_image, HWC3

annotators = []


def value_map(x, in_min: float, in_max: float, out_min: float, out_max: float) -> float:
    # linearly remap x (scalar or ndarray) from [in_min, in_max] to [out_min, out_max]
    return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min


def add_annotators_to_arg_parser(parser):
    # add arguments for each annotator
    for annotator in annotators:
        if annotator.additional_args is not None:
            for arg in annotator.additional_args:
                # handle booleans as flags
                if arg["type"] == bool:
                    parser.add_argument(f"--{arg['slug']}", action="store_true", help=arg["help"])
                else:
                    parser.add_argument(
                        f"--{arg['slug']}", type=arg['type'], default=arg['default'] if 'default' in arg else None,
                        help=arg['help'])


class Annotator:
    # lazy wrapper around a ControlNet annotator: the model is only imported and
    # instantiated on first call, and can be freed again with cleanup()
    def __init__(
            self,
            name,
            slug=None,
            import_path=None,
            import_class_name=None,
            additional_args=None,
            call_override=None
    ):
        self.name = name
        self.slug = slug
        if self.slug is None:
            self.slug = self.name
        self.model = None
        self.import_path = import_path
        self.import_class_name = import_class_name
        self.additional_args = additional_args
        if self.import_class_name is None and call_override is None:
            raise ValueError('import_class_name must be specified for Annotator: ' + self.name)
        if self.import_path is None:
            self.import_path = 'annotator.' + self.slug

        self.call_override = call_override

    def __call__(self, img, res, *args, **kwargs):
        # composite annotators bypass the wrapped model entirely
        if self.call_override is not None:
            return self.call_override(self, img, res, *args, **kwargs)
        if self.model is None:
            self.load()
        img = resize_image(HWC3(img), res)
        res = self.model(img, *args, **kwargs)
        return [res]

    def load(self):
        module = importlib.import_module(self.import_path)
        annotator_model = getattr(module, self.import_class_name)
        self.model = annotator_model()

    def cleanup(self):
        # free the model and any cached GPU memory
        if self.model is not None:
            del self.model
            torch.cuda.empty_cache()
            self.model = None


annotators.append(
    Annotator(
        name='Canny',
        slug='canny',
        import_path='annotator.canny',
        import_class_name='CannyDetector',
        additional_args=[
            {
                'slug': 'canny_low_threshold',
                'keyword': 'low_threshold',
                'type': int,
                'default': 100,
                'help': 'Low threshold for Canny edge detection'
            },
            {
                'slug': 'canny_high_threshold',
                'keyword': 'high_threshold',
                'type': int,
                'default': 200,
                'help': 'High threshold for Canny edge detection'
            }
        ]
    )
)

annotators.append(
    Annotator(
        name='HED',
        slug='hed',
        import_path='annotator.hed',
        import_class_name='HEDdetector'
    )
)

annotators.append(
    Annotator(
        name='PIDI',
        slug='pidi',
        import_path='annotator.pidinet',
        import_class_name='PidiNetDetector'
    )
)

annotators.append(
    Annotator(
        name='MLSD Line Detection',
        slug='mlsd',
        import_path='annotator.mlsd',
        import_class_name='MLSDdetector',
        additional_args=[
            {
                'slug': 'mlsd_score_thr',
                'keyword': 'thr_v',
                'type': float,
                'default': 0.1,
                'help': 'Threshold for score of line detection'
            },
            {
                'slug': 'mlsd_dist_thr',
                'keyword': 'thr_d',
                'type': float,
                'default': 0.1,
                'help': 'Threshold for distance of line detection'
            }
        ]
    )
)

annotators.append(
    Annotator(
        name='Midas Depth',
        slug='midas',
        import_path='annotator.midas',
        import_class_name='MidasDetector'
    )
)

annotators.append(
    Annotator(
        name='Zoe Depth',
        slug='zoe',
        import_path='annotator.zoe',
        import_class_name='ZoeDetector'
    )
)

annotators.append(
    Annotator(
        name='NormalBae',
        slug='normalbae',
        import_path='annotator.normalbae',
        import_class_name='NormalBaeDetector'
    )
)

annotators.append(
    Annotator(
        name='OpenPose',
        slug='openpose',
        import_path='annotator.openpose',
        import_class_name='OpenposeDetector',
        additional_args=[
            {
                'slug': 'openpose_hand_and_face',
                'keyword': 'hand_and_face',
                'type': bool,
                'help': 'Whether to detect hand and face'
            }
        ]
    )
)

annotators.append(
    Annotator(
        name='Uniformer',
        slug='uniformer',
        import_path='annotator.uniformer',
        import_class_name='UniformerDetector',
    )
)

annotators.append(
    Annotator(
        name='Lineart Anime',
        slug='lineart_anime',
        import_path='annotator.lineart_anime',
        import_class_name='LineartAnimeDetector'
    )
)

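# Annotators that don't wrap an upstream model class can be registered with
# call_override instead of import_class_name. As a sketch (this 'invert'
# annotator is illustrative only, not part of the original set):
#
#   def invert(self, img, res):
#       img = resize_image(HWC3(img), res)
#       return [255 - img]
#
#   annotators.append(Annotator(name='Invert', slug='invert', call_override=invert))
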
annotators.append(
    Annotator(
        name='Lineart',
        slug='lineart',
        import_path='annotator.lineart',
        import_class_name='LineartDetector',
        additional_args=[
            {
                'slug': 'lineart_coarse',
                'keyword': 'coarse',
                'type': bool,
                'help': 'Whether to use coarse model'
            }
        ]
    )
)

annotators.append(
    Annotator(
        name='Oneformer COCO',
        slug='oneformer_coco',
        import_path='annotator.oneformer',
        import_class_name='OneformerCOCODetector'
    )
)

annotators.append(
    Annotator(
        name='Oneformer ADE20k',
        slug='oneformer_ade20k',
        import_path='annotator.oneformer',
        import_class_name='OneformerADE20kDetector'
    )
)

annotators.append(
    Annotator(
        name='Content Shuffler',
        slug='content_shuffler',
        import_path='annotator.shuffle',
        import_class_name='ContentShuffleDetector'
    )
)

annotators.append(
    Annotator(
        name='Color Shuffler',
        slug='color_shuffler',
        import_path='annotator.shuffle',
        import_class_name='ColorShuffleDetector'
    )
)


# The min value sets the darkest overlay value. Since the depth map scales from 0.0 to 1.0,
# 0.1 to 0.5 is a good min value. Raising the minimum keeps the farthest objects visible
# instead of letting them fade to black.
def midas_ade20k(self, img, res, midas_ade20k_min=0.5):
    # find midas and ade20k
    midas = None
    oneformer_ade20k = None
    for annotator in annotators:
        if annotator.slug == 'midas':
            midas = annotator
        if annotator.slug == 'oneformer_ade20k':
            oneformer_ade20k = annotator

    midas_img = midas(img, res)[0]
    ade20k_img = oneformer_ade20k(img, res)[0]

    # expand to 3 channels
    if midas_img.ndim == 2:
        midas_img = np.expand_dims(midas_img, axis=-1)
        # stack
        midas_img = np.concatenate([midas_img, midas_img, midas_img], axis=-1)

    # convert to 0 - 1 float
    midas_img = midas_img.astype(np.float32) / 255.0
    ade20k_img = ade20k_img.astype(np.float32) / 255.0

    # adjust midas min value
    midas_img = value_map(midas_img, 0, 1.0, midas_ade20k_min, 1.0)

    merged = ade20k_img * midas_img
    merged = np.clip(merged, 0, 1) * 255
    merged = merged.astype(np.uint8)

    return [merged]


annotators.append(
    Annotator(
        name='Midas + Oneformer ADE20k',
        slug='midas_ade20k',
        additional_args=[
            {
                'slug': 'midas_ade20k_min',
                'type': float,
                'default': 0.2,
                'help': 'Minimum value for midas overlay'
            }
        ],
        call_override=midas_ade20k
    )
)

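# For reference, with the default midas_ade20k_min of 0.2 the remap above sends
# depth 0.0 -> 0.2 and 1.0 -> 1.0 (value_map(0.0, 0, 1.0, 0.2, 1.0) == 0.2), so
# the farthest regions keep 20% of the segmentation color instead of going black.
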
def normalbae_ade20k(self, img, res, normalbae_ade20k_min=0.5):
    # find normalbae and ade20k
    oneformer_ade20k = None
    normalbae = None
    for annotator in annotators:
        if annotator.slug == 'normalbae':
            normalbae = annotator
        if annotator.slug == 'oneformer_ade20k':
            oneformer_ade20k = annotator

    normalbae_img = normalbae(img, res)[0]
    ade20k_img = oneformer_ade20k(img, res)[0]

    # convert to 0 - 1 float
    normalbae_img = normalbae_img.astype(np.float32) / 255.0
    ade20k_img = ade20k_img.astype(np.float32) / 255.0

    # make it grayscale by averaging the channels
    if normalbae_img.ndim == 3:
        normalbae_img = np.mean(normalbae_img, axis=-1, keepdims=True)
        # stack back to 3 channels
        normalbae_img = np.concatenate([normalbae_img, normalbae_img, normalbae_img], axis=-1)

    # normalize
    normalbae_img = value_map(normalbae_img, np.min(normalbae_img), np.max(normalbae_img), 0, 1)

    # adjust normalbae min value
    normalbae_img = value_map(normalbae_img, 0, 1.0, normalbae_ade20k_min, 1.0)

    merged = ade20k_img * normalbae_img
    merged = np.clip(merged, 0, 1) * 255
    merged = merged.astype(np.uint8)

    return [merged]


annotators.append(
    Annotator(
        name='Normal Bae + Oneformer ADE20k',
        slug='normalbae_ade20k',
        additional_args=[
            {
                'slug': 'normalbae_ade20k_min',
                'type': float,
                'default': 0.2,
                'help': 'Minimum value for normal bae overlay'
            }
        ],
        call_override=normalbae_ade20k
    )
)


def kitchen_sink(self, img, res):
    min_midas = 0.2
    min_normalbae = 0.5
    min_depth_scale = 0.2

    # find the annotators to compose
    midas = None
    normalbae = None
    oneformer_ade20k = None
    openpose = None
    for annotator in annotators:
        if annotator.slug == 'normalbae':
            normalbae = annotator
        if annotator.slug == 'oneformer_ade20k':
            oneformer_ade20k = annotator
        if annotator.slug == 'midas':
            midas = annotator
        if annotator.slug == 'openpose':
            openpose = annotator

    normalbae_img = normalbae(img, res)[0]
    ade20k_img = oneformer_ade20k(img, res)[0]
    midas_img = midas(img, res)[0]
    openpose_img = openpose(img, res)[0]

    # convert to 0 - 1 float
    normalbae_img = normalbae_img.astype(np.float32) / 255.0
    ade20k_img = ade20k_img.astype(np.float32) / 255.0
    openpose_img = openpose_img.astype(np.float32) / 255.0

    # make it grayscale by averaging the channels
    if normalbae_img.ndim == 3:
        normalbae_img = np.mean(normalbae_img, axis=-1, keepdims=True)
        # stack back to 3 channels
        normalbae_img = np.concatenate([normalbae_img, normalbae_img, normalbae_img], axis=-1)

    # expand to 3 channels
    if midas_img.ndim == 2:
        midas_img = np.expand_dims(midas_img, axis=-1)
        # stack
        midas_img = np.concatenate([midas_img, midas_img, midas_img], axis=-1)

    # raise the floor of the normalbae map
    normalbae_img = value_map(normalbae_img, np.min(normalbae_img), np.max(normalbae_img), min_normalbae, 1.0)

    # raise the floor of the midas map
    midas_img = value_map(midas_img, np.min(midas_img), np.max(midas_img), min_midas, 1.0)

    depth_scaler = normalbae_img * midas_img

    # normalize depth scaler
    depth_scaler = value_map(depth_scaler, np.min(depth_scaler), np.max(depth_scaler), min_depth_scale, 1.0)

    image = ade20k_img + openpose_img

    merged = image * depth_scaler
    merged = np.clip(merged, 0, 1) * 255
    merged = merged.astype(np.uint8)

    return [merged]


annotators.append(
    Annotator(
        name='Kitchen Sink',
        slug='kitchen_sink',
        call_override=kitchen_sink
    )
)

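# The kitchen sink output is a multiply-blend: the segmentation map (plus the pose
# lines) carries the color, while normalbae * midas acts as a depth/shading scaler.
# Because the scaler is remapped away from 0 before multiplying, it bottoms out at
# min_depth_scale (0.2), so far regions are darkened rather than erased entirely.
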
def post_process(annotated_img, original_image):
    img = annotated_img
    # if it is a list, get the first one
    if isinstance(img, list):
        img = img[0]
    img = HWC3(img)
    h, w, _ = original_image.shape
    ha, wa, _ = img.shape
    # resize back to the original image size if needed
    if h != ha or w != wa:
        output_img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
    else:
        output_img = img
    return output_img


def cleanup_annotators():
    for annotator in annotators:
        annotator.cleanup()


def annotate(input_image, args):
    res = args.res

    if res == -1:
        # use the original resolution
        orig_h, orig_w, orig_c = input_image.shape
        res = min(orig_h, orig_w)

    # clone numpy image
    img = input_image.copy()
    with torch.no_grad():
        # find the annotator
        for annotator_model in annotators:
            if annotator_model.slug == args.annotator:
                # build additional kwargs from the parsed args
                kwargs = {}
                if annotator_model.additional_args is not None:
                    for arg_dict in annotator_model.additional_args:
                        keyword = arg_dict['slug']
                        if 'keyword' in arg_dict:
                            keyword = arg_dict['keyword']
                        kwargs[keyword] = getattr(args, arg_dict['slug'])
                # run the model
                result = annotator_model(img, res, **kwargs)
                # post process
                result = post_process(result, img)
                return result

    # if we made it here, we didn't find the annotator
    raise Exception(f'Annotator {args.annotator} not found')
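
# Sketch of programmatic use (the image path is a placeholder). annotate() expects
# the parsed args to carry 'annotator', 'res', and each of the chosen annotator's
# additional_args slugs:
#
#   import cv2
#   from argparse import Namespace
#   from annotators import annotate
#
#   img = cv2.cvtColor(cv2.imread('some_image.jpg'), cv2.COLOR_BGR2RGB)
#   args = Namespace(annotator='canny', res=512,
#                    canny_low_threshold=100, canny_high_threshold=200)
#   edges = annotate(img, args)  # HxWx3 image, resized back to the input size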

--------------------------------------------------------------------------------
/assets/img.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img.jpg

--------------------------------------------------------------------------------
/assets/img_canny.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_canny.jpg

--------------------------------------------------------------------------------
/assets/img_color_shuffler.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_color_shuffler.jpg

--------------------------------------------------------------------------------
/assets/img_content_shuffler.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_content_shuffler.jpg

--------------------------------------------------------------------------------
/assets/img_hed.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_hed.jpg

--------------------------------------------------------------------------------
/assets/img_kitchen_sink.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_kitchen_sink.jpg

--------------------------------------------------------------------------------
/assets/img_lineart.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_lineart.jpg

--------------------------------------------------------------------------------
/assets/img_lineart_anime.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_lineart_anime.jpg

--------------------------------------------------------------------------------
/assets/img_midas.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_midas.jpg

--------------------------------------------------------------------------------
/assets/img_midas_ade20k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_midas_ade20k.jpg

--------------------------------------------------------------------------------
/assets/img_mlsd.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_mlsd.jpg

--------------------------------------------------------------------------------
/assets/img_normalbae.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_normalbae.jpg

--------------------------------------------------------------------------------
/assets/img_normalbae_ade20k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_normalbae_ade20k.jpg

--------------------------------------------------------------------------------
/assets/img_oneformer_ade20k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_oneformer_ade20k.jpg

--------------------------------------------------------------------------------
/assets/img_oneformer_coco.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_oneformer_coco.jpg

--------------------------------------------------------------------------------
/assets/img_openpose.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_openpose.jpg

--------------------------------------------------------------------------------
/assets/img_pidi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_pidi.jpg

--------------------------------------------------------------------------------
/assets/img_uniformer.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_uniformer.jpg

--------------------------------------------------------------------------------
/assets/img_zoe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_zoe.jpg

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import os

import cv2
import argparse
from tqdm import tqdm
from annotators import add_annotators_to_arg_parser


def main():
    parser = argparse.ArgumentParser(
        description="Batch annotates a folder of images with the selected annotator"
    )
    parser.add_argument("input_dir", help="input directory")
    parser.add_argument("output_dir", help="output directory")
    parser.add_argument("annotator", type=str, help="Annotator to use")
    parser.add_argument("--res", type=int, default=512,
                        help="Resolution to process at. -1 for original size (be careful with large images!)")
    parser.add_argument("--gpu", type=int, default=0, help="GPU id to use")

    add_annotators_to_arg_parser(parser)

    args = parser.parse_args()

    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    # make output directory
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir, exist_ok=True)

    from annotators import annotate

    img_ext = [".jpg", ".jpeg", ".png", ".webp"]

    image_paths = [img_path for img_path in os.listdir(args.input_dir) if
                   os.path.splitext(img_path)[1].lower() in img_ext]

    print(f"Found {len(image_paths)} images")

    for img_path in tqdm(image_paths):
        full_img_path = os.path.join(args.input_dir, img_path)

        img = cv2.imread(full_img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        output = annotate(img, args)

        output = output.astype('uint8')
        output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)

        cv2.imwrite(os.path.join(args.output_dir, img_path), output)

    print("FIN")


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
basicsr
einops
numpy
opencv_python
timm==0.6.7
torch
torchvision
tqdm
matplotlib
prettytable
omegaconf
fvcore
pycocotools
Pillow==9.5.0
ftfy
regex

--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import os
import torch
import cv2
import argparse
from tqdm import tqdm
from annotators import annotate, annotators, cleanup_annotators, add_annotators_to_arg_parser


def main():
    parser = argparse.ArgumentParser(
        description="Converts an image to all annotations"
    )
    parser.add_argument("input_img", help="input image")
    parser.add_argument("--res", type=int, default=512,
                        help="Resolution to process at. -1 for original size (be careful with large images!)")
    add_annotators_to_arg_parser(parser)
    args = parser.parse_args()

    img = cv2.imread(args.input_img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    for annotator in tqdm(annotators):
        with torch.no_grad():
            args.annotator = annotator.slug
            # append annotator to output filename
            file_path_without_ext, ext = os.path.splitext(args.input_img)
            out_path = file_path_without_ext + "_" + annotator.slug + ext
            output = annotate(img, args)

            output = output.astype('uint8')
            output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)

            cv2.imwrite(out_path, output)
        annotator.cleanup()

    print("FIN")


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------