├── .gitignore
├── .gitmodules
├── README.md
├── annotators.py
├── assets
│   ├── img.jpg
│   ├── img_canny.jpg
│   ├── img_color_shuffler.jpg
│   ├── img_content_shuffler.jpg
│   ├── img_hed.jpg
│   ├── img_kitchen_sink.jpg
│   ├── img_lineart.jpg
│   ├── img_lineart_anime.jpg
│   ├── img_midas.jpg
│   ├── img_midas_ade20k.jpg
│   ├── img_mlsd.jpg
│   ├── img_normalbae.jpg
│   ├── img_normalbae_ade20k.jpg
│   ├── img_oneformer_ade20k.jpg
│   ├── img_oneformer_coco.jpg
│   ├── img_openpose.jpg
│   ├── img_pidi.jpg
│   ├── img_uniformer.jpg
│   └── img_zoe.jpg
├── main.py
├── requirements.txt
└── test.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

/env.sh
/models
/custom/*
!/custom/.gitkeep
/.tmp
/venv.bkp
/venv.*
/config/*
!/config/examples
!/config/_PUT_YOUR_CONFIGS_HERE).txt

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "repositories/controlnet"]
    path = repositories/controlnet
    url = https://github.com/lllyasviel/ControlNet-v1-1-nightly.git

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Batch Annotator

This is a simple tool to annotate images in batches using various models.

### Installation

```bash
git submodule update --init --recursive
pip install -r requirements.txt
```

### Usage

```bash
python main.py <input_dir> <output_dir> <annotator> [--res 512] [--gpu 0]
```

### Annotators

| Annotator        | Image                                                |
|------------------|------------------------------------------------------|
|                  | ![img](assets/img.jpg)                               |
| canny            | ![canny](assets/img_canny.jpg)                       |
| color_shuffler   | ![color_shuffler](assets/img_color_shuffler.jpg)     |
| content_shuffler | ![content_shuffler](assets/img_content_shuffler.jpg) |
| hed              | ![hed](assets/img_hed.jpg)                           |
| lineart          | ![lineart](assets/img_lineart.jpg)                   |
| lineart_anime    | ![lineart_anime](assets/img_lineart_anime.jpg)       |
| midas            | ![midas](assets/img_midas.jpg)                       |
| mlsd             | ![mlsd](assets/img_mlsd.jpg)                         |
| normalbae        | ![normalbae](assets/img_normalbae.jpg)               |
| oneformer_ade20k | ![oneformer_ade20k](assets/img_oneformer_ade20k.jpg) |
| oneformer_coco   | ![oneformer_coco](assets/img_oneformer_coco.jpg)     |
| openpose         | ![openpose](assets/img_openpose.jpg)                 |
| pidi             | ![pidi](assets/img_pidi.jpg)                         |
| uniformer        | ![uniformer](assets/img_uniformer.jpg)               |
| zoe              | ![zoe](assets/img_zoe.jpg)                           |
| midas_ade20k     | ![midas_ade20k](assets/img_midas_ade20k.jpg)         |
| normalbae_ade20k | ![normalbae_ade20k](assets/img_normalbae_ade20k.jpg) |
| kitchen_sink     | ![kitchen_sink](assets/img_kitchen_sink.jpg)         |
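
### Examples

To write Canny edge maps for every image in a folder (the `input/` and `output/` paths below are placeholders; use your own):

```bash
python main.py input/ output/ canny --canny_low_threshold 100 --canny_high_threshold 200
```

Annotator-specific flags such as `--canny_low_threshold` are generated from each annotator's `additional_args`, so `python main.py --help` lists everything available. To preview all annotators on a single image (one output file is written next to the input per annotator):

```bash
python test.py assets/img.jpg
```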

--------------------------------------------------------------------------------
/annotators.py:
--------------------------------------------------------------------------------
import os
import sys
import cv2
import numpy as np
import torch
import importlib

# make the ControlNet submodule importable so we can reuse its annotator package
CONTROL_NET_ROOT = os.path.join(os.path.dirname(__file__), 'repositories', 'controlnet')
sys.path.append(CONTROL_NET_ROOT)

from annotator.util import resize_image, HWC3

annotators = []


def value_map(x, in_min: float, in_max: float, out_min: float, out_max: float) -> float:
    # linearly remap x (scalar or ndarray) from [in_min, in_max] to [out_min, out_max]
    return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min


def add_annotators_to_arg_parser(parser):
    # add arguments for each annotator
    for annotator in annotators:
        if annotator.additional_args is not None:
            for arg in annotator.additional_args:
                # handle booleans as flags
                if arg["type"] == bool:
                    parser.add_argument(f"--{arg['slug']}", action="store_true", help=arg["help"])
                else:
                    parser.add_argument(
                        f"--{arg['slug']}", type=arg['type'], default=arg['default'] if 'default' in arg else None,
                        help=arg['help'])


class Annotator:
    # lazy wrapper around a ControlNet annotator: the model is only imported and
    # instantiated on first call, and can be freed again with cleanup()
    def __init__(
            self,
            name,
            slug=None,
            import_path=None,
            import_class_name=None,
            additional_args=None,
            call_override=None
    ):
        self.name = name
        self.slug = slug
        if self.slug is None:
            self.slug = self.name
        self.model = None
        self.import_path = import_path
        self.import_class_name = import_class_name
        self.additional_args = additional_args
        if self.import_class_name is None and call_override is None:
            raise ValueError('import_class_name must be specified for Annotator: ' + self.name)
        if self.import_path is None:
            self.import_path = 'annotator.' + self.slug

        self.call_override = call_override

    def __call__(self, img, res, *args, **kwargs):
        # composite annotators bypass the wrapped model entirely
        if self.call_override is not None:
            return self.call_override(self, img, res, *args, **kwargs)
        if self.model is None:
            self.load()
        img = resize_image(HWC3(img), res)
        res = self.model(img, *args, **kwargs)
        return [res]

    def load(self):
        module = importlib.import_module(self.import_path)
        annotator_model = getattr(module, self.import_class_name)
        self.model = annotator_model()

    def cleanup(self):
        # free the model and any cached GPU memory
        if self.model is not None:
            del self.model
            torch.cuda.empty_cache()
            self.model = None


annotators.append(
    Annotator(
        name='Canny',
        slug='canny',
        import_path='annotator.canny',
        import_class_name='CannyDetector',
        additional_args=[
            {
                'slug': 'canny_low_threshold',
                'keyword': 'low_threshold',
                'type': int,
                'default': 100,
                'help': 'Low threshold for Canny edge detection'
            },
            {
                'slug': 'canny_high_threshold',
                'keyword': 'high_threshold',
                'type': int,
                'default': 200,
                'help': 'High threshold for Canny edge detection'
            }
        ]
    )
)

annotators.append(
    Annotator(
        name='HED',
        slug='hed',
        import_path='annotator.hed',
        import_class_name='HEDdetector'
    )
)

annotators.append(
    Annotator(
        name='PIDI',
        slug='pidi',
        import_path='annotator.pidinet',
        import_class_name='PidiNetDetector'
    )
)

annotators.append(
    Annotator(
        name='MLSD Line Detection',
        slug='mlsd',
        import_path='annotator.mlsd',
        import_class_name='MLSDdetector',
        additional_args=[
            {
                'slug': 'mlsd_score_thr',
                'keyword': 'thr_v',
                'type': float,
                'default': 0.1,
                'help': 'Threshold for score of line detection'
            },
            {
                'slug': 'mlsd_dist_thr',
                'keyword': 'thr_d',
                'type': float,
                'default': 0.1,
                'help': 'Threshold for distance of line detection'
            }
        ]
    )
)

annotators.append(
    Annotator(
        name='Midas Depth',
        slug='midas',
        import_path='annotator.midas',
        import_class_name='MidasDetector'
    )
)

annotators.append(
    Annotator(
        name='Zoe Depth',
        slug='zoe',
        import_path='annotator.zoe',
        import_class_name='ZoeDetector'
    )
)

annotators.append(
    Annotator(
        name='NormalBae',
        slug='normalbae',
        import_path='annotator.normalbae',
        import_class_name='NormalBaeDetector'
    )
)

annotators.append(
    Annotator(
        name='OpenPose',
        slug='openpose',
        import_path='annotator.openpose',
        import_class_name='OpenposeDetector',
        additional_args=[
            {
                'slug': 'openpose_hand_and_face',
                'keyword': 'hand_and_face',
                'type': bool,
                'help': 'Whether to detect hand and face'
            }
        ]
    )
)

annotators.append(
    Annotator(
        name='Uniformer',
        slug='uniformer',
        import_path='annotator.uniformer',
        import_class_name='UniformerDetector',
    )
)

annotators.append(
    Annotator(
        name='Lineart Anime',
        slug='lineart_anime',
        import_path='annotator.lineart_anime',
        import_class_name='LineartAnimeDetector'
    )
)

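# Annotators that don't wrap an upstream model class can be registered with
# call_override instead of import_class_name. As a sketch (this 'invert'
# annotator is illustrative only, not part of the original set):
#
#   def invert(self, img, res):
#       img = resize_image(HWC3(img), res)
#       return [255 - img]
#
#   annotators.append(Annotator(name='Invert', slug='invert', call_override=invert))
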
annotators.append(
    Annotator(
        name='Lineart',
        slug='lineart',
        import_path='annotator.lineart',
        import_class_name='LineartDetector',
        additional_args=[
            {
                'slug': 'lineart_coarse',
                'keyword': 'coarse',
                'type': bool,
                'help': 'Whether to use coarse model'
            }
        ]
    )
)

annotators.append(
    Annotator(
        name='Oneformer COCO',
        slug='oneformer_coco',
        import_path='annotator.oneformer',
        import_class_name='OneformerCOCODetector'
    )
)

annotators.append(
    Annotator(
        name='Oneformer ADE20k',
        slug='oneformer_ade20k',
        import_path='annotator.oneformer',
        import_class_name='OneformerADE20kDetector'
    )
)

annotators.append(
    Annotator(
        name='Content Shuffler',
        slug='content_shuffler',
        import_path='annotator.shuffle',
        import_class_name='ContentShuffleDetector'
    )
)

annotators.append(
    Annotator(
        name='Color Shuffler',
        slug='color_shuffler',
        import_path='annotator.shuffle',
        import_class_name='ColorShuffleDetector'
    )
)


# The min value sets the darkest overlay value. Since the depth map scales from 0.0 to 1.0,
# 0.1 to 0.5 is a good min value. Raising the minimum keeps the farthest objects visible
# instead of letting them fade to black.
def midas_ade20k(self, img, res, midas_ade20k_min=0.5):
    # find midas and ade20k
    midas = None
    oneformer_ade20k = None
    for annotator in annotators:
        if annotator.slug == 'midas':
            midas = annotator
        if annotator.slug == 'oneformer_ade20k':
            oneformer_ade20k = annotator

    midas_img = midas(img, res)[0]
    ade20k_img = oneformer_ade20k(img, res)[0]

    # expand to 3 channels
    if midas_img.ndim == 2:
        midas_img = np.expand_dims(midas_img, axis=-1)
        # stack
        midas_img = np.concatenate([midas_img, midas_img, midas_img], axis=-1)

    # convert to 0 - 1 float
    midas_img = midas_img.astype(np.float32) / 255.0
    ade20k_img = ade20k_img.astype(np.float32) / 255.0

    # adjust midas min value
    midas_img = value_map(midas_img, 0, 1.0, midas_ade20k_min, 1.0)

    merged = ade20k_img * midas_img
    merged = np.clip(merged, 0, 1) * 255
    merged = merged.astype(np.uint8)

    return [merged]


annotators.append(
    Annotator(
        name='Midas + Oneformer ADE20k',
        slug='midas_ade20k',
        additional_args=[
            {
                'slug': 'midas_ade20k_min',
                'type': float,
                'default': 0.2,
                'help': 'Minimum value for midas overlay'
            }
        ],
        call_override=midas_ade20k
    )
)

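# For reference, with the default midas_ade20k_min of 0.2 the remap above sends
# depth 0.0 -> 0.2 and 1.0 -> 1.0 (value_map(0.0, 0, 1.0, 0.2, 1.0) == 0.2), so
# the farthest regions keep 20% of the segmentation color instead of going black.
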
def normalbae_ade20k(self, img, res, normalbae_ade20k_min=0.5):
    # find normalbae and ade20k
    oneformer_ade20k = None
    normalbae = None
    for annotator in annotators:
        if annotator.slug == 'normalbae':
            normalbae = annotator
        if annotator.slug == 'oneformer_ade20k':
            oneformer_ade20k = annotator

    normalbae_img = normalbae(img, res)[0]
    ade20k_img = oneformer_ade20k(img, res)[0]

    # convert to 0 - 1 float
    normalbae_img = normalbae_img.astype(np.float32) / 255.0
    ade20k_img = ade20k_img.astype(np.float32) / 255.0

    # make it grayscale by averaging the channels
    if normalbae_img.ndim == 3:
        normalbae_img = np.mean(normalbae_img, axis=-1, keepdims=True)
        # stack back to 3 channels
        normalbae_img = np.concatenate([normalbae_img, normalbae_img, normalbae_img], axis=-1)

    # normalize
    normalbae_img = value_map(normalbae_img, np.min(normalbae_img), np.max(normalbae_img), 0, 1)

    # adjust normalbae min value
    normalbae_img = value_map(normalbae_img, 0, 1.0, normalbae_ade20k_min, 1.0)

    merged = ade20k_img * normalbae_img
    merged = np.clip(merged, 0, 1) * 255
    merged = merged.astype(np.uint8)

    return [merged]


annotators.append(
    Annotator(
        name='Normal Bae + Oneformer ADE20k',
        slug='normalbae_ade20k',
        additional_args=[
            {
                'slug': 'normalbae_ade20k_min',
                'type': float,
                'default': 0.2,
                'help': 'Minimum value for normal bae overlay'
            }
        ],
        call_override=normalbae_ade20k
    )
)


def kitchen_sink(self, img, res):
    min_midas = 0.2
    min_normalbae = 0.5
    min_depth_scale = 0.2

    # find the annotators to compose
    midas = None
    normalbae = None
    oneformer_ade20k = None
    openpose = None
    for annotator in annotators:
        if annotator.slug == 'normalbae':
            normalbae = annotator
        if annotator.slug == 'oneformer_ade20k':
            oneformer_ade20k = annotator
        if annotator.slug == 'midas':
            midas = annotator
        if annotator.slug == 'openpose':
            openpose = annotator

    normalbae_img = normalbae(img, res)[0]
    ade20k_img = oneformer_ade20k(img, res)[0]
    midas_img = midas(img, res)[0]
    openpose_img = openpose(img, res)[0]

    # convert to 0 - 1 float
    normalbae_img = normalbae_img.astype(np.float32) / 255.0
    ade20k_img = ade20k_img.astype(np.float32) / 255.0
    openpose_img = openpose_img.astype(np.float32) / 255.0

    # make it grayscale by averaging the channels
    if normalbae_img.ndim == 3:
        normalbae_img = np.mean(normalbae_img, axis=-1, keepdims=True)
        # stack back to 3 channels
        normalbae_img = np.concatenate([normalbae_img, normalbae_img, normalbae_img], axis=-1)

    # expand to 3 channels
    if midas_img.ndim == 2:
        midas_img = np.expand_dims(midas_img, axis=-1)
        # stack
        midas_img = np.concatenate([midas_img, midas_img, midas_img], axis=-1)

    # raise the floor of the normalbae map
    normalbae_img = value_map(normalbae_img, np.min(normalbae_img), np.max(normalbae_img), min_normalbae, 1.0)

    # raise the floor of the midas map
    midas_img = value_map(midas_img, np.min(midas_img), np.max(midas_img), min_midas, 1.0)

    depth_scaler = normalbae_img * midas_img

    # normalize depth scaler
    depth_scaler = value_map(depth_scaler, np.min(depth_scaler), np.max(depth_scaler), min_depth_scale, 1.0)

    image = ade20k_img + openpose_img

    merged = image * depth_scaler
    merged = np.clip(merged, 0, 1) * 255
    merged = merged.astype(np.uint8)

    return [merged]


annotators.append(
    Annotator(
        name='Kitchen Sink',
        slug='kitchen_sink',
        call_override=kitchen_sink
    )
)

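# The kitchen sink output is a multiply-blend: the segmentation map (plus the pose
# lines) carries the color, while normalbae * midas acts as a depth/shading scaler.
# Because the scaler is remapped away from 0 before multiplying, it bottoms out at
# min_depth_scale (0.2), so far regions are darkened rather than erased entirely.
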
def post_process(annotated_img, original_image):
    img = annotated_img
    # if it is a list, get the first one
    if isinstance(img, list):
        img = img[0]
    img = HWC3(img)
    h, w, _ = original_image.shape
    ha, wa, _ = img.shape
    # resize back to the original image size if needed
    if h != ha or w != wa:
        output_img = cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)
    else:
        output_img = img
    return output_img


def cleanup_annotators():
    for annotator in annotators:
        annotator.cleanup()


def annotate(input_image, args):
    res = args.res

    if res == -1:
        # use the original resolution
        orig_h, orig_w, orig_c = input_image.shape
        res = min(orig_h, orig_w)

    # clone numpy image
    img = input_image.copy()
    with torch.no_grad():
        # find the annotator
        for annotator_model in annotators:
            if annotator_model.slug == args.annotator:
                # build additional kwargs from the parsed args
                kwargs = {}
                if annotator_model.additional_args is not None:
                    for arg_dict in annotator_model.additional_args:
                        keyword = arg_dict['slug']
                        if 'keyword' in arg_dict:
                            keyword = arg_dict['keyword']
                        kwargs[keyword] = getattr(args, arg_dict['slug'])
                # run the model
                result = annotator_model(img, res, **kwargs)
                # post process
                result = post_process(result, img)
                return result

    # if we made it here, we didn't find the annotator
    raise Exception(f'Annotator {args.annotator} not found')
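
# Sketch of programmatic use (the image path is a placeholder). annotate() expects
# the parsed args to carry 'annotator', 'res', and each of the chosen annotator's
# additional_args slugs:
#
#   import cv2
#   from argparse import Namespace
#   from annotators import annotate
#
#   img = cv2.cvtColor(cv2.imread('some_image.jpg'), cv2.COLOR_BGR2RGB)
#   args = Namespace(annotator='canny', res=512,
#                    canny_low_threshold=100, canny_high_threshold=200)
#   edges = annotate(img, args)  # HxWx3 image, resized back to the input size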

--------------------------------------------------------------------------------
/assets/img.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img.jpg

--------------------------------------------------------------------------------
/assets/img_canny.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_canny.jpg

--------------------------------------------------------------------------------
/assets/img_color_shuffler.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_color_shuffler.jpg

--------------------------------------------------------------------------------
/assets/img_content_shuffler.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_content_shuffler.jpg

--------------------------------------------------------------------------------
/assets/img_hed.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_hed.jpg

--------------------------------------------------------------------------------
/assets/img_kitchen_sink.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_kitchen_sink.jpg

--------------------------------------------------------------------------------
/assets/img_lineart.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_lineart.jpg

--------------------------------------------------------------------------------
/assets/img_lineart_anime.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_lineart_anime.jpg

--------------------------------------------------------------------------------
/assets/img_midas.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_midas.jpg

--------------------------------------------------------------------------------
/assets/img_midas_ade20k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_midas_ade20k.jpg

--------------------------------------------------------------------------------
/assets/img_mlsd.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_mlsd.jpg

--------------------------------------------------------------------------------
/assets/img_normalbae.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_normalbae.jpg

--------------------------------------------------------------------------------
/assets/img_normalbae_ade20k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_normalbae_ade20k.jpg

--------------------------------------------------------------------------------
/assets/img_oneformer_ade20k.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_oneformer_ade20k.jpg

--------------------------------------------------------------------------------
/assets/img_oneformer_coco.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_oneformer_coco.jpg

--------------------------------------------------------------------------------
/assets/img_openpose.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_openpose.jpg

--------------------------------------------------------------------------------
/assets/img_pidi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_pidi.jpg

--------------------------------------------------------------------------------
/assets/img_uniformer.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_uniformer.jpg

--------------------------------------------------------------------------------
/assets/img_zoe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ostris/batch-annotator/420e142f6ad3cc14b3ea0500affc2c6c7e7544bf/assets/img_zoe.jpg

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import os

import cv2
import argparse
from tqdm import tqdm
from annotators import add_annotators_to_arg_parser


def main():
    parser = argparse.ArgumentParser(
        description="Batch annotates a folder of images with the selected annotator"
    )
    parser.add_argument("input_dir", help="input directory")
    parser.add_argument("output_dir", help="output directory")
    parser.add_argument("annotator", type=str, help="Annotator to use")
    parser.add_argument("--res", type=int, default=512,
                        help="Resolution to process at. -1 for original size (be careful with large images!)")
    parser.add_argument("--gpu", type=int, default=0, help="GPU id to use")

    add_annotators_to_arg_parser(parser)

    args = parser.parse_args()

    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    # make output directory
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir, exist_ok=True)

    from annotators import annotate

    img_ext = [".jpg", ".jpeg", ".png", ".webp"]

    image_paths = [img_path for img_path in os.listdir(args.input_dir) if
                   os.path.splitext(img_path)[1].lower() in img_ext]

    print(f"Found {len(image_paths)} images")

    for img_path in tqdm(image_paths):
        full_img_path = os.path.join(args.input_dir, img_path)

        img = cv2.imread(full_img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        output = annotate(img, args)

        output = output.astype('uint8')
        output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)

        cv2.imwrite(os.path.join(args.output_dir, img_path), output)

    print("FIN")


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
basicsr
einops
numpy
opencv_python
timm==0.6.7
torch
torchvision
tqdm
matplotlib
prettytable
omegaconf
fvcore
pycocotools
Pillow==9.5.0
ftfy
regex

--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import os
import torch
import cv2
import argparse
from tqdm import tqdm
from annotators import annotate, annotators, cleanup_annotators, add_annotators_to_arg_parser


def main():
    parser = argparse.ArgumentParser(
        description="Converts an image to all annotations"
    )
    parser.add_argument("input_img", help="input image")
    parser.add_argument("--res", type=int, default=512,
                        help="Resolution to process at. -1 for original size (be careful with large images!)")
    add_annotators_to_arg_parser(parser)
    args = parser.parse_args()

    img = cv2.imread(args.input_img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    for annotator in tqdm(annotators):
        with torch.no_grad():
            args.annotator = annotator.slug
            # append annotator to output filename
            file_path_without_ext, ext = os.path.splitext(args.input_img)
            out_path = file_path_without_ext + "_" + annotator.slug + ext
            output = annotate(img, args)

            output = output.astype('uint8')
            output = cv2.cvtColor(output, cv2.COLOR_RGB2BGR)

            cv2.imwrite(out_path, output)
        annotator.cleanup()

    print("FIN")


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------