├── .gitignore
├── LICENSE
├── README.md
├── publish.sh
├── setup.py
├── webuiapi
│   ├── __init__.py
│   └── webuiapi.py
└── webuiapi_demo.ipynb

/.gitignore:
--------------------------------------------------------------------------------
1 | dist/
2 | webuiapi.egg-info/
3 | build/
4 | *.pyc
5 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2022 ChunKoo Park
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # sdwebuiapi
2 | API client for AUTOMATIC1111/stable-diffusion-webui
3 | 
4 | Supports txt2img, img2img, extra-single-image, extra-batch-images API calls.
5 | 
6 | API support has to be enabled in the webui. Add --api when running the webui.
7 | It's explained [here](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API).
8 | 
9 | You can use the --api-auth user1:pass1,user2:pass2 option to enable authentication for API access.
10 | (Since it's basic HTTP authentication, the password is transmitted in cleartext.)
11 | 
12 | API calls are (almost) direct translations of http://127.0.0.1:7860/docs as of 2022/11/21.
13 | 
14 | # Install
15 | 
16 | ```
17 | pip install webuiapi
18 | ```
19 | 
20 | # Usage
21 | 
22 | webuiapi_demo.ipynb contains example code with original images. Images are compressed as jpeg in this document.
23 | 
24 | ## create API client
25 | ```
26 | import webuiapi
27 | 
28 | # create API client
29 | api = webuiapi.WebUIApi()
30 | 
31 | # create API client with custom host, port
32 | #api = webuiapi.WebUIApi(host='127.0.0.1', port=7860)
33 | 
34 | # create API client with custom host, port and https
35 | #api = webuiapi.WebUIApi(host='webui.example.com', port=443, use_https=True)
36 | 
37 | # create API client with default sampler, steps.
38 | #api = webuiapi.WebUIApi(sampler='Euler a', steps=20)
39 | 
40 | # optionally set username, password when --api-auth=username:password is set on webui.
41 | # username, password are not protected and can be derived easily if the communication channel is not encrypted.
42 | # you can also pass username, password to the WebUIApi constructor.
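# for example, the constructor form is equivalent to the set_auth() call below
# (sketch only; use the credentials that match your --api-auth setting):
# api = webuiapi.WebUIApi(username='username', password='password')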
43 | api.set_auth('username', 'password')
44 | ```
45 | 
46 | ## txt2img
47 | ```
48 | result1 = api.txt2img(prompt="cute squirrel",
49 |                       negative_prompt="ugly, out of frame",
50 |                       seed=1003,
51 |                       styles=["anime"],
52 |                       cfg_scale=7,
53 | #                      sampler_index='DDIM',
54 | #                      steps=30,
55 | #                      enable_hr=True,
56 | #                      hr_scale=2,
57 | #                      hr_upscaler=webuiapi.HiResUpscaler.Latent,
58 | #                      hr_second_pass_steps=20,
59 | #                      hr_resize_x=1536,
60 | #                      hr_resize_y=1024,
61 | #                      denoising_strength=0.4,
62 | 
63 |                       )
64 | # images contains the returned images (PIL images)
65 | result1.images
66 | 
67 | # image is shorthand for images[0]
68 | result1.image
69 | 
70 | # info contains text info about the api call
71 | result1.info
72 | 
73 | # parameters contains the parameters of the api call
74 | result1.parameters
75 | 
76 | result1.image
77 | ```
78 | ![txt2img](https://user-images.githubusercontent.com/1288793/200459205-258d75bb-d2b6-4882-ad22-040bfcf95626.jpg)
79 | 
80 | 
81 | ## img2img
82 | ```
83 | result2 = api.img2img(images=[result1.image], prompt="cute cat", seed=5555, cfg_scale=6.5, denoising_strength=0.6)
84 | result2.image
85 | ```
86 | ![img2img](https://user-images.githubusercontent.com/1288793/200459294-ab1127e5-04e5-47ac-82b2-2bbd0648402a.jpg)
87 | 
88 | ## img2img inpainting
89 | ```
90 | from PIL import Image, ImageDraw
91 | 
92 | mask = Image.new('RGB', result2.image.size, color = 'black')
93 | # mask = result2.image.copy()
94 | draw = ImageDraw.Draw(mask)
95 | draw.ellipse((210,150,310,250), fill='white')
96 | draw.ellipse((80,120,160,120+80), fill='white')
97 | 
98 | mask
99 | ```
100 | ![mask](https://user-images.githubusercontent.com/1288793/200459372-7850c6b6-27c5-435a-93e2-8710948d316a.jpg)
101 | 
102 | ```
103 | inpainting_result = api.img2img(images=[result2.image],
104 |                                 mask_image=mask,
105 |                                 inpainting_fill=1,
106 |                                 prompt="cute cat",
107 |                                 seed=104,
108 |                                 cfg_scale=5.0,
109 |                                 denoising_strength=0.7)
110 | inpainting_result.image
111 | ```
112 | ![img2img_inpainting](https://user-images.githubusercontent.com/1288793/200459398-9c1004be-1352-4427-bc00-442721a0e5a1.jpg)
113 | 
114 | ## extra-single-image
115 | ```
116 | result3 = api.extra_single_image(image=result2.image,
117 |                                  upscaler_1=webuiapi.Upscaler.ESRGAN_4x,
118 |                                  upscaling_resize=1.5)
119 | print(result3.image.size)
120 | result3.image
121 | ```
122 | (768, 768)
123 | 
124 | ![extra_single_image](https://user-images.githubusercontent.com/1288793/200459455-8579d740-3d8f-47f9-8557-cc177b3e99b7.jpg)
125 | 
126 | ## extra-batch-images
127 | ```
128 | result4 = api.extra_batch_images(images=[result1.image, inpainting_result.image],
129 |                                  upscaler_1=webuiapi.Upscaler.ESRGAN_4x,
130 |                                  upscaling_resize=1.5)
131 | result4.images[0]
132 | ```
133 | ![extra_batch_images_1](https://user-images.githubusercontent.com/1288793/200459540-b0bd2931-93db-4d03-9cc1-a9f5e5c89745.jpg)
134 | ```
135 | result4.images[1]
136 | ```
137 | ![extra_batch_images_2](https://user-images.githubusercontent.com/1288793/200459542-aa8547a0-f6db-436b-bec1-031a93a7b1d4.jpg)
138 | 
139 | ### Async API support
140 | txt2img, img2img, extra_single_image, and extra_batch_images support async API calls with the use_async=True parameter. You need the asyncio and aiohttp packages installed.
141 | ```
142 | result = await api.txt2img(prompt="cute kitten",
143 |                            seed=1001,
144 |                            use_async=True
145 |                            )
146 | result.image
147 | ```
148 | 
149 | ### Scripts support
150 | Scripts from AUTOMATIC1111's Web UI are supported, but there aren't official models that define a script's interface.
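
In general, a script is selected by passing its name as `script_name`, and its positional arguments (everything after `p` in the script's `run()` function) are passed as a list via `script_args`. A minimal sketch, assuming a hypothetical script whose `run()` takes two arguments after `p`:
```
# hypothetical script: def run(self, p, first_arg, second_arg)
result = api.txt2img(prompt="cute squirrel",
                     script_name="My Script",        # name of the script as shown in the webui
                     script_args=[1, "some value"])  # values for the arguments after 'p'
```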
151 | 
152 | To find out the list of arguments that are accepted by a particular script, look up the associated python file in
153 | AUTOMATIC1111's repo, `scripts/[script_name].py`. Search for its `run(p, **args)` function; the arguments that come
154 | after 'p' are the accepted arguments.
155 | 
156 | #### Example for X/Y/Z Plot script:
157 | ```
158 | (scripts/xyz_grid.py file from AUTOMATIC1111's repo)
159 | 
160 | def run(self, p, x_type, x_values, y_type, y_values, z_type, z_values, draw_legend, include_lone_images, include_sub_grids, no_fixed_seeds, margin_size):
161 |     ...
162 | ```
163 | List of accepted arguments:
164 | * _x_type_: Index of the axis type for the X axis. Indexes start from [0: Nothing]
165 | * _x_values_: String of comma-separated values for the X axis
166 | * _y_type_: Index of the axis type for the Y axis. As with the X axis, indexes start from [0: Nothing]
167 | * _y_values_: String of comma-separated values for the Y axis
168 | * _z_type_: Index of the axis type for the Z axis. As with the X axis, indexes start from [0: Nothing]
169 | * _z_values_: String of comma-separated values for the Z axis
170 | * _draw_legend_: "True" or "False". IMPORTANT: It needs to be a string and not a Boolean value
171 | * _include_lone_images_: "True" or "False". IMPORTANT: It needs to be a string and not a Boolean value
172 | * _include_sub_grids_: "True" or "False". IMPORTANT: It needs to be a string and not a Boolean value
173 | * _no_fixed_seeds_: "True" or "False". IMPORTANT: It needs to be a string and not a Boolean value
174 | * _margin_size_: int value
175 | ```
176 | # Available Axis options (Different for txt2img and img2img!)
177 | XYZPlotAvailableTxt2ImgScripts = [
178 |     "Nothing",
179 |     "Seed",
180 |     "Var. seed",
181 |     "Var. strength",
182 |     "Steps",
183 |     "Hires steps",
184 |     "CFG Scale",
185 |     "Prompt S/R",
186 |     "Prompt order",
187 |     "Sampler",
188 |     "Checkpoint name",
189 |     "Sigma Churn",
190 |     "Sigma min",
191 |     "Sigma max",
192 |     "Sigma noise",
193 |     "Eta",
194 |     "Clip skip",
195 |     "Denoising",
196 |     "Hires upscaler",
197 |     "VAE",
198 |     "Styles",
199 | ]
200 | 
201 | XYZPlotAvailableImg2ImgScripts = [
202 |     "Nothing",
203 |     "Seed",
204 |     "Var. seed",
205 |     "Var. strength",
206 |     "Steps",
207 |     "CFG Scale",
208 |     "Image CFG Scale",
209 |     "Prompt S/R",
210 |     "Prompt order",
211 |     "Sampler",
212 |     "Checkpoint name",
213 |     "Sigma Churn",
214 |     "Sigma min",
215 |     "Sigma max",
216 |     "Sigma noise",
217 |     "Eta",
218 |     "Clip skip",
219 |     "Denoising",
220 |     "Cond. 
Image Mask Weight", 221 | "VAE", 222 | "Styles", 223 | ] 224 | 225 | # Example call 226 | XAxisType = "Steps" 227 | XAxisValues = "20,30" 228 | XAxisValuesDropdown = "" 229 | YAxisType = "Sampler" 230 | YAxisValues = "Euler a, LMS" 231 | YAxisValuesDropdown = "" 232 | ZAxisType = "Nothing" 233 | ZAxisValues = "" 234 | ZAxisValuesDropdown = "" 235 | drawLegend = "True" 236 | includeLoneImages = "False" 237 | includeSubGrids = "False" 238 | noFixedSeeds = "False" 239 | marginSize = 0 240 | 241 | 242 | # x_type, x_values, y_type, y_values, z_type, z_values, draw_legend, include_lone_images, include_sub_grids, no_fixed_seeds, margin_size 243 | 244 | result = api.txt2img( 245 | prompt="cute girl with short brown hair in black t-shirt in animation style", 246 | seed=1003, 247 | script_name="X/Y/Z Plot", 248 | script_args=[ 249 | XYZPlotAvailableTxt2ImgScripts.index(XAxisType), 250 | XAxisValues, 251 | XAxisValuesDropdown, 252 | XYZPlotAvailableTxt2ImgScripts.index(YAxisType), 253 | YAxisValues, 254 | YAxisValuesDropdown, 255 | XYZPlotAvailableTxt2ImgScripts.index(ZAxisType), 256 | ZAxisValues, 257 | ZAxisValuesDropdown, 258 | drawLegend, 259 | includeLoneImages, 260 | includeSubGrids, 261 | noFixedSeeds, 262 | marginSize, ] 263 | ) 264 | 265 | result.image 266 | ``` 267 | ![txt2img_grid_xyz](https://user-images.githubusercontent.com/1288793/222345625-dc2e4090-6786-4a53-8619-700dc2f12412.jpg) 268 | 269 | 270 | ### Configuration APIs 271 | ``` 272 | # return map of current options 273 | options = api.get_options() 274 | 275 | # change sd model 276 | options = {} 277 | options['sd_model_checkpoint'] = 'model.ckpt [7460a6fa]' 278 | api.set_options(options) 279 | 280 | # when calling set_options, do not pass all options returned by get_options(). 281 | # it makes webui unusable (2022/11/21). 282 | 283 | # get available sd models 284 | api.get_sd_models() 285 | 286 | # misc get apis 287 | api.get_samplers() 288 | api.get_cmd_flags() 289 | api.get_hypernetworks() 290 | api.get_face_restorers() 291 | api.get_realesrgan_models() 292 | api.get_prompt_styles() 293 | api.get_artist_categories() # deprecated ? 294 | api.get_artists() # deprecated ? 295 | api.get_progress() 296 | api.get_embeddings() 297 | api.get_cmd_flags() 298 | api.get_scripts() 299 | api.get_schedulers() 300 | api.get_memory() 301 | 302 | # misc apis 303 | api.interrupt() 304 | api.skip() 305 | ``` 306 | 307 | ### Utility methods 308 | ``` 309 | # save current model name 310 | old_model = api.util_get_current_model() 311 | 312 | # get list of available models 313 | models = api.util_get_model_names() 314 | 315 | # get list of available samplers 316 | api.util_get_sampler_names() 317 | 318 | # get list of available schedulers 319 | api.util_get_scheduler_names() 320 | 321 | # refresh list of models 322 | api.refresh_checkpoints() 323 | 324 | # set model (use exact name) 325 | api.util_set_model(models[0]) 326 | 327 | # set model (find closest match) 328 | api.util_set_model('robodiffusion') 329 | 330 | # wait for job complete 331 | api.util_wait_for_ready() 332 | 333 | ``` 334 | 335 | ### LORA and alwayson_scripts example 336 | 337 | ``` 338 | r = api.txt2img(prompt='photo of a cute girl with green hair shuimobysim __juice__', 339 | seed=1000, 340 | save_images=True, 341 | alwayson_scripts={"Simple wildcards":[]} # wildcards extension doesn't accept more parameters. 
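                # extensions that do take parameters receive them through this same field in the form
                # {"Extension name": {"args": [...]}}, which is how this client passes ControlNet and
                # ADetailer arguments internally (see webuiapi.py).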
342 | ) 343 | r.image 344 | ``` 345 | 346 | ### Extension support - Model-Keyword 347 | ``` 348 | # https://github.com/mix1009/model-keyword 349 | mki = webuiapi.ModelKeywordInterface(api) 350 | mki.get_keywords() 351 | ``` 352 | ModelKeywordResult(keywords=['nousr robot'], model='robo-diffusion-v1.ckpt', oldhash='41fef4bd', match_source='model-keyword.txt') 353 | 354 | 355 | ### Extension support - Instruct-Pix2Pix 356 | ``` 357 | # Instruct-Pix2Pix extension is now deprecated and is now part of webui. 358 | # You can use normal img2img with image_cfg_scale when instruct-pix2pix model is loaded. 359 | r = api.img2img(prompt='sunset', images=[pil_img], cfg_scale=7.5, image_cfg_scale=1.5) 360 | r.image 361 | ``` 362 | 363 | ### Extension support - ControlNet 364 | ``` 365 | # https://github.com/Mikubill/sd-webui-controlnet 366 | 367 | api.controlnet_model_list() 368 | ``` 369 |
370 | ['control_v11e_sd15_ip2p [c4bb465c]',
371 |  'control_v11e_sd15_shuffle [526bfdae]',
372 |  'control_v11f1p_sd15_depth [cfd03158]',
373 |  'control_v11p_sd15_canny [d14c016b]',
374 |  'control_v11p_sd15_inpaint [ebff9138]',
375 |  'control_v11p_sd15_lineart [43d4be0d]',
376 |  'control_v11p_sd15_mlsd [aca30ff0]',
377 |  'control_v11p_sd15_normalbae [316696f1]',
378 |  'control_v11p_sd15_openpose [cab727d4]',
379 |  'control_v11p_sd15_scribble [d4ba51ff]',
380 |  'control_v11p_sd15_seg [e1f51eb9]',
381 |  'control_v11p_sd15_softedge [a8575a2a]',
382 |  'control_v11p_sd15s2_lineart_anime [3825e83e]',
383 |  'control_v11u_sd15_tile [1f041471]']
384 |  
385 | 386 | ``` 387 | api.controlnet_version() 388 | api.controlnet_module_list() 389 | ``` 390 | 391 | ``` 392 | # normal txt2img 393 | r = api.txt2img(prompt="photo of a beautiful girl with blonde hair", height=512, seed=100) 394 | img = r.image 395 | img 396 | ``` 397 | ![cn1](https://user-images.githubusercontent.com/1288793/222315754-43c6dc8c-2a62-4a31-b51a-f68523118e0d.png) 398 | 399 | ``` 400 | # txt2img with ControlNet 401 | # input_image parameter is changed to image (change in ControlNet API) 402 | unit1 = webuiapi.ControlNetUnit(image=img, module='canny', model='control_v11p_sd15_canny [d14c016b]') 403 | 404 | r = api.txt2img(prompt="photo of a beautiful girl", controlnet_units=[unit1]) 405 | r.image 406 | ``` 407 | 408 | ![cn2](https://user-images.githubusercontent.com/1288793/222315791-c6c480eb-2987-4044-b673-5f2cb6135f87.png) 409 | 410 | 411 | ``` 412 | # img2img with multiple ControlNets 413 | unit1 = webuiapi.ControlNetUnit(image=img, module='canny', model='control_v11p_sd15_canny [d14c016b]') 414 | unit2 = webuiapi.ControlNetUnit(image=img, module='depth', model='control_v11f1p_sd15_depth [cfd03158]', weight=0.5) 415 | 416 | r2 = api.img2img(prompt="girl", 417 | images=[img], 418 | width=512, 419 | height=512, 420 | controlnet_units=[unit1, unit2], 421 | sampler_name="Euler a", 422 | cfg_scale=7, 423 | ) 424 | r2.image 425 | ``` 426 | ![cn3](https://user-images.githubusercontent.com/1288793/222315816-1155b0c2-570d-4455-a68e-294fc7061b0a.png) 427 | 428 | ``` 429 | r2.images[1] 430 | ``` 431 | ![cn4](https://user-images.githubusercontent.com/1288793/222315836-9a26afec-c407-426b-9a08-b2cef2a32ab1.png) 432 | 433 | ``` 434 | r2.images[2] 435 | ``` 436 | ![cn5](https://user-images.githubusercontent.com/1288793/222315859-e6b6286e-854d-40c1-a516-5a08c827c49a.png) 437 | 438 | 439 | ``` 440 | r = api.controlnet_detect(images=[img], module='canny') 441 | r.image 442 | ``` 443 | 444 | 445 | ### Extension support - AnimateDiff 446 | 447 | ``` 448 | # https://github.com/continue-revolution/sd-webui-animatediff 449 | adiff = webuiapi.AnimateDiff(model='mm_sd15_v3.safetensors', 450 | video_length=24, 451 | closed_loop='R+P', 452 | format=['GIF']) 453 | 454 | r = api.txt2img(prompt='cute puppy', animatediff=adiff) 455 | 456 | # save GIF file. need save_all=True to save animated GIF. 
457 | r.image.save('puppy.gif', save_all=True)
458 | 
459 | # Display animated GIF in Jupyter notebook
460 | from IPython.display import HTML
461 | HTML('<img src="data:image/gif;base64,{0}" />'.format(r.json['images'][0]))
462 | ```
463 | 
464 | ### Extension support - RemBG (contributed by webcoderz)
465 | ```
466 | # https://github.com/AUTOMATIC1111/stable-diffusion-webui-rembg
467 | rembg = webuiapi.RemBGInterface(api)
468 | r = rembg.rembg(input_image=img, model='u2net', return_mask=False)
469 | r.image
470 | ```
471 | 
472 | 
473 | ### Extension support - SegmentAnything (contributed by TimNekk)
474 | ```python
475 | # https://github.com/continue-revolution/sd-webui-segment-anything
476 | 
477 | segment = webuiapi.SegmentAnythingInterface(api)
478 | 
479 | # Perform a segmentation prediction using the SAM model using points
480 | sam_result = segment.sam_predict(
481 |     image=img,
482 |     sam_positive_points=[(0.5, 0.25), (0.75, 0.75)],
483 |     # add other parameters as needed
484 | )
485 | 
486 | # Perform a segmentation prediction using the SAM model using GroundingDINO
487 | sam_result2 = segment.sam_predict(
488 |     image=img,
489 |     dino_enabled=True,
490 |     dino_text_prompt="A text prompt for GroundingDINO",
491 |     # add other parameters as needed
492 | )
493 | 
494 | # Example of dilating a mask
495 | dilation_result = segment.dilate_mask(
496 |     image=img,
497 |     mask=sam_result.masks[0],  # using the first mask from the SAM prediction
498 |     dilate_amount=30
499 | )
500 | 
501 | # Example of generating semantic segmentation with category IDs
502 | semantic_seg_result = segment.sam_and_semantic_seg_with_cat_id(
503 |     image=img,
504 |     category="1+2+3",  # Category IDs separated by '+'
505 |     # add other parameters as needed
506 | )
507 | ```
508 | 
509 | ### Extension support - Tagger (contributed by C-BP)
510 | 
511 | ```python
512 | # https://github.com/Akegarasu/sd-webui-wd14-tagger
513 | 
514 | tagger = webuiapi.TaggerInterface(api)
515 | result = tagger.tagger_interrogate(image)
516 | print(result)
517 | # {"caption": {"additionalProp1":0.9,"additionalProp2": 0.8,"additionalProp3": 0.7}}
518 | ```
519 | ### Extension support - ADetailer (contributed by tomj2ee and davidmartinrius)
520 | #### txt2img with ADetailer
521 | ```
522 | # https://github.com/Bing-su/adetailer
523 | 
524 | import webuiapi
525 | 
526 | api = webuiapi.WebUIApi()
527 | 
528 | ads = webuiapi.ADetailer(ad_model="face_yolov8n.pt")
529 | 
530 | result1 = api.txt2img(prompt="cute squirrel",
531 |                       negative_prompt="ugly, out of frame",
532 |                       seed=-1,
533 |                       styles=["anime"],
534 |                       cfg_scale=7,
535 |                       adetailer=[ads],
536 |                       steps=30,
537 |                       enable_hr=True,
538 |                       denoising_strength=0.5
539 |                       )
540 | 
541 | 
542 | 
543 | img = result1.image
544 | img
545 | 
546 | # OR
547 | 
548 | file_path = "output_image.png"
549 | result1.image.save(file_path)
550 | ```
551 | 
552 | #### img2img with ADetailer
553 | 
554 | ```
555 | import webuiapi
556 | from PIL import Image
557 | 
558 | img = Image.open("/path/to/your/image.jpg")
559 | 
560 | ads = webuiapi.ADetailer(ad_model="face_yolov8n.pt")
561 | 
562 | api = webuiapi.WebUIApi()
563 | 
564 | result1 = api.img2img(
565 |     images=[img],
566 |     prompt="a cute squirrel",
567 |     steps=25,
568 |     seed=-1,
569 |     cfg_scale=7,
570 |     denoising_strength=0.5,
571 |     resize_mode=2,
572 |     width=512,
573 |     height=512,
574 |     adetailer=[ads],
575 | )
576 | 
577 | file_path = "img2img_output_image.png"
578 | result1.image.save(file_path)
579 | ```
580 | ### Support for interrogate with "deepdanbooru / deepbooru" (contributed by davidmartinrius)
581 | 
582 | ```
583 | import 
webuiapi 584 | from PIL import Image 585 | 586 | api = webuiapi.WebUIApi() 587 | 588 | img = Image.open("/path/to/your/image.jpg") 589 | 590 | interrogate_result = api.interrogate(image=img, model="deepdanbooru") 591 | # also you can use clip. clip is set by default 592 | #interrogate_result = api.interrogate(image=img, model="clip") 593 | #interrogate_result = api.interrogate(image=img) 594 | 595 | prompt = interrogate_result.info 596 | prompt 597 | 598 | # OR 599 | print(prompt) 600 | ``` 601 | 602 | ### Support for ReActor, for face swapping (contributed by davidmartinrius) 603 | 604 | ``` 605 | import webuiapi 606 | from PIL import Image 607 | 608 | img = Image.open("/path/to/your/image.jpg") 609 | 610 | api = webuiapi.WebUIApi() 611 | 612 | your_desired_face = Image.open("/path/to/your/desired/face.jpeg") 613 | 614 | reactor = webuiapi.ReActor( 615 | img=your_desired_face, 616 | enable=True 617 | ) 618 | 619 | result1 = api.img2img( 620 | images=[img], 621 | prompt="a cute squirrel", 622 | steps=25, 623 | seed=-1, 624 | cfg_scale=7, 625 | denoising_strength=0.5, 626 | resize_mode=2, 627 | width=512, 628 | height=512, 629 | reactor=reactor 630 | ) 631 | 632 | file_path = "face_swapped_image.png" 633 | result1.image.save(file_path) 634 | ``` 635 | 636 | 637 | ### Support for Self Attention Guidance (contributed by yano) 638 | 639 | https://github.com/ashen-sensored/sd_webui_SAG 640 | 641 | ``` 642 | import webuiapi 643 | from PIL import Image 644 | 645 | img = Image.open("/path/to/your/image.jpg") 646 | 647 | api = webuiapi.WebUIApi() 648 | 649 | your_desired_face = Image.open("/path/to/your/desired/face.jpeg") 650 | 651 | sag = webuiapi.Sag( 652 | enable=True, 653 | scale=0.75, 654 | mask_threshold=1.00 655 | ) 656 | 657 | result1 = api.img2img( 658 | images=[img], 659 | prompt="a cute squirrel", 660 | steps=25, 661 | seed=-1, 662 | cfg_scale=7, 663 | denoising_strength=0.5, 664 | resize_mode=2, 665 | width=512, 666 | height=512, 667 | sag=sag 668 | ) 669 | 670 | file_path = "face_swapped_image.png" 671 | result1.image.save(file_path) 672 | ``` 673 | 674 | ### Prompt generator API by [David Martin Rius](https://github.com/davidmartinrius/): 675 | 676 | 677 | This is an unofficial implementation to use the api of promptgen. 678 | Before installing the extension you have to check if you already have an extension called Promptgen. If so, you need to uninstall it. 679 | Once uninstalled you can install it in two ways: 680 | 681 | #### 1. From the user interface 682 | ![image](https://github.com/davidmartinrius/sdwebuiapi/assets/16558194/d879719f-bb9f-44a7-aef7-b893d117bbea) 683 | 684 | #### 2. 
From the command line
685 | 
686 | cd stable-diffusion-webui/extensions
687 | 
688 | git clone -b api-implementation https://github.com/davidmartinrius/stable-diffusion-webui-promptgen.git
689 | 
690 | Once installed:
691 | ```
692 | api = webuiapi.WebUIApi()
693 | 
694 | result = api.list_prompt_gen_models()
695 | print("list of models")
696 | print(result)
697 | # you will get something like this:
698 | #['AUTOMATIC/promptgen-lexart', 'AUTOMATIC/promptgen-majinai-safe', 'AUTOMATIC/promptgen-majinai-unsafe']
699 | 
700 | text = "a box"
701 | 
702 | # To create a prompt from a text:
703 | # by default model_name is "AUTOMATIC/promptgen-lexart"
704 | result = api.prompt_gen(text=text)
705 | 
706 | # Using a different model
707 | result = api.prompt_gen(text=text, model_name="AUTOMATIC/promptgen-majinai-unsafe")
708 | 
709 | # Complete usage
710 | result = api.prompt_gen(
711 |     text=text,
712 |     model_name="AUTOMATIC/promptgen-majinai-unsafe",
713 |     batch_count=1,
714 |     batch_size=10,
715 |     min_length=20,
716 |     max_length=150,
717 |     num_beams=1,
718 |     temperature=1,
719 |     repetition_penalty=1,
720 |     length_preference=1,
721 |     sampling_mode="Top K",
722 |     top_k=12,
723 |     top_p=0.15
724 | )
725 | 
726 | # result is a list of prompts. You can iterate the list or just get the first result like this: result[0]
727 | 
728 | ```
729 | 
730 | ### TIPS for using Flux by [David Martin Rius](https://github.com/davidmartinrius/):
731 | 
732 | In both cases you need cfg_scale=1, sampler_name="Euler", scheduler="Simple", and, for txt2img, enable_hr=False.
733 | 
734 | ## For txt2img
735 | ```
736 | import webuiapi
737 | 
api = webuiapi.WebUIApi()
738 | result1 = api.txt2img(prompt="cute squirrel",
739 |                       negative_prompt="ugly, out of frame",
740 |                       seed=-1,
741 |                       styles=["anime"],
742 |                       cfg_scale=1,
743 |                       steps=20,
744 |                       enable_hr=False,
745 |                       denoising_strength=0.5,
746 |                       sampler_name="Euler",
747 |                       scheduler="Simple"
748 |                       )
749 | 
750 | 
751 | 
752 | img = result1.image
753 | img
754 | 
755 | # OR
756 | 
757 | file_path = "output_image.png"
758 | result1.image.save(file_path)
759 | 
760 | ```
761 | 
762 | ## For img2img
763 | 
764 | ```
765 | import webuiapi
766 | from PIL import Image
767 | 
768 | img = Image.open("/path/to/your/image.jpg")
769 | 
770 | api = webuiapi.WebUIApi()
771 | 
772 | result1 = api.img2img(
773 |     images=[img],
774 |     prompt="a cute squirrel",
775 |     steps=20,
776 |     seed=-1,
777 |     cfg_scale=1,
778 |     denoising_strength=0.5,
779 |     resize_mode=2,
780 |     width=512,
781 |     height=512,
782 |     sampler_name="Euler",
783 |     scheduler="Simple"
784 | )
785 | 
786 | file_path = "img2img_output_image.png"
787 | result1.image.save(file_path)
788 | 
789 | ```
790 | 
791 | 
792 | 
--------------------------------------------------------------------------------
/publish.sh:
--------------------------------------------------------------------------------
1 | rm -rf dist
2 | rm -rf build
3 | rm -rf webuiapi.egg-info
4 | python3 setup.py sdist bdist_wheel
5 | twine upload dist/*
6 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """ See:
2 | https://github.com/mix1009/sdwebuiapi
3 | """
4 | 
5 | # Always prefer setuptools over distutils
6 | from setuptools import setup, find_packages
7 | import pathlib
8 | 
9 | here = pathlib.Path(__file__).parent.resolve()
10 | long_description = (here / "README.md").read_text(encoding="utf-8")
11 | 
12 | setup(
13 |     name="webuiapi",
14 |     version="0.9.17",
15 |     description="Python API client for 
AUTOMATIC1111/stable-diffusion-webui", 16 | url="https://github.com/mix1009/sdwebuiapi", 17 | author="ChunKoo Park", 18 | author_email="mix100f9@gmail.com", 19 | keywords="stable-diffuion-webui, AUTOMATIC1111, stable-diffusion, api", 20 | packages=["webuiapi"], 21 | #packages=find_packages(), 22 | python_requires=">=3.7, <4", 23 | install_requires=['requests', 24 | 'Pillow',], 25 | long_description=long_description, 26 | long_description_content_type='text/markdown', 27 | license="MIT", 28 | ) 29 | -------------------------------------------------------------------------------- /webuiapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .webuiapi import ( 2 | WebUIApi, 3 | WebUIApiResult, 4 | Upscaler, 5 | HiResUpscaler, 6 | b64_img, 7 | raw_b64_img, 8 | ModelKeywordResult, 9 | ModelKeywordInterface, 10 | InstructPix2PixInterface, 11 | ControlNetInterface, 12 | ControlNetUnit, 13 | RemBGInterface, 14 | ADetailer, 15 | Roop, 16 | ReActor, 17 | ADetailer, 18 | AnimateDiff, 19 | Roop, 20 | Sag, 21 | SegmentAnythingInterface, 22 | SegmentAnythingSamResult, 23 | SegmentAnythingDilationResult, 24 | SegmentAnythingGinoResult, 25 | SegmentAnythingControlNetSegRandomResult, 26 | SegmentAnythingControlNetSegNotRandomResult, 27 | SegmentAnythingSemanticSegWithCatIdResult, 28 | TaggerInterface 29 | ) 30 | 31 | __version__ = "0.9.17" 32 | 33 | __all__ = [ 34 | "__version__", 35 | "WebUIApi", 36 | "WebUIApiResult", 37 | "Upscaler", 38 | "HiResUpscaler", 39 | "b64_img", 40 | "ModelKeywordResult", 41 | "ModelKeywordInterface", 42 | "InstructPix2PixInterface", 43 | "ControlNetInterface", 44 | "ControlNetUnit", 45 | "RemBGInterface", 46 | "ADetailer", 47 | "Roop", 48 | "ReActor", 49 | "Sag", 50 | "SegmentAnythingInterface", 51 | "SegmentAnythingSamResult", 52 | "SegmentAnythingDilationResult", 53 | "SegmentAnythingGinoResult", 54 | "SegmentAnythingControlNetSegRandomResult", 55 | "SegmentAnythingControlNetSegNotRandomResult", 56 | "SegmentAnythingSemanticSegWithCatIdResult", 57 | "TaggerInterface" 58 | ] 59 | -------------------------------------------------------------------------------- /webuiapi/webuiapi.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import PIL 4 | import requests 5 | import io 6 | import base64 7 | from PIL import Image, PngImagePlugin 8 | from dataclasses import dataclass 9 | from enum import Enum 10 | from typing import List, Dict, Any, Optional, Union ,Literal 11 | 12 | 13 | class Upscaler(str, Enum): 14 | none = "None" 15 | Lanczos = "Lanczos" 16 | Nearest = "Nearest" 17 | LDSR = "LDSR" 18 | BSRGAN = "BSRGAN" 19 | ESRGAN_4x = "R-ESRGAN 4x+" 20 | R_ESRGAN_General_4xV3 = "R-ESRGAN General 4xV3" 21 | ScuNET_GAN = "ScuNET GAN" 22 | ScuNET_PSNR = "ScuNET PSNR" 23 | SwinIR_4x = "SwinIR 4x" 24 | 25 | 26 | class HiResUpscaler(str, Enum): 27 | none = "None" 28 | Latent = "Latent" 29 | LatentAntialiased = "Latent (antialiased)" 30 | LatentBicubic = "Latent (bicubic)" 31 | LatentBicubicAntialiased = "Latent (bicubic antialiased)" 32 | LatentNearest = "Latent (nearest)" 33 | LatentNearestExact = "Latent (nearest-exact)" 34 | Lanczos = "Lanczos" 35 | Nearest = "Nearest" 36 | ESRGAN_4x = "R-ESRGAN 4x+" 37 | LDSR = "LDSR" 38 | ScuNET_GAN = "ScuNET GAN" 39 | ScuNET_PSNR = "ScuNET PSNR" 40 | SwinIR_4x = "SwinIR 4x" 41 | 42 | 43 | @dataclass 44 | class WebUIApiResult: 45 | images: list 46 | parameters: dict 47 | info: dict 48 | json: dict 49 | 50 | @property 51 | def image(self): 52 | 
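        # convenience accessor: the first returned image (shorthand for images[0])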
return self.images[0] 53 | 54 | 55 | class ControlNetUnit: 56 | def __init__( 57 | self, 58 | image: Image = None, 59 | mask: Image = None, 60 | module: str = "none", 61 | model: str = "None", 62 | weight: float = 1.0, 63 | resize_mode: str = "Resize and Fill", 64 | low_vram: bool = False, 65 | processor_res: int = 512, 66 | threshold_a: float = 64, 67 | threshold_b: float = 64, 68 | guidance_start: float = 0.0, 69 | guidance_end: float = 1.0, 70 | control_mode: int = 0, 71 | pixel_perfect: bool = False, 72 | guessmode: int = None, # deprecated: use control_mode 73 | hr_option: str = "Both", # Both, Low res only, High res only 74 | enabled: bool = True, 75 | ): 76 | self.image = image 77 | self.mask = mask 78 | self.module = module 79 | self.model = model 80 | self.weight = weight 81 | self.resize_mode = resize_mode 82 | self.low_vram = low_vram 83 | self.processor_res = processor_res 84 | self.threshold_a = threshold_a 85 | self.threshold_b = threshold_b 86 | self.guidance_start = guidance_start 87 | self.guidance_end = guidance_end 88 | self.enabled = enabled 89 | if guessmode: 90 | print( 91 | "ControlNetUnit guessmode is deprecated. Please use control_mode instead." 92 | ) 93 | control_mode = guessmode 94 | 95 | if control_mode == 0: 96 | self.control_mode = 'Balanced' 97 | elif control_mode == 1: 98 | self.control_mode = 'My prompt is more important' 99 | elif control_mode == 2: 100 | self.control_mode = 'ControlNet is more important' 101 | else: 102 | self.control_mode = control_mode 103 | 104 | self.pixel_perfect = pixel_perfect 105 | self.hr_option = hr_option 106 | 107 | def to_dict(self): 108 | return { 109 | "image": raw_b64_img(self.image) if self.image else "", 110 | "mask": raw_b64_img(self.mask) if self.mask is not None else None, 111 | "module": self.module, 112 | "model": self.model, 113 | "weight": self.weight, 114 | "resize_mode": self.resize_mode, 115 | "low_vram": self.low_vram, 116 | "processor_res": self.processor_res, 117 | "threshold_a": self.threshold_a, 118 | "threshold_b": self.threshold_b, 119 | "guidance_start": self.guidance_start, 120 | "guidance_end": self.guidance_end, 121 | "control_mode": self.control_mode, 122 | "pixel_perfect": self.pixel_perfect, 123 | "hr_option": self.hr_option, 124 | "enabled": self.enabled, 125 | } 126 | 127 | class ADetailer: 128 | def __init__(self, 129 | ad_model: str = "None", 130 | ad_model_classes: str = "", 131 | ad_tab_enable: bool = True, 132 | ad_prompt: str = "", 133 | ad_negative_prompt: str = "", 134 | ad_confidence: float = 0.3, 135 | ad_mask_k_largest: float = 0.0, 136 | ad_mask_min_ratio: float = 0.0, 137 | ad_mask_max_ratio: float = 1.0, 138 | ad_dilate_erode: int = 4, 139 | ad_x_offset: int = 0, 140 | ad_y_offset: int = 0, 141 | ad_mask_merge_invert: Literal["None", "Merge", "Merge and Invert"] = "None", 142 | ad_mask_blur: int = 4, 143 | ad_denoising_strength: int = 0.4, 144 | ad_inpaint_only_masked: bool = True, 145 | ad_inpaint_only_masked_padding: int = 32, 146 | ad_use_inpaint_width_height: bool = False, 147 | ad_inpaint_width: int = 512, 148 | ad_inpaint_height: int = 512, 149 | ad_use_steps: bool = False, 150 | ad_steps: int = 28, 151 | ad_use_cfg_scale: bool = False, 152 | ad_cfg_scale: float = 7.0, 153 | ad_use_checkpoint: bool = False, 154 | ad_checkpoint: str = None, 155 | ad_use_vae: bool = False, 156 | ad_vae: str = None, 157 | ad_use_sampler: bool = False, 158 | ad_sampler: str = "DPM++ 2M Karras", 159 | ad_scheduler: str = "Use same scheduler", 160 | ad_use_noise_multiplier: bool = False, 161 | 
ad_noise_multiplier=1.0, 162 | ad_use_clip_skip: bool = False, 163 | ad_clip_skip: int= 1, 164 | ad_restore_face: bool = False, 165 | ad_controlnet_model: str = "None", 166 | ad_controlnet_module: str = "None", 167 | ad_controlnet_weight: float = 1.0, 168 | ad_controlnet_guidance_start: float = 0.0, 169 | ad_controlnet_guidance_end: float = 1.0, 170 | ): 171 | self.ad_model = ad_model 172 | self.ad_model_classes = ad_model_classes 173 | self.ad_tab_enable = ad_tab_enable 174 | self.ad_prompt = ad_prompt 175 | self.ad_negative_prompt = ad_negative_prompt 176 | self.ad_confidence = ad_confidence 177 | self.ad_mask_k_largest = ad_mask_k_largest 178 | self.ad_mask_min_ratio = ad_mask_min_ratio 179 | self.ad_mask_max_ratio = ad_mask_max_ratio 180 | self.ad_dilate_erode = ad_dilate_erode 181 | self.ad_x_offset = ad_x_offset 182 | self.ad_y_offset = ad_y_offset 183 | self.ad_mask_merge_invert = ad_mask_merge_invert 184 | self.ad_mask_blur = ad_mask_blur 185 | self.ad_denoising_strength = ad_denoising_strength 186 | self.ad_inpaint_only_masked = ad_inpaint_only_masked 187 | self.ad_inpaint_only_masked_padding = ad_inpaint_only_masked_padding 188 | self.ad_use_inpaint_width_height = ad_use_inpaint_width_height 189 | self.ad_inpaint_width = ad_inpaint_width 190 | self.ad_inpaint_height = ad_inpaint_height 191 | self.ad_use_steps = ad_use_steps 192 | self.ad_steps = ad_steps 193 | self.ad_use_cfg_scale = ad_use_cfg_scale 194 | self.ad_cfg_scale = ad_cfg_scale 195 | self.ad_use_checkpoint = ad_use_checkpoint 196 | self.ad_checkpoint = ad_checkpoint 197 | self.ad_use_vae = ad_use_vae 198 | self.ad_vae = ad_vae 199 | self.ad_use_sampler = ad_use_sampler 200 | self.ad_sampler = ad_sampler 201 | self.ad_scheduler = ad_scheduler 202 | self.ad_use_noise_multiplier = ad_use_noise_multiplier 203 | self.ad_noise_multiplier = ad_noise_multiplier 204 | self.ad_use_clip_skip = ad_use_clip_skip 205 | self.ad_clip_skip = ad_clip_skip 206 | self.ad_restore_face = ad_restore_face 207 | self.ad_controlnet_model = ad_controlnet_model 208 | self.ad_controlnet_module = ad_controlnet_module 209 | self.ad_controlnet_weight = ad_controlnet_weight 210 | self.ad_controlnet_guidance_start = ad_controlnet_guidance_start 211 | self.ad_controlnet_guidance_end = ad_controlnet_guidance_end 212 | 213 | 214 | def to_dict(self): 215 | return { 216 | "ad_model": self.ad_model, 217 | "ad_model_classes": self.ad_model_classes, 218 | "ad_tab_enable" : self.ad_tab_enable, 219 | "ad_prompt": self.ad_prompt, 220 | "ad_negative_prompt": self.ad_negative_prompt, 221 | "ad_confidence": self.ad_confidence, 222 | "ad_mask_k_largest": self.ad_mask_k_largest, 223 | "ad_mask_min_ratio": self.ad_mask_min_ratio, 224 | "ad_mask_max_ratio": self.ad_mask_max_ratio, 225 | "ad_dilate_erode": self.ad_dilate_erode, 226 | "ad_x_offset": self.ad_x_offset, 227 | "ad_y_offset": self.ad_y_offset, 228 | "ad_mask_merge_invert": self.ad_mask_merge_invert, 229 | "ad_mask_blur": self.ad_mask_blur, 230 | "ad_denoising_strength": self.ad_denoising_strength, 231 | "ad_inpaint_only_masked": self.ad_inpaint_only_masked, 232 | "ad_inpaint_only_masked_padding": self.ad_inpaint_only_masked_padding, 233 | "ad_use_inpaint_width_height": self.ad_use_inpaint_width_height, 234 | "ad_inpaint_width": self.ad_inpaint_width, 235 | "ad_inpaint_height": self.ad_inpaint_height, 236 | "ad_use_steps": self.ad_use_steps, 237 | "ad_steps": self.ad_steps, 238 | "ad_use_cfg_scale": self.ad_use_cfg_scale, 239 | "ad_cfg_scale": self.ad_cfg_scale, 240 | "ad_use_checkpoint": 
self.ad_use_checkpoint, 241 | "ad_checkpoint": self.ad_checkpoint, 242 | "ad_use_vae": self.ad_use_vae, 243 | "ad_vae": self.ad_vae, 244 | "ad_use_sampler": self.ad_use_sampler, 245 | "ad_sampler": self.ad_sampler, 246 | "ad_scheduler": self.ad_scheduler, 247 | "ad_use_noise_multiplier": self.ad_use_noise_multiplier, 248 | "ad_noise_multiplier": self.ad_noise_multiplier, 249 | "ad_use_clip_skip": self.ad_use_clip_skip, 250 | "ad_clip_skip": self.ad_clip_skip, 251 | "ad_restore_face": self.ad_restore_face, 252 | "ad_controlnet_model": self.ad_controlnet_model, 253 | "ad_controlnet_module": self.ad_controlnet_module, 254 | "ad_controlnet_weight": self.ad_controlnet_weight, 255 | "ad_controlnet_guidance_start": self.ad_controlnet_guidance_start, 256 | "ad_controlnet_guidance_end": self.ad_controlnet_guidance_end, 257 | } 258 | 259 | class AnimateDiff: 260 | def __init__(self, 261 | model="mm_sd15_v3.safetensors", 262 | enable=True, 263 | video_length=0, 264 | fps=8, 265 | loop_number=0, # Display loop number 266 | closed_loop='R-P', # Closed loop, 'N' | 'R-P' | 'R+P' | 'A' 267 | batch_size=16, 268 | stride=1, 269 | overlap=-1, 270 | format=['GIF'], # 'GIF' | 'MP4' | 'PNG' | 'WEBP' | 'WEBM' | 'TXT' | 'Frame' 271 | interp='Off', # Frame interpolation, 'Off' | 'FILM' 272 | interp_x=10, # Interp X 273 | video_source=None, 274 | video_path='', 275 | mask_path='', 276 | freeinit_enable=False, 277 | freeinit_filter="butterworth", 278 | freeinit_ds=0.25, 279 | freeinit_dt=0.25, 280 | freeinit_iters=3, 281 | latent_power=1, 282 | latent_scale=32, 283 | last_frame=None, 284 | latent_power_last=1, 285 | latent_scale_last=32, 286 | request_id = '', 287 | ): 288 | self.model = model 289 | self.enable = enable 290 | self.video_length = video_length 291 | self.fps = fps 292 | self.loop_number = loop_number 293 | self.closed_loop = closed_loop 294 | self.batch_size = batch_size 295 | self.stride = stride 296 | self.overlap = overlap 297 | self.format = format 298 | self.interp = interp 299 | self.interp_x = interp_x 300 | self.video_source = video_source 301 | self.video_path = video_path 302 | self.mask_path = mask_path 303 | self.freeinit_enable = freeinit_enable 304 | self.freeinit_filter = freeinit_filter 305 | self.freeinit_ds = freeinit_ds 306 | self.freeinit_dt = freeinit_dt 307 | self.freeinit_iters = freeinit_iters 308 | self.latent_power = latent_power 309 | self.latent_scale = latent_scale 310 | self.last_frame = last_frame 311 | self.latent_power_last = latent_power_last 312 | self.latent_scale_last = latent_scale_last 313 | self.request_id = request_id 314 | 315 | 316 | 317 | def to_dict(self, is_img2img=False): 318 | infotext = { 319 | "model": self.model, 320 | "enable": self.enable, 321 | "video_length": self.video_length, 322 | "format": self.format, 323 | "fps": self.fps, 324 | "loop_number": self.loop_number, 325 | "closed_loop": self.closed_loop, 326 | "batch_size": self.batch_size, 327 | "stride": self.stride, 328 | "overlap": self.overlap, 329 | "interp": self.interp, 330 | "interp_x": self.interp_x, 331 | "freeinit_enable": self.freeinit_enable, 332 | "freeinit_filter": self.freeinit_filter, 333 | "freeinit_ds": self.freeinit_ds, 334 | "freeinit_dt": self.freeinit_dt, 335 | "freeinit_iters": self.freeinit_iters, 336 | } 337 | if self.request_id: 338 | infotext['request_id'] = self.request_id 339 | if self.last_frame: 340 | infotext['last_frame'] = self.last_frame 341 | if len(self.video_path) > 0: 342 | infotext['video_path'] = self.video_path 343 | if len(self.mask_path) > 0: 344 | 
infotext['mask_path'] = self.mask_path 345 | 346 | if is_img2img: 347 | infotext.update({ 348 | "latent_power": self.latent_power, 349 | "latent_scale": self.latent_scale, 350 | "latent_power_last": self.latent_power_last, 351 | "latent_scale_last": self.latent_scale_last, 352 | }) 353 | 354 | return infotext 355 | 356 | class Roop: 357 | def __init__(self, img: PIL.Image , 358 | enable: bool = True, 359 | faces_index: str = "0", 360 | model: str = None, 361 | face_restorer_name: str = "GFPGAN", 362 | face_restorer_visibility: float = 1, 363 | upscaler_name: str = "R-ESRGAN 4x+", 364 | upscaler_scale: float = 1, 365 | upscaler_visibility: float = 1, 366 | swap_in_source: bool = False, 367 | swap_in_generated: bool = True): 368 | self.img = b64_img(img) 369 | self.enable = enable 370 | self.faces_index = faces_index 371 | self.model = model 372 | self.face_restorer_name = face_restorer_name 373 | self.face_restorer_visibility = face_restorer_visibility 374 | self.upscaler_name = upscaler_name 375 | self.upscaler_scale = upscaler_scale 376 | self.upscaler_visibility = upscaler_visibility 377 | self.swap_in_source = swap_in_source 378 | self.swap_in_generated = swap_in_generated 379 | 380 | def to_dict(self): 381 | return [ 382 | self.img, 383 | self.enable, 384 | self.faces_index, 385 | self.model, 386 | self.face_restorer_name, 387 | self.face_restorer_visibility, 388 | self.upscaler_name, 389 | self.upscaler_scale, 390 | self.upscaler_visibility, 391 | self.swap_in_source, 392 | self.swap_in_generated] 393 | 394 | class ReActor: 395 | def __init__(self, 396 | img: PIL.Image, #0 397 | enable: bool = True, #1 Enable ReActor 398 | source_faces_index: str = "0", #2 Comma separated face number(s) from swap-source image 399 | faces_index: str = "0", #3 Comma separated face number(s) for target image (result) 400 | model: str = 'inswapper_128.onnx', # None, #4 model path 401 | face_restorer_name: str = "CodeFormer", #4 Restore Face: None; CodeFormer; GFPGAN 402 | face_restorer_visibility: float = 1, #5 Restore visibility value 403 | restore_first: bool = True, #7 Restore face -> Upscale 404 | upscaler_name: str = "R-ESRGAN 4x+",# None, # "R-ESRGAN 4x+", #8 Upscaler (type 'None' if doesn't need), see full list here: http://127.0.0.1:7860/sdapi/v1/script-info -> reactor -> sec.8 405 | upscaler_scale: int = 2,#9 Upscaler scale value 406 | upscaler_visibility: float = 1, 407 | swap_in_source: bool = False, 408 | swap_in_generated: bool = True, 409 | console_logging_level: int = 1, #13 Console Log Level (0 - min, 1 - med or 2 - max) 410 | gender_source: int = 0, #14 Gender Detection (Source) (0 - No, 1 - Female Only, 2 - Male Only) 411 | gender_target: int = 0, #14 Gender Detection (Target) (0 - No, 1 - Female Only, 2 - Male Only) 412 | save_original: bool = False, 413 | codeFormer_weight: float = 0.5, 414 | source_hash_check: bool = True, 415 | target_hash_check: bool = False, 416 | device: str = "CUDA", #or CPU 417 | mask_face: bool = True, 418 | select_source: int = 0, #IMPORTANT. 
MUST BE 0 or faceswap won't work 419 | face_model: str = None, 420 | ): 421 | 422 | self.img = b64_img(img) 423 | self.enable = enable 424 | self.source_faces_index = source_faces_index 425 | self.faces_index = faces_index 426 | self.model = model 427 | self.face_restorer_name = face_restorer_name 428 | self.face_restorer_visibility = face_restorer_visibility 429 | self.restore_first = restore_first 430 | self.upscaler_name = upscaler_name 431 | self.upscaler_scale = upscaler_scale 432 | self.upscaler_visibility = upscaler_visibility 433 | self.swap_in_source = swap_in_source 434 | self.swap_in_generated = swap_in_generated 435 | self.console_logging_level = console_logging_level 436 | self.gender_source = gender_source 437 | self.gender_target = gender_target 438 | self.save_original = save_original 439 | self.codeFormer_weight = codeFormer_weight 440 | self.source_hash_check = source_hash_check 441 | self.target_hash_check = target_hash_check 442 | self.device = device 443 | self.mask_face = mask_face 444 | self.select_source = select_source 445 | self.face_model = face_model 446 | 447 | def to_dict(self): 448 | 449 | return [ 450 | self.img, 451 | self.enable, 452 | self.source_faces_index, 453 | self.faces_index, 454 | self.model, 455 | self.face_restorer_name, 456 | self.face_restorer_visibility, 457 | self.restore_first, 458 | self.upscaler_name, 459 | self.upscaler_scale, 460 | self.upscaler_visibility, 461 | self.swap_in_source, 462 | self.swap_in_generated, 463 | self.console_logging_level, 464 | self.gender_source, 465 | self.gender_target, 466 | self.save_original, 467 | self.codeFormer_weight, 468 | self.source_hash_check, 469 | self.target_hash_check, 470 | self.device, 471 | self.mask_face, 472 | self.select_source, 473 | self.face_model, 474 | ] 475 | 476 | class Sag: 477 | def __init__(self, 478 | enable: bool = True, #1 Enable Sag 479 | scale: float = 0.75, 480 | mask_threshold: float = 1.00 481 | ): 482 | self.enable = enable 483 | self.scale = scale 484 | self.mask_threshold = mask_threshold 485 | 486 | def to_dict(self): 487 | 488 | return [ 489 | self.enable, 490 | self.scale, 491 | self.mask_threshold, 492 | ] 493 | 494 | 495 | 496 | def b64_img(image: Image) -> str: 497 | return "data:image/png;base64," + raw_b64_img(image) 498 | 499 | 500 | def raw_b64_img(image: Image) -> str: 501 | # XXX controlnet only accepts RAW base64 without headers 502 | with io.BytesIO() as output_bytes: 503 | metadata = None 504 | for key, value in image.info.items(): 505 | if isinstance(key, str) and isinstance(value, str): 506 | if metadata is None: 507 | metadata = PngImagePlugin.PngInfo() 508 | metadata.add_text(key, value) 509 | image.save(output_bytes, format="PNG", pnginfo=metadata) 510 | 511 | bytes_data = output_bytes.getvalue() 512 | 513 | return str(base64.b64encode(bytes_data), "utf-8") 514 | 515 | 516 | class WebUIApi: 517 | has_controlnet = False 518 | has_adetailer = False 519 | has_animatediff = False 520 | 521 | def __init__( 522 | self, 523 | host="127.0.0.1", 524 | port=7860, 525 | baseurl=None, 526 | sampler="Euler a", 527 | scheduler="automatic", 528 | steps=20, 529 | use_https=False, 530 | username=None, 531 | password=None, 532 | ): 533 | if baseurl is None: 534 | if use_https: 535 | baseurl = f"https://{host}:{port}/sdapi/v1" 536 | else: 537 | baseurl = f"http://{host}:{port}/sdapi/v1" 538 | 539 | self.baseurl = baseurl 540 | self.default_sampler = sampler 541 | self.default_scheduler = scheduler 542 | self.default_steps = steps 543 | 544 | self.session = 
requests.Session() 545 | 546 | if username and password: 547 | self.set_auth(username, password) 548 | else: 549 | self.check_extensions() 550 | 551 | 552 | def check_extensions(self): 553 | try: 554 | scripts = self.get_scripts() 555 | self.has_controlnet = "controlnet m2m" in scripts["txt2img"] 556 | self.has_adetailer = "adetailer" in scripts["txt2img"] 557 | self.has_animatediff = "animatediff" in scripts["txt2img"] 558 | 559 | except: 560 | pass 561 | 562 | 563 | def set_auth(self, username, password): 564 | self.session.auth = (username, password) 565 | self.check_extensions() 566 | 567 | def _to_api_result(self, response): 568 | if response.status_code != 200: 569 | raise RuntimeError(response.status_code, response.text) 570 | 571 | r = response.json() 572 | images = [] 573 | if "images" in r.keys(): 574 | images = [Image.open(io.BytesIO(base64.b64decode(i))) for i in r["images"]] 575 | elif "image" in r.keys(): 576 | images = [Image.open(io.BytesIO(base64.b64decode(r["image"])))] 577 | 578 | info = "" 579 | if "info" in r.keys(): 580 | try: 581 | info = json.loads(r["info"]) 582 | except: 583 | info = r["info"] 584 | elif "html_info" in r.keys(): 585 | info = r["html_info"] 586 | elif "caption" in r.keys(): 587 | info = r["caption"] 588 | 589 | parameters = "" 590 | if "parameters" in r.keys(): 591 | parameters = r["parameters"] 592 | 593 | return WebUIApiResult(images, parameters, info, r) 594 | 595 | async def _to_api_result_async(self, response): 596 | if response.status != 200: 597 | raise RuntimeError(response.status, await response.text()) 598 | 599 | r = await response.json() 600 | images = [] 601 | if "images" in r.keys(): 602 | images = [Image.open(io.BytesIO(base64.b64decode(i))) for i in r["images"]] 603 | elif "image" in r.keys(): 604 | images = [Image.open(io.BytesIO(base64.b64decode(r["image"])))] 605 | 606 | info = "" 607 | if "info" in r.keys(): 608 | try: 609 | info = json.loads(r["info"]) 610 | except: 611 | info = r["info"] 612 | elif "html_info" in r.keys(): 613 | info = r["html_info"] 614 | elif "caption" in r.keys(): 615 | info = r["caption"] 616 | 617 | parameters = "" 618 | if "parameters" in r.keys(): 619 | parameters = r["parameters"] 620 | 621 | return WebUIApiResult(images, parameters, info, r) 622 | 623 | def txt2img( 624 | self, 625 | enable_hr=False, 626 | denoising_strength=0.7, 627 | firstphase_width=0, 628 | firstphase_height=0, 629 | hr_scale=2, 630 | hr_upscaler=HiResUpscaler.Latent, 631 | hr_second_pass_steps=0, 632 | hr_resize_x=0, 633 | hr_resize_y=0, 634 | hr_checkpoint_name=None, 635 | hr_sampler_name=None, 636 | hr_scheduler=None, 637 | hr_prompt="", 638 | hr_negative_prompt="", 639 | prompt="", 640 | styles=[], 641 | seed=-1, 642 | subseed=-1, 643 | subseed_strength=0.0, 644 | seed_resize_from_h=0, 645 | seed_resize_from_w=0, 646 | sampler_name=None, # use this instead of sampler_index 647 | scheduler=None, 648 | batch_size=1, 649 | n_iter=1, 650 | steps=None, 651 | cfg_scale=7.0, 652 | width=512, 653 | height=512, 654 | restore_faces=False, 655 | tiling=False, 656 | do_not_save_samples=False, 657 | do_not_save_grid=False, 658 | negative_prompt="", 659 | eta=1.0, 660 | s_churn=0, 661 | s_tmax=0, 662 | s_tmin=0, 663 | s_noise=1, 664 | override_settings={}, 665 | override_settings_restore_afterwards=True, 666 | script_args=None, # List of arguments for the script "script_name" 667 | script_name=None, 668 | send_images=True, 669 | save_images=False, 670 | alwayson_scripts={}, 671 | controlnet_units: List[ControlNetUnit] = [], 672 | adetailer: 
List[ADetailer] = [], 673 | animatediff: AnimateDiff = None, 674 | roop: Roop = None, 675 | reactor: ReActor = None, 676 | sag: Sag = None, 677 | sampler_index=None, # deprecated: use sampler_name 678 | use_deprecated_controlnet=False, 679 | use_async=False, 680 | ): 681 | if sampler_index is None: 682 | sampler_index = self.default_sampler 683 | if sampler_name is None: 684 | sampler_name = self.default_sampler 685 | 686 | if scheduler is None: 687 | scheduler = self.default_scheduler 688 | 689 | if steps is None: 690 | steps = self.default_steps 691 | if script_args is None: 692 | script_args = [] 693 | payload = { 694 | "enable_hr": enable_hr, 695 | "hr_scale": hr_scale, 696 | "hr_upscaler": hr_upscaler, 697 | "hr_second_pass_steps": hr_second_pass_steps, 698 | "hr_resize_x": hr_resize_x, 699 | "hr_resize_y": hr_resize_y, 700 | "hr_checkpoint_name": hr_checkpoint_name, 701 | "hr_sampler_name": hr_sampler_name, 702 | "hr_scheduler": hr_scheduler, 703 | "hr_prompt": hr_prompt, 704 | "hr_negative_prompt": hr_negative_prompt, 705 | "denoising_strength": denoising_strength, 706 | "firstphase_width": firstphase_width, 707 | "firstphase_height": firstphase_height, 708 | "prompt": prompt, 709 | "styles": styles, 710 | "seed": seed, 711 | "subseed": subseed, 712 | "subseed_strength": subseed_strength, 713 | "seed_resize_from_h": seed_resize_from_h, 714 | "seed_resize_from_w": seed_resize_from_w, 715 | "batch_size": batch_size, 716 | "n_iter": n_iter, 717 | "steps": steps, 718 | "cfg_scale": cfg_scale, 719 | "width": width, 720 | "height": height, 721 | "restore_faces": restore_faces, 722 | "tiling": tiling, 723 | "do_not_save_samples": do_not_save_samples, 724 | "do_not_save_grid": do_not_save_grid, 725 | "negative_prompt": negative_prompt, 726 | "eta": eta, 727 | "s_churn": s_churn, 728 | "s_tmax": s_tmax, 729 | "s_tmin": s_tmin, 730 | "s_noise": s_noise, 731 | "override_settings": override_settings, 732 | "override_settings_restore_afterwards": override_settings_restore_afterwards, 733 | "sampler_name": sampler_name, 734 | "scheduler": scheduler, 735 | "sampler_index": sampler_index, 736 | "script_name": script_name, 737 | "script_args": script_args, 738 | "send_images": send_images, 739 | "save_images": save_images, 740 | "alwayson_scripts": alwayson_scripts, 741 | } 742 | 743 | if use_deprecated_controlnet and controlnet_units and len(controlnet_units) > 0: 744 | payload["controlnet_units"] = [x.to_dict() for x in controlnet_units] 745 | return self.custom_post( 746 | "controlnet/txt2img", payload=payload, use_async=use_async 747 | ) 748 | 749 | if adetailer and len(adetailer) > 0: 750 | ads = [True] 751 | for x in adetailer: 752 | ads.append(x.to_dict()) 753 | payload["alwayson_scripts"]["ADetailer"] = { 754 | "args": ads 755 | } 756 | elif self.has_adetailer: 757 | payload["alwayson_scripts"]["ADetailer"] = { 758 | "args": [False] 759 | } 760 | 761 | if animatediff: 762 | payload["alwayson_scripts"]["animatediff"] = { 763 | "args": [animatediff.to_dict(False)] 764 | } 765 | elif self.has_animatediff: 766 | payload["alwayson_scripts"]["animatediff"] = { 767 | "args": [False], 768 | } 769 | 770 | if roop : 771 | payload["alwayson_scripts"]["roop"] = { 772 | "args": roop.to_dict() 773 | } 774 | 775 | if reactor : 776 | payload["alwayson_scripts"]["reactor"] = { 777 | "args": reactor.to_dict() 778 | } 779 | 780 | if sag : 781 | payload["alwayson_scripts"]["Self Attention Guidance"] = { 782 | "args": sag.to_dict() 783 | } 784 | 785 | 786 | if controlnet_units and len(controlnet_units) > 0: 787 
| payload["alwayson_scripts"]["ControlNet"] = { 788 | "args": [x.to_dict() for x in controlnet_units] 789 | } 790 | elif self.has_controlnet: 791 | # workaround : if not passed, webui will use previous args! 792 | payload["alwayson_scripts"]["ControlNet"] = {"args": []} 793 | 794 | return self.post_and_get_api_result( 795 | f"{self.baseurl}/txt2img", payload, use_async 796 | ) 797 | 798 | def post_and_get_api_result(self, url, json, use_async): 799 | if use_async: 800 | import asyncio 801 | 802 | return asyncio.ensure_future(self.async_post(url=url, json=json)) 803 | else: 804 | response = self.session.post(url=url, json=json) 805 | return self._to_api_result(response) 806 | 807 | async def async_post(self, url, json): 808 | import aiohttp 809 | 810 | async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout()) as session: 811 | infinite_timeout = aiohttp.ClientTimeout(total=None) 812 | auth = aiohttp.BasicAuth(self.session.auth[0], self.session.auth[1]) if self.session.auth else None 813 | async with session.post(url, json=json, auth=auth, timeout=infinite_timeout) as response: # infinite_timeout timeout here for timeout fix 814 | return await self._to_api_result_async(response) 815 | 816 | def img2img( 817 | self, 818 | images=[], # list of PIL Image 819 | resize_mode=0, 820 | denoising_strength=0.75, 821 | image_cfg_scale=1.5, 822 | mask_image=None, # PIL Image mask 823 | mask_blur=4, 824 | inpainting_fill=0, 825 | inpaint_full_res=True, 826 | inpaint_full_res_padding=0, 827 | inpainting_mask_invert=0, 828 | initial_noise_multiplier=1, 829 | prompt="", 830 | styles=[], 831 | seed=-1, 832 | subseed=-1, 833 | subseed_strength=0, 834 | seed_resize_from_h=0, 835 | seed_resize_from_w=0, 836 | sampler_name=None, # use this instead of sampler_index 837 | scheduler=None, 838 | batch_size=1, 839 | n_iter=1, 840 | steps=None, 841 | cfg_scale=7.0, 842 | width=512, 843 | height=512, 844 | restore_faces=False, 845 | tiling=False, 846 | do_not_save_samples=False, 847 | do_not_save_grid=False, 848 | negative_prompt="", 849 | eta=1.0, 850 | s_churn=0, 851 | s_tmax=0, 852 | s_tmin=0, 853 | s_noise=1, 854 | override_settings={}, 855 | override_settings_restore_afterwards=True, 856 | script_args=None, # List of arguments for the script "script_name" 857 | sampler_index=None, # deprecated: use sampler_name 858 | include_init_images=False, 859 | script_name=None, 860 | send_images=True, 861 | save_images=False, 862 | alwayson_scripts={}, 863 | controlnet_units: List[ControlNetUnit] = [], 864 | adetailer: List[ADetailer] = [], 865 | animatediff: AnimateDiff = None, 866 | roop: Roop = None, 867 | reactor: ReActor = None, 868 | sag: Sag = None, 869 | use_deprecated_controlnet=False, 870 | use_async=False, 871 | ): 872 | if sampler_name is None: 873 | sampler_name = self.default_sampler 874 | if sampler_index is None: 875 | sampler_index = self.default_sampler 876 | if scheduler is None: 877 | scheduler = self.default_scheduler 878 | if steps is None: 879 | steps = self.default_steps 880 | if script_args is None: 881 | script_args = [] 882 | 883 | payload = { 884 | "init_images": [b64_img(x) for x in images], 885 | "resize_mode": resize_mode, 886 | "denoising_strength": denoising_strength, 887 | "mask_blur": mask_blur, 888 | "inpainting_fill": inpainting_fill, 889 | "inpaint_full_res": inpaint_full_res, 890 | "inpaint_full_res_padding": inpaint_full_res_padding, 891 | "inpainting_mask_invert": inpainting_mask_invert, 892 | "initial_noise_multiplier": initial_noise_multiplier, 893 | "prompt": prompt, 894 | 
"styles": styles, 895 | "seed": seed, 896 | "subseed": subseed, 897 | "subseed_strength": subseed_strength, 898 | "seed_resize_from_h": seed_resize_from_h, 899 | "seed_resize_from_w": seed_resize_from_w, 900 | "batch_size": batch_size, 901 | "n_iter": n_iter, 902 | "steps": steps, 903 | "cfg_scale": cfg_scale, 904 | "image_cfg_scale": image_cfg_scale, 905 | "width": width, 906 | "height": height, 907 | "restore_faces": restore_faces, 908 | "tiling": tiling, 909 | "do_not_save_samples": do_not_save_samples, 910 | "do_not_save_grid": do_not_save_grid, 911 | "negative_prompt": negative_prompt, 912 | "eta": eta, 913 | "s_churn": s_churn, 914 | "s_tmax": s_tmax, 915 | "s_tmin": s_tmin, 916 | "s_noise": s_noise, 917 | "override_settings": override_settings, 918 | "override_settings_restore_afterwards": override_settings_restore_afterwards, 919 | "sampler_name": sampler_name, 920 | "scheduler": scheduler, 921 | "sampler_index": sampler_index, 922 | "include_init_images": include_init_images, 923 | "script_name": script_name, 924 | "script_args": script_args, 925 | "send_images": send_images, 926 | "save_images": save_images, 927 | "alwayson_scripts": alwayson_scripts, 928 | } 929 | if mask_image is not None: 930 | payload["mask"] = b64_img(mask_image) 931 | 932 | if use_deprecated_controlnet and controlnet_units and len(controlnet_units) > 0: 933 | payload["controlnet_units"] = [x.to_dict() for x in controlnet_units] 934 | return self.custom_post( 935 | "controlnet/img2img", payload=payload, use_async=use_async 936 | ) 937 | 938 | if adetailer and len(adetailer) > 0: 939 | ads = [True] 940 | for x in adetailer: 941 | ads.append(x.to_dict()) 942 | payload["alwayson_scripts"]["ADetailer"] = { 943 | "args": ads 944 | } 945 | elif self.has_adetailer: 946 | payload["alwayson_scripts"]["ADetailer"] = { 947 | "args": [False] 948 | } 949 | 950 | if animatediff: 951 | payload["alwayson_scripts"]["animatediff"] = { 952 | "args": [animatediff.to_dict(True)] 953 | } 954 | elif self.has_animatediff: 955 | payload["alwayson_scripts"]["animatediff"] = { 956 | "args": [False], 957 | } 958 | 959 | if roop : 960 | payload["alwayson_scripts"]["roop"] = { 961 | "args": roop.to_dict() 962 | } 963 | 964 | if reactor: 965 | payload["alwayson_scripts"]["reactor"] = { 966 | "args": reactor.to_dict() 967 | } 968 | 969 | if sag: 970 | payload["alwayson_scripts"]["Self Attention Guidance"] = { 971 | "args": sag.to_dict() 972 | } 973 | 974 | if controlnet_units and len(controlnet_units) > 0: 975 | payload["alwayson_scripts"]["ControlNet"] = { 976 | "args": [x.to_dict() for x in controlnet_units] 977 | } 978 | elif self.has_controlnet: 979 | payload["alwayson_scripts"]["ControlNet"] = {"args": []} 980 | 981 | return self.post_and_get_api_result( 982 | f"{self.baseurl}/img2img", payload, use_async 983 | ) 984 | 985 | def extra_single_image( 986 | self, 987 | image, # PIL Image 988 | resize_mode=0, 989 | show_extras_results=True, 990 | gfpgan_visibility=0, 991 | codeformer_visibility=0, 992 | codeformer_weight=0, 993 | upscaling_resize=2, 994 | upscaling_resize_w=512, 995 | upscaling_resize_h=512, 996 | upscaling_crop=True, 997 | upscaler_1="None", 998 | upscaler_2="None", 999 | extras_upscaler_2_visibility=0, 1000 | upscale_first=False, 1001 | use_async=False, 1002 | ): 1003 | payload = { 1004 | "resize_mode": resize_mode, 1005 | "show_extras_results": show_extras_results, 1006 | "gfpgan_visibility": gfpgan_visibility, 1007 | "codeformer_visibility": codeformer_visibility, 1008 | "codeformer_weight": codeformer_weight, 1009 | 
"upscaling_resize": upscaling_resize, 1010 | "upscaling_resize_w": upscaling_resize_w, 1011 | "upscaling_resize_h": upscaling_resize_h, 1012 | "upscaling_crop": upscaling_crop, 1013 | "upscaler_1": upscaler_1, 1014 | "upscaler_2": upscaler_2, 1015 | "extras_upscaler_2_visibility": extras_upscaler_2_visibility, 1016 | "upscale_first": upscale_first, 1017 | "image": b64_img(image), 1018 | } 1019 | 1020 | return self.post_and_get_api_result( 1021 | f"{self.baseurl}/extra-single-image", payload, use_async 1022 | ) 1023 | 1024 | def extra_batch_images( 1025 | self, 1026 | images, # list of PIL images 1027 | name_list=None, # list of image names 1028 | resize_mode=0, 1029 | show_extras_results=True, 1030 | gfpgan_visibility=0, 1031 | codeformer_visibility=0, 1032 | codeformer_weight=0, 1033 | upscaling_resize=2, 1034 | upscaling_resize_w=512, 1035 | upscaling_resize_h=512, 1036 | upscaling_crop=True, 1037 | upscaler_1="None", 1038 | upscaler_2="None", 1039 | extras_upscaler_2_visibility=0, 1040 | upscale_first=False, 1041 | use_async=False, 1042 | ): 1043 | if name_list is not None: 1044 | if len(name_list) != len(images): 1045 | raise RuntimeError("len(images) != len(name_list)") 1046 | else: 1047 | name_list = [f"image{i + 1:05}" for i in range(len(images))] 1048 | images = [b64_img(x) for x in images] 1049 | 1050 | image_list = [] 1051 | for name, image in zip(name_list, images): 1052 | image_list.append({"data": image, "name": name}) 1053 | 1054 | payload = { 1055 | "resize_mode": resize_mode, 1056 | "show_extras_results": show_extras_results, 1057 | "gfpgan_visibility": gfpgan_visibility, 1058 | "codeformer_visibility": codeformer_visibility, 1059 | "codeformer_weight": codeformer_weight, 1060 | "upscaling_resize": upscaling_resize, 1061 | "upscaling_resize_w": upscaling_resize_w, 1062 | "upscaling_resize_h": upscaling_resize_h, 1063 | "upscaling_crop": upscaling_crop, 1064 | "upscaler_1": upscaler_1, 1065 | "upscaler_2": upscaler_2, 1066 | "extras_upscaler_2_visibility": extras_upscaler_2_visibility, 1067 | "upscale_first": upscale_first, 1068 | "imageList": image_list, 1069 | } 1070 | 1071 | return self.post_and_get_api_result( 1072 | f"{self.baseurl}/extra-batch-images", payload, use_async 1073 | ) 1074 | 1075 | # XXX 500 error (2022/12/26) 1076 | def png_info(self, image): 1077 | payload = { 1078 | "image": b64_img(image), 1079 | } 1080 | 1081 | response = self.session.post(url=f"{self.baseurl}/png-info", json=payload) 1082 | return self._to_api_result(response) 1083 | 1084 | """ 1085 | :param image pass base64 encoded image or PIL Image 1086 | :param model "clip" or "deepdanbooru" 1087 | """ 1088 | def interrogate(self, image, model="clip"): 1089 | payload = { 1090 | "image": b64_img(image) if isinstance(image, Image.Image) else image, 1091 | "model": model, 1092 | } 1093 | 1094 | response = self.session.post(url=f"{self.baseurl}/interrogate", json=payload) 1095 | return self._to_api_result(response) 1096 | 1097 | def list_prompt_gen_models(self): 1098 | r = self.custom_get("promptgen/list_models") 1099 | return r['available_models'] 1100 | 1101 | def prompt_gen(self, 1102 | model_name: str = "AUTOMATIC/promptgen-lexart", 1103 | batch_count: int = 1, 1104 | batch_size: int = 10, 1105 | text: str = "", 1106 | min_length: int = 20, 1107 | max_length: int = 150, 1108 | num_beams: int = 1, 1109 | temperature: float = 1, 1110 | repetition_penalty: float = 1, 1111 | length_preference: float = 1, 1112 | sampling_mode: str = "Top K", 1113 | top_k: float = 12, 1114 | top_p: float = 0.15, 1115 | 
): 1116 | payload = { 1117 | "model_name": model_name, 1118 | "batch_count": batch_count, 1119 | "batch_size": batch_size, 1120 | "text": text, 1121 | "min_length": min_length, 1122 | "max_length": max_length, 1123 | "num_beams": num_beams, 1124 | "temperature": temperature, 1125 | "repetition_penalty": repetition_penalty, 1126 | "length_preference": length_preference, 1127 | "sampling_mode": sampling_mode, 1128 | "top_k": top_k, 1129 | "top_p": top_p 1130 | } 1131 | 1132 | r = self.custom_post("promptgen/generate", payload=payload) 1133 | return r.json['prompts'] 1134 | 1135 | def interrupt(self): 1136 | response = self.session.post(url=f"{self.baseurl}/interrupt") 1137 | return response.json() 1138 | 1139 | def skip(self): 1140 | response = self.session.post(url=f"{self.baseurl}/skip") 1141 | return response.json() 1142 | 1143 | def get_options(self): 1144 | response = self.session.get(url=f"{self.baseurl}/options") 1145 | return response.json() 1146 | 1147 | def set_options(self, options): 1148 | response = self.session.post(url=f"{self.baseurl}/options", json=options) 1149 | return response.json() 1150 | 1151 | def get_cmd_flags(self): 1152 | response = self.session.get(url=f"{self.baseurl}/cmd-flags") 1153 | return response.json() 1154 | 1155 | def get_progress(self): 1156 | response = self.session.get(url=f"{self.baseurl}/progress") 1157 | return response.json() 1158 | 1159 | def get_cmd_flags(self): 1160 | response = self.session.get(url=f"{self.baseurl}/cmd-flags") 1161 | return response.json() 1162 | 1163 | def get_samplers(self): 1164 | response = self.session.get(url=f"{self.baseurl}/samplers") 1165 | return response.json() 1166 | 1167 | def get_sd_vae(self): 1168 | response = self.session.get(url=f"{self.baseurl}/sd-vae") 1169 | return response.json() 1170 | 1171 | def get_upscalers(self): 1172 | response = self.session.get(url=f"{self.baseurl}/upscalers") 1173 | return response.json() 1174 | 1175 | def get_latent_upscale_modes(self): 1176 | response = self.session.get(url=f"{self.baseurl}/latent-upscale-modes") 1177 | return response.json() 1178 | 1179 | def get_loras(self): 1180 | response = self.session.get(url=f"{self.baseurl}/loras") 1181 | return response.json() 1182 | 1183 | def get_sd_models(self): 1184 | response = self.session.get(url=f"{self.baseurl}/sd-models") 1185 | return response.json() 1186 | 1187 | def get_hypernetworks(self): 1188 | response = self.session.get(url=f"{self.baseurl}/hypernetworks") 1189 | return response.json() 1190 | 1191 | def get_face_restorers(self): 1192 | response = self.session.get(url=f"{self.baseurl}/face-restorers") 1193 | return response.json() 1194 | 1195 | def get_realesrgan_models(self): 1196 | response = self.session.get(url=f"{self.baseurl}/realesrgan-models") 1197 | return response.json() 1198 | 1199 | def get_prompt_styles(self): 1200 | response = self.session.get(url=f"{self.baseurl}/prompt-styles") 1201 | return response.json() 1202 | 1203 | def get_artist_categories(self): # deprecated ? 1204 | response = self.session.get(url=f"{self.baseurl}/artist-categories") 1205 | return response.json() 1206 | 1207 | def get_artists(self): # deprecated ? 
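# --- Editor's note: illustrative usage sketch, not part of the library source ---
# The get_* helpers and get_options()/set_options() are thin wrappers over the
# corresponding REST endpoints and return the parsed JSON. For example, switching
# the active checkpoint and polling progress (api denotes a webuiapi.WebUIApi client):
#
#   models = api.get_sd_models()                    # list of dicts with a "title" key
#   api.set_options({"sd_model_checkpoint": models[0]["title"]})
#   api.get_progress()["progress"]                  # 0.0 when idle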
1208 | response = self.session.get(url=f"{self.baseurl}/artists") 1209 | return response.json() 1210 | 1211 | def refresh_checkpoints(self): 1212 | response = self.session.post(url=f"{self.baseurl}/refresh-checkpoints") 1213 | return response.json() 1214 | 1215 | def get_scripts(self): 1216 | response = self.session.get(url=f"{self.baseurl}/scripts") 1217 | return response.json() 1218 | 1219 | def get_embeddings(self): 1220 | response = self.session.get(url=f"{self.baseurl}/embeddings") 1221 | return response.json() 1222 | 1223 | def get_memory(self): 1224 | response = self.session.get(url=f"{self.baseurl}/memory") 1225 | return response.json() 1226 | 1227 | def get_schedulers(self): 1228 | response = self.session.get(url=f"{self.baseurl}/schedulers") 1229 | return response.json() 1230 | 1231 | def get_endpoint(self, endpoint, baseurl): 1232 | if baseurl: 1233 | return f"{self.baseurl}/{endpoint}" 1234 | else: 1235 | from urllib.parse import urlparse, urlunparse 1236 | 1237 | parsed_url = urlparse(self.baseurl) 1238 | basehost = parsed_url.netloc 1239 | parsed_url2 = (parsed_url[0], basehost, endpoint, "", "", "") 1240 | return urlunparse(parsed_url2) 1241 | 1242 | def custom_get(self, endpoint, baseurl=False): 1243 | url = self.get_endpoint(endpoint, baseurl) 1244 | response = self.session.get(url=url) 1245 | return response.json() 1246 | 1247 | def custom_post(self, endpoint, payload={}, baseurl=False, use_async=False): 1248 | url = self.get_endpoint(endpoint, baseurl) 1249 | if use_async: 1250 | import asyncio 1251 | 1252 | return asyncio.ensure_future(self.async_post(url=url, json=payload)) 1253 | else: 1254 | response = self.session.post(url=url, json=payload) 1255 | return self._to_api_result(response) 1256 | 1257 | def controlnet_version(self): 1258 | r = self.custom_get("controlnet/version") 1259 | return r["version"] 1260 | 1261 | def controlnet_model_list(self): 1262 | r = self.custom_get("controlnet/model_list") 1263 | return r["model_list"] 1264 | 1265 | def controlnet_module_list(self): 1266 | r = self.custom_get("controlnet/module_list") 1267 | return r["module_list"] 1268 | 1269 | def controlnet_detect( 1270 | self, images, module="none", processor_res=512, threshold_a=64, threshold_b=64 1271 | ): 1272 | images = [b64_img(x) for x in images] 1273 | payload = { 1274 | "controlnet_module": module, 1275 | "controlnet_images": images, 1276 | "controlnet_processor_res": processor_res, 1277 | "controlnet_threshold_a": threshold_a, 1278 | "controlnet_threshold_b": threshold_b, 1279 | } 1280 | r = self.custom_post("controlnet/detect", payload=payload) 1281 | return r 1282 | 1283 | def util_get_model_names(self): 1284 | return sorted([x["title"] for x in self.get_sd_models()]) 1285 | 1286 | def util_get_sampler_names(self): 1287 | return sorted([s['name'] for s in self.get_samplers()]) 1288 | 1289 | def util_get_scheduler_names(self): 1290 | return sorted([s['name'] for s in self.get_schedulers()]) 1291 | 1292 | def util_set_model(self, name, find_closest=True): 1293 | if find_closest: 1294 | name = name.lower() 1295 | models = self.util_get_model_names() 1296 | found_model = None 1297 | if name in models: 1298 | found_model = name 1299 | elif find_closest: 1300 | import difflib 1301 | 1302 | def str_simularity(a, b): 1303 | return difflib.SequenceMatcher(None, a, b).ratio() 1304 | 1305 | max_sim = 0.0 1306 | max_model = models[0] 1307 | for model in models: 1308 | sim = str_simularity(name, model) 1309 | if sim >= max_sim: 1310 | max_sim = sim 1311 | max_model = model 1312 | 
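# --- Editor's note: illustrative usage sketch, not part of the library source ---
# custom_get()/custom_post() resolve endpoints either under self.baseurl
# (baseurl=True) or from the server root (the default, used by extensions such as
# ControlNet whose routes live outside /sdapi/v1). Calling extension endpoints
# directly (api denotes a webuiapi.WebUIApi client):
#
#   print(api.controlnet_version())
#   detected = api.controlnet_detect(images=[some_image], module="canny")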
found_model = max_model 1313 | if found_model: 1314 | print(f"loading {found_model}") 1315 | options = {} 1316 | options["sd_model_checkpoint"] = found_model 1317 | self.set_options(options) 1318 | print(f"model changed to {found_model}") 1319 | else: 1320 | print("model not found") 1321 | 1322 | def util_get_current_model(self): 1323 | options = self.get_options() 1324 | if ("sd_model_checkpoint" in options): 1325 | return options["sd_model_checkpoint"] 1326 | else: 1327 | sd_models = self.get_sd_models() 1328 | sd_model = [model for model in sd_models if model["sha256"] == options["sd_checkpoint_hash"]] 1329 | return sd_model[0]["title"] 1330 | 1331 | def util_wait_for_ready(self, check_interval=5.0): 1332 | import time 1333 | 1334 | while True: 1335 | result = self.get_progress() 1336 | progress = result["progress"] 1337 | job_count = result["state"]["job_count"] 1338 | if progress == 0.0 and job_count == 0: 1339 | break 1340 | else: 1341 | print(f"[WAIT]: progress = {progress:.4f}, job_count = {job_count}") 1342 | time.sleep(check_interval) 1343 | 1344 | 1345 | ## Interface for extensions 1346 | 1347 | 1348 | # https://github.com/mix1009/model-keyword 1349 | @dataclass 1350 | class ModelKeywordResult: 1351 | keywords: list 1352 | model: str 1353 | oldhash: str 1354 | match_source: str 1355 | 1356 | 1357 | class ModelKeywordInterface: 1358 | def __init__(self, webuiapi): 1359 | self.api = webuiapi 1360 | 1361 | def get_keywords(self): 1362 | result = self.api.custom_get("model_keyword/get_keywords") 1363 | keywords = result["keywords"] 1364 | model = result["model"] 1365 | oldhash = result["hash"] 1366 | match_source = result["match_source"] 1367 | return ModelKeywordResult(keywords, model, oldhash, match_source) 1368 | 1369 | 1370 | 1371 | 1372 | 1373 | # https://github.com/Klace/stable-diffusion-webui-instruct-pix2pix 1374 | class InstructPix2PixInterface: 1375 | def __init__(self, webuiapi): 1376 | self.api = webuiapi 1377 | 1378 | def img2img( 1379 | self, 1380 | images=[], 1381 | prompt: str = "", 1382 | negative_prompt: str = "", 1383 | output_batches: int = 1, 1384 | sampler: str = "Euler a", 1385 | steps: int = 20, 1386 | seed: int = 0, 1387 | randomize_seed: bool = True, 1388 | text_cfg: float = 7.5, 1389 | image_cfg: float = 1.5, 1390 | randomize_cfg: bool = False, 1391 | output_image_width: int = 512, 1392 | ): 1393 | init_images = [b64_img(x) for x in images] 1394 | payload = { 1395 | "init_images": init_images, 1396 | "prompt": prompt, 1397 | "negative_prompt": negative_prompt, 1398 | "output_batches": output_batches, 1399 | "sampler": sampler, 1400 | "steps": steps, 1401 | "seed": seed, 1402 | "randomize_seed": randomize_seed, 1403 | "text_cfg": text_cfg, 1404 | "image_cfg": image_cfg, 1405 | "randomize_cfg": randomize_cfg, 1406 | "output_image_width": output_image_width, 1407 | } 1408 | return self.api.custom_post("instruct-pix2pix/img2img", payload=payload) 1409 | 1410 | 1411 | #https://github.com/AUTOMATIC1111/stable-diffusion-webui-rembg 1412 | class RemBGInterface: 1413 | def __init__(self, webuiapi): 1414 | self.api = webuiapi 1415 | 1416 | def rembg( 1417 | self, 1418 | input_image: str = "", #image string (?) 
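# --- Editor's note: illustrative usage sketch, not part of the library source ---
# util_set_model() fuzzy-matches the given name against the installed checkpoint
# titles (difflib ratio) and switches checkpoints via set_options();
# util_wait_for_ready() polls /progress until no job is running. The checkpoint
# name below is a placeholder. (api denotes a webuiapi.WebUIApi client.)
#
#   api.util_set_model("v1-5-pruned-emaonly")
#   api.util_wait_for_ready()
#   print(api.util_get_current_model())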
1419 | model: str = 'u2net', #[None, 'u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg','silueta','isnet-general-use','isnet-anime'] 1420 | return_mask: bool = False, 1421 | alpha_matting: bool = False, 1422 | alpha_matting_foreground_threshold: int = 240, 1423 | alpha_matting_background_threshold: int = 10, 1424 | alpha_matting_erode_size: int = 10 1425 | ): 1426 | 1427 | payload = { 1428 | "input_image": b64_img(input_image), 1429 | "model": model, 1430 | "return_mask": return_mask, 1431 | "alpha_matting": alpha_matting, 1432 | "alpha_matting_foreground_threshold": alpha_matting_foreground_threshold, 1433 | "alpha_matting_background_threshold": alpha_matting_background_threshold, 1434 | "alpha_matting_erode_size": alpha_matting_erode_size 1435 | } 1436 | return self.api.custom_post("rembg", payload=payload) 1437 | 1438 | 1439 | # https://github.com/Mikubill/sd-webui-controlnet 1440 | class ControlNetInterface: 1441 | def __init__(self, webuiapi, show_deprecation_warning=True): 1442 | self.api = webuiapi 1443 | self.show_deprecation_warning = show_deprecation_warning 1444 | 1445 | def print_deprecation_warning(self): 1446 | print( 1447 | "ControlNetInterface txt2img/img2img is deprecated. Please use normal txt2img/img2img with controlnet_units param" 1448 | ) 1449 | 1450 | def txt2img( 1451 | self, 1452 | prompt: str = "", 1453 | negative_prompt: str = "", 1454 | controlnet_image: [] = [], 1455 | controlnet_mask: [] = [], 1456 | controlnet_module: str = "", 1457 | controlnet_model: str = "", 1458 | controlnet_weight: float = 0.5, 1459 | controlnet_resize_mode: str = "Scale to Fit (Inner Fit)", 1460 | controlnet_low_vram: bool = False, 1461 | controlnet_processor_res: int = 512, 1462 | controlnet_threshold_a: int = 64, 1463 | controlnet_threshold_b: int = 64, 1464 | controlnet_guidance: float = 1.0, 1465 | enable_hr: bool = False, # hiresfix 1466 | denoising_strength: float = 0.5, 1467 | hr_scale: float = 1.5, 1468 | hr_upscale: str = "Latent", 1469 | guess_mode: bool = True, 1470 | seed: int = -1, 1471 | subseed: int = -1, 1472 | subseed_strength: int = -1, 1473 | sampler_index: str = "Euler a", 1474 | batch_size: int = 1, 1475 | n_iter: int = 1, # Iteration 1476 | steps: int = 20, 1477 | cfg_scale: float = 7, 1478 | width: int = 512, 1479 | height: int = 512, 1480 | restore_faces: bool = False, 1481 | override_settings: Dict[str, Any] = None, 1482 | override_settings_restore_afterwards: bool = True, 1483 | ): 1484 | if self.show_deprecation_warning: 1485 | self.print_deprecation_warning() 1486 | 1487 | controlnet_image_b64 = [raw_b64_img(x) for x in controlnet_image] 1488 | controlnet_mask_b64 = [raw_b64_img(x) for x in controlnet_mask] 1489 | 1490 | payload = { 1491 | "prompt": prompt, 1492 | "negative_prompt": negative_prompt, 1493 | "controlnet_image": controlnet_image_b64, 1494 | "controlnet_mask": controlnet_mask_b64, 1495 | "controlnet_module": controlnet_module, 1496 | "controlnet_model": controlnet_model, 1497 | "controlnet_weight": controlnet_weight, 1498 | "controlnet_resize_mode": controlnet_resize_mode, 1499 | "controlnet_low_vram": controlnet_low_vram, 1500 | "controlnet_processor_res": controlnet_processor_res, 1501 | "controlnet_threshold_a": controlnet_threshold_a, 1502 | "controlnet_threshold_b": controlnet_threshold_b, 1503 | "enable_hr": enable_hr, 1504 | "denoising_strength": denoising_strength, 1505 | "hr_scale": hr_scale, 1506 | "hr_upscale": hr_upscale, 1507 | "guess_mode": guess_mode, 1508 | "seed": seed, 1509 | "subseed": subseed, 1510 | "subseed_strength": 
subseed_strength, 1511 | "sampler_index": sampler_index, 1512 | "batch_size": batch_size, 1513 | "n_iter": n_iter, 1514 | "steps": steps, 1515 | "cfg_scale": cfg_scale, 1516 | "width": width, 1517 | "height": height, 1518 | "restore_faces": restore_faces, 1519 | "override_settings": override_settings, 1520 | "override_settings_restore_afterwards": override_settings_restore_afterwards, 1521 | } 1522 | return self.api.custom_post("controlnet/txt2img", payload=payload) 1523 | 1524 | def img2img( 1525 | self, 1526 | init_images: [] = [], 1527 | mask: str = None, 1528 | mask_blur: int = 30, 1529 | inpainting_fill: int = 0, 1530 | inpaint_full_res: bool = True, 1531 | inpaint_full_res_padding: int = 1, 1532 | inpainting_mask_invert: int = 1, 1533 | resize_mode: int = 0, 1534 | denoising_strength: float = 0.7, 1535 | prompt: str = "", 1536 | negative_prompt: str = "", 1537 | controlnet_image: [] = [], 1538 | controlnet_mask: [] = [], 1539 | controlnet_module: str = "", 1540 | controlnet_model: str = "", 1541 | controlnet_weight: float = 1.0, 1542 | controlnet_resize_mode: str = "Scale to Fit (Inner Fit)", 1543 | controlnet_low_vram: bool = False, 1544 | controlnet_processor_res: int = 512, 1545 | controlnet_threshold_a: int = 64, 1546 | controlnet_threshold_b: int = 64, 1547 | guess_mode: bool = True, 1548 | seed: int = -1, 1549 | subseed: int = -1, 1550 | subseed_strength: int = -1, 1551 | sampler_index: str = "", 1552 | batch_size: int = 1, 1553 | n_iter: int = 1, # Iteration 1554 | steps: int = 20, 1555 | cfg_scale: float = 7, 1556 | width: int = 512, 1557 | height: int = 512, 1558 | restore_faces: bool = False, 1559 | include_init_images: bool = True, 1560 | override_settings: Dict[str, Any] = None, 1561 | override_settings_restore_afterwards: bool = True, 1562 | ): 1563 | if self.show_deprecation_warning: 1564 | self.print_deprecation_warning() 1565 | 1566 | init_images_b64 = [raw_b64_img(x) for x in init_images] 1567 | controlnet_image_b64 = [raw_b64_img(x) for x in controlnet_image] 1568 | controlnet_mask_b64 = [raw_b64_img(x) for x in controlnet_mask] 1569 | 1570 | payload = { 1571 | "init_images": init_images_b64, 1572 | "mask": raw_b64_img(mask) if mask else None, 1573 | "mask_blur": mask_blur, 1574 | "inpainting_fill": inpainting_fill, 1575 | "inpaint_full_res": inpaint_full_res, 1576 | "inpaint_full_res_padding": inpaint_full_res_padding, 1577 | "inpainting_mask_invert": inpainting_mask_invert, 1578 | "resize_mode": resize_mode, 1579 | "denoising_strength": denoising_strength, 1580 | "prompt": prompt, 1581 | "negative_prompt": negative_prompt, 1582 | "controlnet_image": controlnet_image_b64, 1583 | "controlnet_mask": controlnet_mask_b64, 1584 | "controlnet_module": controlnet_module, 1585 | "controlnet_model": controlnet_model, 1586 | "controlnet_weight": controlnet_weight, 1587 | "controlnet_resize_mode": controlnet_resize_mode, 1588 | "controlnet_low_vram": controlnet_low_vram, 1589 | "controlnet_processor_res": controlnet_processor_res, 1590 | "controlnet_threshold_a": controlnet_threshold_a, 1591 | "controlnet_threshold_b": controlnet_threshold_b, 1592 | "guess_mode": guess_mode, 1593 | "seed": seed, 1594 | "subseed": subseed, 1595 | "subseed_strength": subseed_strength, 1596 | "sampler_index": sampler_index, 1597 | "batch_size": batch_size, 1598 | "n_iter": n_iter, 1599 | "steps": steps, 1600 | "cfg_scale": cfg_scale, 1601 | "width": width, 1602 | "height": height, 1603 | "restore_faces": restore_faces, 1604 | "include_init_images": include_init_images, 1605 | 
"override_settings": override_settings, 1606 | "override_settings_restore_afterwards": override_settings_restore_afterwards, 1607 | } 1608 | return self.api.custom_post("controlnet/img2img", payload=payload) 1609 | 1610 | def model_list(self): 1611 | r = self.api.custom_get("controlnet/model_list") 1612 | return r["model_list"] 1613 | 1614 | 1615 | # https://github.com/continue-revolution/sd-webui-segment-anything 1616 | @dataclass 1617 | class SegmentAnythingSamResult: 1618 | message: Optional[str] 1619 | blended_images: List[Image.Image] 1620 | masks: List[Image.Image] 1621 | masked_images: List[Image.Image] 1622 | 1623 | 1624 | @dataclass 1625 | class SegmentAnythingGinoResult: 1626 | message: str 1627 | image_with_box: Image.Image 1628 | 1629 | 1630 | @dataclass 1631 | class SegmentAnythingDilationResult: 1632 | blended_image: Image.Image 1633 | mask: Image.Image 1634 | masked_image: Image.Image 1635 | 1636 | 1637 | @dataclass 1638 | class SegmentAnythingControlNetSegNotRandomResult: 1639 | message: str 1640 | sem_presam: Image.Image 1641 | sem_postsam: Image.Image 1642 | blended_presam: Image.Image 1643 | blended_postsam: Image.Image 1644 | 1645 | 1646 | @dataclass 1647 | class SegmentAnythingControlNetSegRandomResult: 1648 | message: str 1649 | blended_image: Image.Image 1650 | random_seg: Image.Image 1651 | edit_anything_control: Image.Image 1652 | 1653 | 1654 | @dataclass 1655 | class SegmentAnythingSemanticSegWithCatIdResult: 1656 | message: str 1657 | blended_image: Image.Image 1658 | mask: Image.Image 1659 | masked_image: Image.Image 1660 | resized_input: Image.Image 1661 | 1662 | 1663 | class SegmentAnythingInterface: 1664 | def __init__(self, webuiapi: WebUIApi): 1665 | self.api = webuiapi 1666 | 1667 | def heartbeat(self) -> Dict[str, str]: 1668 | """Check if this extension is working.""" 1669 | return self.api.custom_get("sam/heartbeat") 1670 | 1671 | def get_sam_models(self) -> List[str]: 1672 | """Get available SAM models""" 1673 | return self.api.custom_get("sam/sam-model") 1674 | 1675 | def sam_predict( 1676 | self, 1677 | image: Image, 1678 | sam_model_name: str = "sam_vit_h_4b8939.pth", 1679 | sam_positive_points: Optional[List[List[float]]] = None, 1680 | sam_negative_points: Optional[List[List[float]]] = None, 1681 | dino_enabled: bool = False, 1682 | dino_model_name: Optional[str] = "GroundingDINO_SwinT_OGC (694MB)", 1683 | dino_text_prompt: Optional[str] = None, 1684 | dino_box_threshold: Optional[float] = 0.3, 1685 | dino_preview_checkbox: bool = False, 1686 | dino_preview_boxes_selection: Optional[List[int]] = None 1687 | ) -> SegmentAnythingSamResult: 1688 | """ 1689 | Get masks from SAM 1690 | 1691 | :param image: Input image. 1692 | :param sam_model_name: SAM model name. You should manually download models before using them. 1693 | :param sam_positive_points: Positive point prompts in N * 2 python list. 1694 | :param sam_negative_points: Negative point prompts in N * 2 python list. 1695 | :param dino_enabled: Whether to use GroundingDINO to generate bounding boxes 1696 | from text to guide SAM to generate masks. 1697 | :param dino_model_name: Choose one of "GroundingDINO_SwinT_OGC (694MB)" and "GroundingDINO_SwinB (938MB)" 1698 | as your desired GroundingDINO model. 1699 | :param dino_text_prompt: Text prompt for GroundingDINO to generate bounding boxes. 1700 | Separate different categories with . 1701 | :param dino_box_threshold: Threshold for selecting bounding boxes. Do not use a very high value, 1702 | otherwise you may get no box. 
1703 | :param dino_preview_checkbox: Whether to preview checkbox. 1704 | You can enable preview to select boxes you want if you have accessed API dino-predict 1705 | :param dino_preview_boxes_selection: Choose the boxes you want. Index start from 0. 1706 | """ 1707 | payload = { 1708 | "input_image": raw_b64_img(image), 1709 | "sam_model_name": sam_model_name, 1710 | "sam_positive_points": sam_positive_points or [], 1711 | "sam_negative_points": sam_negative_points or [], 1712 | "dino_enabled": dino_enabled, 1713 | "dino_model_name": dino_model_name, 1714 | "dino_text_prompt": dino_text_prompt, 1715 | "dino_box_threshold": dino_box_threshold, 1716 | "dino_preview_checkbox": dino_preview_checkbox, 1717 | "dino_preview_boxes_selection": dino_preview_boxes_selection 1718 | } 1719 | 1720 | url = self.api.get_endpoint("sam/sam-predict", baseurl=False) 1721 | r = self.api.session.post(url=url, json=payload).json() 1722 | 1723 | return SegmentAnythingSamResult( 1724 | message=r.get("msg"), 1725 | blended_images=[Image.open(io.BytesIO(base64.b64decode(i))) for i in r["blended_images"]], 1726 | masks=[Image.open(io.BytesIO(base64.b64decode(i))) for i in r["masks"]], 1727 | masked_images=[Image.open(io.BytesIO(base64.b64decode(i))) for i in r["masked_images"]] 1728 | ) 1729 | 1730 | def dino_predict( 1731 | self, 1732 | image: Image, 1733 | text_prompt: str, 1734 | dino_model_name: str = "GroundingDINO_SwinT_OGC (694MB)", 1735 | box_threshold: float = 0.3 1736 | ) -> SegmentAnythingGinoResult: 1737 | """ 1738 | Get bounding boxes from GroundingDINO 1739 | 1740 | :param image: Input image. 1741 | :param text_prompt: Text prompt for GroundingDINO to generate bounding boxes. 1742 | Separate different categories with . 1743 | :param dino_model_name: Choose one of "GroundingDINO_SwinT_OGC (694MB)" and "GroundingDINO_SwinB (938MB)" 1744 | as your desired GroundingDINO model. 1745 | :param box_threshold: Threshold for selecting bounding boxes. Do not use a very high value, 1746 | otherwise you may get no box. 1747 | """ 1748 | payload = { 1749 | "input_image": raw_b64_img(image), 1750 | "text_prompt": text_prompt, 1751 | "dino_model_name": dino_model_name, 1752 | "box_threshold": box_threshold 1753 | } 1754 | 1755 | url = self.api.get_endpoint("sam/dino-predict", baseurl=False) 1756 | r = self.api.session.post(url=url, json=payload).json() 1757 | 1758 | return SegmentAnythingGinoResult( 1759 | message=r.get("msg"), 1760 | image_with_box=Image.open(io.BytesIO(base64.b64decode(r["image_with_box"]))) 1761 | ) 1762 | 1763 | def dilate_mask( 1764 | self, 1765 | image: Image, 1766 | mask: Image, 1767 | dilate_amount: int = 10 1768 | ) -> SegmentAnythingDilationResult: 1769 | """ 1770 | Expand mask 1771 | 1772 | :param image: Input image. 1773 | :param mask: Input mask. 1774 | :param dilate_amount: Mask expansion amount from 0 to 100. 
1775 | """ 1776 | payload = { 1777 | "input_image": raw_b64_img(image), 1778 | "mask": raw_b64_img(mask), 1779 | "dilate_amount": dilate_amount 1780 | } 1781 | 1782 | url = self.api.get_endpoint("sam/dilate-mask", baseurl=False) 1783 | r = self.api.session.post(url=url, json=payload).json() 1784 | 1785 | return SegmentAnythingDilationResult( 1786 | blended_image=Image.open(io.BytesIO(base64.b64decode(r["blended_image"]))), 1787 | mask=Image.open(io.BytesIO(base64.b64decode(r["mask"]))), 1788 | masked_image=Image.open(io.BytesIO(base64.b64decode(r["masked_image"]))) 1789 | ) 1790 | 1791 | def generate_semantic_segmentation( 1792 | self, 1793 | image: Image, 1794 | sam_model_name: str = "sam_vit_h_4b8939.pth", 1795 | processor: str = "seg_ofade20k", 1796 | processor_res: int = 512, 1797 | pixel_perfect: bool = False, 1798 | resize_mode: Optional[int] = 1, 1799 | target_width: Optional[int] = None, 1800 | target_height: Optional[int] = None, 1801 | points_per_side: Optional[int] = 32, 1802 | points_per_batch: int = 64, 1803 | pred_iou_thresh: float = 0.88, 1804 | stability_score_thresh: float = 0.95, 1805 | stability_score_offset: float = 1.0, 1806 | box_nms_thresh: float = 0.7, 1807 | crop_n_layers: int = 0, 1808 | crop_nms_thresh: float = 0.7, 1809 | crop_overlap_ratio: float = 512 / 1500, 1810 | crop_n_points_downscale_factor: int = 1, 1811 | min_mask_region_area: int = 0 1812 | ) -> Union[SegmentAnythingControlNetSegNotRandomResult, SegmentAnythingControlNetSegRandomResult]: 1813 | """ 1814 | Generate semantic segmentation enhanced by SAM. 1815 | 1816 | :param image: Input image. 1817 | :param sam_model_name: SAM model name. 1818 | :param processor: Preprocessor for semantic segmentation, choose from one of "seg_ufade20k" 1819 | (uniformer trained on ade20k, performance really bad, can be greatly enhanced by SAM), 1820 | "seg_ofade20k" (oneformer trained on ade20k, performance far better than uniformer, can 1821 | be slightly improved by SAM), "seg_ofcoco" (oneformer trained on coco, similar to seg_ofade20k), 1822 | "random" (for EditAnything) 1823 | :param processor_res: Preprocessor resolution, range in (64, 2048]. 1824 | :param pixel_perfect: Whether to enable pixel perfect. If enabled, target_W and target_H will be required, 1825 | and the processor resolution will be overridden by the optimal value. 1826 | :param resize_mode: Resize mode from the original shape to target shape, 1827 | only effective when pixel_perfect is enabled. 0: just resize, 1: crop and resize, 2: resize and fill 1828 | :param target_width: [Required if pixel_perfect is True] Target width if the segmentation will be used 1829 | to generate a new image. 1830 | :param target_height: [Required if pixel_perfect is True] Target height if the segmentation will be used 1831 | to generate a new image. 1832 | :param points_per_side: The number of points to be sampled 1833 | along one side of the image. The total number of points is 1834 | points_per_side**2. If None, 'point_grids' must provide explicit 1835 | point sampling. 1836 | :param points_per_batch: Sets the number of points run simultaneously 1837 | by the model. Higher numbers may be faster but use more GPU memory. 1838 | :param pred_iou_thresh: A filtering threshold in [0,1], using the 1839 | model's predicted mask quality. 1840 | :param stability_score_thresh: A filtering threshold in [0,1], using 1841 | the stability of the mask under changes to the cutoff used to binarize 1842 | the model's mask predictions. 
1843 | :param stability_score_offset: The amount to shift the cutoff when 1844 | calculated the stability score. 1845 | :param box_nms_thresh: The box IoU cutoff used by non-maximal 1846 | suppression to filter duplicate masks. 1847 | :param crop_n_layers: If >0, mask prediction will be run again on 1848 | crops of the image. Sets the number of layers to run, where each 1849 | layer has 2**i_layer number of image crops. 1850 | :param crop_nms_thresh: The box IoU cutoff used by non-maximal 1851 | suppression to filter duplicate masks between different crops. 1852 | :param crop_overlap_ratio: Sets the degree to which crops overlap. 1853 | In the first crop layer, crops will overlap by this fraction of 1854 | the image length. Later layers with more crops scale down this overlap. 1855 | :param crop_n_points_downscale_factor: The number of points-per-side 1856 | sampled in layer n is scaled down by crop_n_points_downscale_factor**n. 1857 | :param min_mask_region_area: If >0, postprocessing will be applied 1858 | to remove disconnected regions and holes in masks with area smaller 1859 | than min_mask_region_area. Requires opencv. 1860 | """ 1861 | payload = { 1862 | "input_image": raw_b64_img(image), 1863 | "sam_model_name": sam_model_name, 1864 | "processor": processor, 1865 | "processor_res": processor_res, 1866 | "pixel_perfect": pixel_perfect, 1867 | "resize_mode": resize_mode, 1868 | "target_W": target_width, 1869 | "target_H": target_height 1870 | } 1871 | 1872 | autosam_conf = { 1873 | "points_per_side": points_per_side, 1874 | "points_per_batch": points_per_batch, 1875 | "pred_iou_thresh": pred_iou_thresh, 1876 | "stability_score_thresh": stability_score_thresh, 1877 | "stability_score_offset": stability_score_offset, 1878 | "box_nms_thresh": box_nms_thresh, 1879 | "crop_n_layers": crop_n_layers, 1880 | "crop_nms_thresh": crop_nms_thresh, 1881 | "crop_overlap_ratio": crop_overlap_ratio, 1882 | "crop_n_points_downscale_factor": crop_n_points_downscale_factor, 1883 | "min_mask_region_area": min_mask_region_area 1884 | } 1885 | 1886 | url = self.api.get_endpoint("sam/controlnet-seg", baseurl=False) 1887 | r = self.api.session.post(url=url, json={"payload": payload, "autosam_conf": autosam_conf}).json() 1888 | 1889 | if r.get("random_seg"): 1890 | return SegmentAnythingControlNetSegRandomResult( 1891 | message=r.get("msg"), 1892 | blended_image=Image.open(io.BytesIO(base64.b64decode(r["blended_image"]))), 1893 | random_seg=Image.open(io.BytesIO(base64.b64decode(r["random_seg"]))), 1894 | edit_anything_control=Image.open(io.BytesIO(base64.b64decode(r["edit_anything_control"]))) 1895 | ) 1896 | else: 1897 | return SegmentAnythingControlNetSegNotRandomResult( 1898 | message=r.get("msg"), 1899 | sem_presam=Image.open(io.BytesIO(base64.b64decode(r["sem_presam"]))), 1900 | sem_postsam=Image.open(io.BytesIO(base64.b64decode(r["sem_postsam"]))), 1901 | blended_presam=Image.open(io.BytesIO(base64.b64decode(r["blended_presam"]))), 1902 | blended_postsam=Image.open(io.BytesIO(base64.b64decode(r["blended_postsam"]))) 1903 | ) 1904 | 1905 | def sam_and_semantic_seg_with_cat_id( 1906 | self, 1907 | image: Image, 1908 | category: str, 1909 | sam_model_name: str = "sam_vit_h_4b8939.pth", 1910 | processor: str = "seg_ofade20k", 1911 | processor_res: int = 512, 1912 | pixel_perfect: bool = False, 1913 | resize_mode: Optional[int] = 1, 1914 | target_width: Optional[int] = None, 1915 | target_height: Optional[int] = None, 1916 | points_per_side: Optional[int] = 32, 1917 | points_per_batch: int = 64, 1918 | 
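# --- Editor's note: illustrative usage sketch, not part of the library source ---
# SegmentAnythingInterface wraps the sd-webui-segment-anything endpoints and decodes
# the base64 images in the responses into PIL Images, as the dataclasses above show.
# A typical flow, assuming the class is importable from the webuiapi module and
# api is a webuiapi.WebUIApi client:
#
#   sam = webuiapi.SegmentAnythingInterface(api)
#   result = sam.sam_predict(image=some_image, dino_enabled=True,
#                            dino_text_prompt="cat")
#   dilated = sam.dilate_mask(image=some_image, mask=result.masks[0],
#                             dilate_amount=20)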
pred_iou_thresh: float = 0.88, 1919 | stability_score_thresh: float = 0.95, 1920 | stability_score_offset: float = 1.0, 1921 | box_nms_thresh: float = 0.7, 1922 | crop_n_layers: int = 0, 1923 | crop_nms_thresh: float = 0.7, 1924 | crop_overlap_ratio: float = 512 / 1500, 1925 | crop_n_points_downscale_factor: int = 1, 1926 | min_mask_region_area: int = 0 1927 | ) -> SegmentAnythingSemanticSegWithCatIdResult: 1928 | """ 1929 | Get masks generated by SAM + Semantic segmentation with category IDs. 1930 | 1931 | :param image: Input image. 1932 | :param category: Category IDs separated by +. 1933 | :param sam_model_name: SAM model name. 1934 | :param processor: Preprocessor for semantic segmentation. 1935 | :param processor_res: Preprocessor resolution. 1936 | :param pixel_perfect: Whether to enable pixel perfect. 1937 | :param resize_mode: Resize mode from the original shape to target shape. 1938 | :param target_width: Target width if the segmentation will be used to generate a new image. 1939 | :param target_height: Target height if the segmentation will be used to generate a new image. 1940 | :param points_per_side: The number of points to be sampled 1941 | along one side of the image. The total number of points is 1942 | points_per_side**2. If None, 'point_grids' must provide explicit 1943 | point sampling. 1944 | :param points_per_batch: Sets the number of points run simultaneously 1945 | by the model. Higher numbers may be faster but use more GPU memory. 1946 | :param pred_iou_thresh: A filtering threshold in [0,1], using the 1947 | model's predicted mask quality. 1948 | :param stability_score_thresh: A filtering threshold in [0,1], using 1949 | the stability of the mask under changes to the cutoff used to binarize 1950 | the model's mask predictions. 1951 | :param stability_score_offset: The amount to shift the cutoff when 1952 | calculated the stability score. 1953 | :param box_nms_thresh: The box IoU cutoff used by non-maximal 1954 | suppression to filter duplicate masks. 1955 | :param crop_n_layers: If >0, mask prediction will be run again on 1956 | crops of the image. Sets the number of layers to run, where each 1957 | layer has 2**i_layer number of image crops. 1958 | :param crop_nms_thresh: The box IoU cutoff used by non-maximal 1959 | suppression to filter duplicate masks between different crops. 1960 | :param crop_overlap_ratio: Sets the degree to which crops overlap. 1961 | In the first crop layer, crops will overlap by this fraction of 1962 | the image length. Later layers with more crops scale down this overlap. 1963 | :param crop_n_points_downscale_factor: The number of points-per-side 1964 | sampled in layer n is scaled down by crop_n_points_downscale_factor**n. 1965 | :param min_mask_region_area: If >0, postprocessing will be applied 1966 | to remove disconnected regions and holes in masks with area smaller 1967 | than min_mask_region_area. Requires opencv. 
1968 | """ 1969 | payload = { 1970 | "input_image": raw_b64_img(image), 1971 | "category": category, 1972 | "sam_model_name": sam_model_name, 1973 | "processor": processor, 1974 | "processor_res": processor_res, 1975 | "pixel_perfect": pixel_perfect, 1976 | "resize_mode": resize_mode, 1977 | "target_W": target_width, 1978 | "target_H": target_height, 1979 | } 1980 | 1981 | autosam_conf = { 1982 | "points_per_side": points_per_side, 1983 | "points_per_batch": points_per_batch, 1984 | "pred_iou_thresh": pred_iou_thresh, 1985 | "stability_score_thresh": stability_score_thresh, 1986 | "stability_score_offset": stability_score_offset, 1987 | "box_nms_thresh": box_nms_thresh, 1988 | "crop_n_layers": crop_n_layers, 1989 | "crop_nms_thresh": crop_nms_thresh, 1990 | "crop_overlap_ratio": crop_overlap_ratio, 1991 | "crop_n_points_downscale_factor": crop_n_points_downscale_factor, 1992 | "min_mask_region_area": min_mask_region_area 1993 | } 1994 | 1995 | url = self.api.get_endpoint("sam/category-mask", baseurl=False) 1996 | r = self.api.session.post(url=url, json={"payload": payload, "autosam_conf": autosam_conf}).json() 1997 | 1998 | return SegmentAnythingSemanticSegWithCatIdResult( 1999 | message=r.get("msg"), 2000 | blended_image=Image.open(io.BytesIO(base64.b64decode(r["blended_image"]))), 2001 | mask=Image.open(io.BytesIO(base64.b64decode(r["mask"]))), 2002 | masked_image=Image.open(io.BytesIO(base64.b64decode(r["masked_image"]))), 2003 | resized_input=Image.open(io.BytesIO(base64.b64decode(r["resized_input"]))) 2004 | ) 2005 | 2006 | # https://github.com/Akegarasu/sd-webui-wd14-tagger 2007 | 2008 | class TaggerInterface: 2009 | def __init__(self, webuiapi: WebUIApi): 2010 | self.api = webuiapi 2011 | 2012 | def tagger_interrogate(self, image, model="wd14-vit-v2-git", threshold=0.3, use_async=False): 2013 | """ 2014 | Interrogates the tagger model with the provided image and parameters. 2015 | 2016 | Args: 2017 | image (Image.Image or str): The image to be interrogated. Can be a PIL Image object or a base64 encoded string. 2018 | model (str, optional): The model to use for interrogation. Defaults to "wd14-vit-v2-git". 2019 | threshold (float, optional): The threshold value for the model. Defaults to 0.3 2020 | use_async (bool, optional): Whether to use asynchronous processing. Defaults to False. 2021 | 2022 | Returns: 2023 | WebUIApiResult.info: The information returned by the web API. 2024 | """ 2025 | payload = { 2026 | "image": b64_img(image) if isinstance(image, Image.Image) else image, 2027 | "model": model, 2028 | "threshold": threshold 2029 | } 2030 | return self.api.custom_post("tagger/v1/interrogate", payload=payload, use_async=use_async) 2031 | def tagger_interrogators(self): 2032 | return self.api.custom_get("tagger/v1/interrogators") --------------------------------------------------------------------------------