├── README.md
├── calculator.py
├── ebsynth_utility.py
├── imgs
│   ├── clipseg.png
│   ├── controlnet_0.png
│   ├── controlnet_1.png
│   ├── controlnet_option_in_ebsynthutil.png
│   ├── controlnet_setting.png
│   ├── sample1.mp4
│   ├── sample2.mp4
│   ├── sample3.mp4
│   ├── sample4.mp4
│   ├── sample5.mp4
│   ├── sample6.mp4
│   ├── sample_anyaheh.mp4
│   ├── sample_autotag.mp4
│   └── sample_clipseg.mp4
├── install.py
├── sample
│   ├── add_token.txt
│   └── blacklist.txt
├── scripts
│   ├── custom_script.py
│   └── ui.py
├── stage1.py
├── stage2.py
├── stage3_5.py
├── stage5.py
├── stage7.py
├── stage8.py
└── style.css
/README.md:
--------------------------------------------------------------------------------
1 | # ebsynth_utility
2 |
3 | ## Overview
4 | #### AUTOMATIC1111 UI extension for creating videos using img2img and ebsynth.
5 | #### This extension allows you to output edited videos using ebsynth. (AE is not required.)
6 |
7 |
8 | ##### With [Controlnet](https://github.com/Mikubill/sd-webui-controlnet) installed, I have confirmed that all features of this extension work properly!
9 | ##### [Controlnet](https://github.com/Mikubill/sd-webui-controlnet) is a must for video editing, so I recommend installing it.
10 | ##### Multi ControlNet ("canny" + "normal map") is well suited for video editing.
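For reference, the "canny" unit is driven by an edge map of each frame. The sketch below is illustrative only (it is not part of this extension, and the paths are placeholders); it previews that kind of control image for one extracted frame with OpenCV:

```python
# Illustrative only: preview the kind of "canny" control image a ControlNet unit
# consumes for one extracted frame. Paths are placeholders.
import cv2

frame = cv2.imread("video_frame/00001.png")             # an extracted frame (placeholder path)
edges = cv2.Canny(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), 100, 200)
cv2.imwrite("controlnet_preprocess/00001.png", edges)   # could be reused as a preprocess image
```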
11 |
12 |
13 |
14 | ###### I modified animatediff-cli to create a txt2video tool that allows flexible prompt specification. You can use it if you like.
15 | ###### [animatediff-cli-prompt-travel](https://github.com/s9roll7/animatediff-cli-prompt-travel)
16 |
            See
            [here] for depth map.
            ")

        with gr.Accordion("ControlNet option"):
            controlnet_weight = gr.Slider(minimum=0.0, maximum=2.0, step=0.01, value=0.5, label="Control Net Weight")
            controlnet_weight_for_face = gr.Slider(minimum=0.0, maximum=2.0, step=0.01, value=0.5, label="Control Net Weight For Face")
            use_preprocess_img = gr.Checkbox(True, label="Use Preprocess image If exists in /controlnet_preprocess")
            gr.HTML(value="\
                Please enable the following settings to use controlnet from this script.\
                \
                Settings->ControlNet->Allow other script to control this extension\
                \
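# --- Illustrative sketch (not part of this extension) --------------------------
# One way to double-check that "Allow other script to control this extension" is
# enabled before running: inspect the web UI's config.json. The option key name
# below is an assumption; verify it against your own config.json.
import json
from pathlib import Path

webui_config = Path("config.json")                 # stable-diffusion-webui root (placeholder)
key = "control_net_allow_script_control"           # assumed ControlNet option key
settings = json.loads(webui_config.read_text(encoding="utf-8")) if webui_config.is_file() else {}
if not settings.get(key, False):
    print(f"'{key}' is off; enable it under Settings -> ControlNet.")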
                The results are stored in timestamp_prompts.txt.\
                If you want to use the same tagging results the next time you run img2img, rename the file to prompts.txt\
                Recommend enabling the following settings.\
                \
                Settings->Interrogate Option->Interrogate: include ranks of model tags matches in results\
                \
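# --- Illustrative sketch (not part of this extension) --------------------------
# The rename step described above, assuming the tagging results are written as
# "<timestamp>_prompts.txt" next to prompts.txt in the project directory.
import glob, os, shutil

project_dir = "path_to_project_dir"                               # placeholder
results = glob.glob(os.path.join(project_dir, "*_prompts.txt"))
if results:
    latest = max(results, key=os.path.getmtime)                   # newest tagging result
    shutil.copyfile(latest, os.path.join(project_dir, "prompts.txt"))
    print("reusing", latest, "as prompts.txt")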
                If loading of the Yolov5_anime model fails, check\
                [this] solution.\
") 148 | face_crop_resolution = gr.Slider(minimum=128, maximum=2048, step=1, value=512, label="Face Crop Resolution") 149 | max_crop_size = gr.Slider(minimum=0, maximum=2048, step=1, value=1024, label="Max Crop Size") 150 | face_denoising_strength = gr.Slider(minimum=0.00, maximum=1.00, step=0.01, value=0.5, label="Face Denoising Strength") 151 | face_area_magnification = gr.Slider(minimum=1.00, maximum=10.00, step=0.01, value=1.5, label="Face Area Magnification ") 152 | disable_facecrop_lpbk_last_time = gr.Checkbox(False, label="Disable at the last loopback time") 153 | 154 | with gr.Column(): 155 | enable_face_prompt = gr.Checkbox(False, label="Enable Face Prompt") 156 | face_prompt = gr.Textbox(label="Face Prompt", show_label=False, lines=2, 157 | placeholder="Prompt for Face", 158 | value = "face close up," 159 | ) 160 | 161 | return [project_dir, generation_test, mask_mode, inpaint_area, use_depth, img2img_repeat_count, inc_seed, auto_tag_mode, add_tag_to_head, add_tag_replace_underscore, is_facecrop, face_detection_method, face_crop_resolution, max_crop_size, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt, controlnet_weight, controlnet_weight_for_face, disable_facecrop_lpbk_last_time,use_preprocess_img] 162 | 163 | 164 | def detect_face_from_img(self, img_array): 165 | if not self.face_detector: 166 | dnn_model_path = autocrop.download_and_cache_models(os.path.join(models_path, "opencv")) 167 | self.face_detector = cv2.FaceDetectorYN.create(dnn_model_path, "", (0, 0)) 168 | 169 | self.face_detector.setInputSize((img_array.shape[1], img_array.shape[0])) 170 | _, result = self.face_detector.detect(img_array) 171 | return result 172 | 173 | def detect_anime_face_from_img(self, img_array): 174 | import sys 175 | 176 | if not self.anime_face_detector: 177 | if 'models' in sys.modules: 178 | del sys.modules['models'] 179 | 180 | anime_model_path = download_and_cache_models(os.path.join(models_path, "yolov5_anime")) 181 | 182 | if not os.path.isfile(anime_model_path): 183 | print( "WARNING!! 
" + anime_model_path + " not found.") 184 | print( "use YuNet instead.") 185 | return self.detect_face_from_img(img_array) 186 | 187 | self.anime_face_detector = torch.hub.load('ultralytics/yolov5', 'custom', path=anime_model_path) 188 | 189 | # warmup 190 | test = np.zeros([512,512,3],dtype=np.uint8) 191 | _ = self.anime_face_detector(test) 192 | 193 | result = self.anime_face_detector(img_array) 194 | #models.common.Detections 195 | faces = [] 196 | for x_c, y_c, w, h, _, _ in result.xywh[0].tolist(): 197 | faces.append( [ x_c - w/2 , y_c - h/2, w, h ] ) 198 | 199 | return faces 200 | 201 | def detect_face(self, img, mask, face_detection_method, max_crop_size): 202 | img_array = np.array(img) 203 | 204 | # image without alpha 205 | if img_array.shape[2] == 4: 206 | img_array = img_array[:,:,:3] 207 | 208 | if mask is not None: 209 | if self.is_invert_mask: 210 | mask = ImageOps.invert(mask) 211 | mask_array = np.array(mask)/255 212 | if mask_array.ndim == 2: 213 | mask_array = mask_array[:, :, np.newaxis] 214 | 215 | if mask_array.shape[2] == 4: 216 | mask_array = mask_array[:,:,:3] 217 | 218 | img_array = mask_array * img_array 219 | img_array = img_array.astype(np.uint8) 220 | 221 | if face_detection_method == "YuNet": 222 | faces = self.detect_face_from_img(img_array) 223 | elif face_detection_method == "Yolov5_anime": 224 | faces = self.detect_anime_face_from_img(img_array) 225 | else: 226 | faces = self.detect_face_from_img(img_array) 227 | 228 | if faces is None or len(faces) == 0: 229 | return [] 230 | 231 | face_coords = [] 232 | for face in faces: 233 | x = int(face[0]) 234 | y = int(face[1]) 235 | w = int(face[2]) 236 | h = int(face[3]) 237 | if max(w,h) > max_crop_size: 238 | print("ignore big face") 239 | continue 240 | if w == 0 or h == 0: 241 | print("ignore w,h = 0 face") 242 | continue 243 | 244 | face_coords.append( [ x/img_array.shape[1],y/img_array.shape[0],w/img_array.shape[1],h/img_array.shape[0]] ) 245 | 246 | return face_coords 247 | 248 | def get_mask(self): 249 | def create_mask( output, x_rate, y_rate, k_size ): 250 | img = np.zeros((512, 512, 3)) 251 | img = cv2.ellipse(img, ((256, 256), (int(512 * x_rate), int(512 * y_rate)), 0), (255, 255, 255), thickness=-1) 252 | img = cv2.GaussianBlur(img, (k_size, k_size), 0) 253 | cv2.imwrite(output, img) 254 | 255 | if self.face_merge_mask_image is None: 256 | mask_file_path = os.path.join( get_my_dir() , self.face_merge_mask_filename) 257 | if not os.path.isfile(mask_file_path): 258 | create_mask( mask_file_path, 0.9, 0.9, 91) 259 | 260 | m = cv2.imread( mask_file_path )[:,:,0] 261 | m = m[:, :, np.newaxis] 262 | self.face_merge_mask_image = m / 255 263 | 264 | return self.face_merge_mask_image 265 | 266 | def face_img_crop(self, img, face_coords,face_area_magnification): 267 | img_array = np.array(img) 268 | face_imgs =[] 269 | new_coords = [] 270 | 271 | for face in face_coords: 272 | x = int(face[0] * img_array.shape[1]) 273 | y = int(face[1] * img_array.shape[0]) 274 | w = int(face[2] * img_array.shape[1]) 275 | h = int(face[3] * img_array.shape[0]) 276 | print([x,y,w,h]) 277 | 278 | cx = x + int(w/2) 279 | cy = y + int(h/2) 280 | 281 | x = cx - int(w*face_area_magnification / 2) 282 | x = x if x > 0 else 0 283 | w = cx + int(w*face_area_magnification / 2) - x 284 | w = w if x+w < img.width else img.width - x 285 | 286 | y = cy - int(h*face_area_magnification / 2) 287 | y = y if y > 0 else 0 288 | h = cy + int(h*face_area_magnification / 2) - y 289 | h = h if y+h < img.height else img.height - y 290 | 291 | 
print([x,y,w,h]) 292 | 293 | face_imgs.append( img_array[y: y+h, x: x+w] ) 294 | new_coords.append( [x,y,w,h] ) 295 | 296 | resized = [] 297 | for face_img in face_imgs: 298 | if face_img.shape[1] < face_img.shape[0]: 299 | re_w = self.face_crop_resolution 300 | re_h = int(x_ceiling( (self.face_crop_resolution / face_img.shape[1]) * face_img.shape[0] , 64)) 301 | else: 302 | re_w = int(x_ceiling( (self.face_crop_resolution / face_img.shape[0]) * face_img.shape[1] , 64)) 303 | re_h = self.face_crop_resolution 304 | 305 | face_img = resize_img(face_img, re_w, re_h) 306 | resized.append( Image.fromarray(face_img)) 307 | 308 | return resized, new_coords 309 | 310 | def face_crop_img2img(self, p, face_coords, face_denoising_strength, face_area_magnification, enable_face_prompt, face_prompt, controlnet_input_img, controlnet_input_face_imgs, preprocess_img_exist): 311 | 312 | def merge_face(img, face_img, face_coord, base_img_size, mask): 313 | x_rate = img.width / base_img_size[0] 314 | y_rate = img.height / base_img_size[1] 315 | 316 | img_array = np.array(img) 317 | x = int(face_coord[0] * x_rate) 318 | y = int(face_coord[1] * y_rate) 319 | w = int(face_coord[2] * x_rate) 320 | h = int(face_coord[3] * y_rate) 321 | 322 | face_array = np.array(face_img) 323 | face_array = resize_img(face_array, w, h) 324 | mask = resize_img(mask, w, h) 325 | if mask.ndim == 2: 326 | mask = mask[:, :, np.newaxis] 327 | 328 | bg = img_array[y: y+h, x: x+w] 329 | img_array[y: y+h, x: x+w] = mask * face_array + (1-mask)*bg 330 | 331 | return Image.fromarray(img_array) 332 | 333 | base_img = p.init_images[0] 334 | 335 | base_img_size = (base_img.width, base_img.height) 336 | 337 | if face_coords is None or len(face_coords) == 0: 338 | print("no face detected") 339 | return process_images(p) 340 | 341 | print(face_coords) 342 | face_imgs, new_coords = self.face_img_crop(base_img, face_coords, face_area_magnification) 343 | 344 | if not face_imgs: 345 | return process_images(p) 346 | 347 | face_p = copy.copy(p) 348 | 349 | ### img2img base img 350 | proc = self.process_images(p, controlnet_input_img, self.controlnet_weight, preprocess_img_exist) 351 | print(proc.seed) 352 | 353 | ### img2img for each face 354 | face_img2img_results = [] 355 | 356 | for face, coord, controlnet_input_face in zip(face_imgs, new_coords, controlnet_input_face_imgs): 357 | # cv2.imwrite("scripts/face.png", np.array(face)[:, :, ::-1]) 358 | face_p.init_images = [face] 359 | face_p.width = face.width 360 | face_p.height = face.height 361 | face_p.denoising_strength = face_denoising_strength 362 | 363 | if enable_face_prompt: 364 | face_p.prompt = face_prompt 365 | else: 366 | face_p.prompt = "close-up face ," + face_p.prompt 367 | 368 | if p.image_mask is not None: 369 | x,y,w,h = coord 370 | cropped_face_mask = Image.fromarray(np.array(p.image_mask)[y: y+h, x: x+w]) 371 | face_p.image_mask = modules.images.resize_image(0, cropped_face_mask, face.width, face.height) 372 | 373 | face_proc = self.process_images(face_p, controlnet_input_face, self.controlnet_weight_for_face, preprocess_img_exist) 374 | print(face_proc.seed) 375 | 376 | face_img2img_results.append((face_proc.images[0], coord)) 377 | 378 | ### merge faces 379 | bg = proc.images[0] 380 | mask = self.get_mask() 381 | 382 | for face_img, coord in face_img2img_results: 383 | bg = merge_face(bg, face_img, coord, base_img_size, mask) 384 | 385 | proc.images[0] = bg 386 | 387 | return proc 388 | 389 | def get_depth_map(self, mask, depth_path ,img_basename, is_invert_mask): 390 | 
depth_img_path = os.path.join( depth_path , img_basename ) 391 | 392 | depth = None 393 | 394 | if os.path.isfile( depth_img_path ): 395 | depth = Image.open(depth_img_path) 396 | else: 397 | # try 00001-0000.png 398 | os.path.splitext(img_basename)[0] 399 | depth_img_path = os.path.join( depth_path , os.path.splitext(img_basename)[0] + "-0000.png" ) 400 | if os.path.isfile( depth_img_path ): 401 | depth = Image.open(depth_img_path) 402 | 403 | if depth: 404 | if mask: 405 | mask_array = np.array(mask) 406 | depth_array = np.array(depth) 407 | 408 | if is_invert_mask == False: 409 | depth_array[mask_array[:,:,0] == 0] = 0 410 | else: 411 | depth_array[mask_array[:,:,0] != 0] = 0 412 | 413 | depth = Image.fromarray(depth_array) 414 | 415 | tmp_path = os.path.join( depth_path , "tmp" ) 416 | os.makedirs(tmp_path, exist_ok=True) 417 | tmp_path = os.path.join( tmp_path , img_basename ) 418 | depth_array = depth_array.astype(np.uint16) 419 | cv2.imwrite(tmp_path, depth_array) 420 | 421 | mask = depth 422 | 423 | return depth!=None, mask 424 | 425 | ### auto tagging 426 | debug_count = 0 427 | 428 | def get_masked_image(self, image, mask_image): 429 | 430 | if mask_image == None: 431 | return image.convert("RGB") 432 | 433 | mask = mask_image.convert('L') 434 | if self.is_invert_mask: 435 | mask = ImageOps.invert(mask) 436 | crop_region = masking.get_crop_region(np.array(mask), 0) 437 | # crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height) 438 | # x1, y1, x2, y2 = crop_region 439 | image = image.crop(crop_region).convert("RGB") 440 | mask = mask.crop(crop_region) 441 | 442 | base_img = Image.new("RGB", image.size, (255, 190, 200)) 443 | 444 | image = Image.composite( image, base_img, mask ) 445 | 446 | # image.save("scripts/get_masked_image_test_"+ str(self.debug_count) + ".png") 447 | # self.debug_count += 1 448 | 449 | return image 450 | 451 | def interrogate_deepdanbooru(self, imgs, masks): 452 | prompts_dict = {} 453 | cause_err = False 454 | 455 | try: 456 | deepbooru.model.start() 457 | 458 | for img,mask in zip(imgs,masks): 459 | key = os.path.basename(img) 460 | print(key + " interrogate deepdanbooru") 461 | 462 | image = Image.open(img) 463 | mask_image = Image.open(mask) if mask else None 464 | image = self.get_masked_image(image, mask_image) 465 | 466 | prompt = deepbooru.model.tag_multi(image) 467 | 468 | prompts_dict[key] = prompt 469 | except Exception as e: 470 | import traceback 471 | traceback.print_exc() 472 | print(e) 473 | cause_err = True 474 | finally: 475 | deepbooru.model.stop() 476 | if cause_err: 477 | print("Exception occurred during auto-tagging(deepdanbooru)") 478 | return Processed() 479 | 480 | return prompts_dict 481 | 482 | 483 | def interrogate_clip(self, imgs, masks): 484 | from modules import devices, shared, lowvram, paths 485 | import importlib 486 | import models 487 | 488 | caption_list = [] 489 | prompts_dict = {} 490 | cause_err = False 491 | 492 | try: 493 | if shared.cmd_opts.lowvram or shared.cmd_opts.medvram: 494 | lowvram.send_everything_to_cpu() 495 | devices.torch_gc() 496 | 497 | with paths.Prioritize("BLIP"): 498 | importlib.reload(models) 499 | shared.interrogator.load() 500 | 501 | for img,mask in zip(imgs,masks): 502 | key = os.path.basename(img) 503 | print(key + " generate caption") 504 | 505 | image = Image.open(img) 506 | mask_image = Image.open(mask) if mask else None 507 | image = self.get_masked_image(image, mask_image) 508 | 509 | caption = shared.interrogator.generate_caption(image) 
510 | caption_list.append(caption) 511 | 512 | shared.interrogator.send_blip_to_ram() 513 | devices.torch_gc() 514 | 515 | for img,mask,caption in zip(imgs,masks,caption_list): 516 | key = os.path.basename(img) 517 | print(key + " interrogate clip") 518 | 519 | image = Image.open(img) 520 | mask_image = Image.open(mask) if mask else None 521 | image = self.get_masked_image(image, mask_image) 522 | 523 | clip_image = shared.interrogator.clip_preprocess(image).unsqueeze(0).type(shared.interrogator.dtype).to(devices.device_interrogate) 524 | 525 | res = "" 526 | 527 | with torch.no_grad(), devices.autocast(): 528 | image_features = shared.interrogator.clip_model.encode_image(clip_image).type(shared.interrogator.dtype) 529 | image_features /= image_features.norm(dim=-1, keepdim=True) 530 | 531 | for name, topn, items in shared.interrogator.categories(): 532 | matches = shared.interrogator.rank(image_features, items, top_count=topn) 533 | for match, score in matches: 534 | if shared.opts.interrogate_return_ranks: 535 | res += f", ({match}:{score/100:.3f})" 536 | else: 537 | res += ", " + match 538 | 539 | prompts_dict[key] = (caption + res) 540 | 541 | except Exception as e: 542 | import traceback 543 | traceback.print_exc() 544 | print(e) 545 | cause_err = True 546 | finally: 547 | shared.interrogator.unload() 548 | if cause_err: 549 | print("Exception occurred during auto-tagging(blip/clip)") 550 | return Processed() 551 | 552 | return prompts_dict 553 | 554 | 555 | def remove_reserved_token(self, token_list): 556 | reserved_list = ["pink_background","simple_background","pink","pink_theme"] 557 | 558 | result_list = [] 559 | 560 | head_token = token_list[0] 561 | 562 | if head_token[2] == "normal": 563 | head_token_str = head_token[0].replace('pink background', '') 564 | token_list[0] = (head_token_str, head_token[1], head_token[2]) 565 | 566 | for token in token_list: 567 | if token[0] in reserved_list: 568 | continue 569 | result_list.append(token) 570 | 571 | return result_list 572 | 573 | def remove_blacklisted_token(self, token_list): 574 | black_list_path = os.path.join(self.prompts_dir, "blacklist.txt") 575 | if not os.path.isfile(black_list_path): 576 | print(black_list_path + " not found.") 577 | return token_list 578 | 579 | with open(black_list_path) as f: 580 | black_list = [s.strip() for s in f.readlines()] 581 | 582 | result_list = [] 583 | 584 | for token in token_list: 585 | if token[0] in black_list: 586 | continue 587 | result_list.append(token) 588 | 589 | token_list = result_list 590 | 591 | return token_list 592 | 593 | def add_token(self, token_list): 594 | add_list_path = os.path.join(self.prompts_dir, "add_token.txt") 595 | if not os.path.isfile(add_list_path): 596 | print(add_list_path + " not found.") 597 | 598 | if self.add_tag_replace_underscore: 599 | token_list = [ (x[0].replace("_"," "), x[1], x[2]) for x in token_list ] 600 | 601 | return token_list 602 | 603 | if not self.calc_parser: 604 | self.calc_parser = CalcParser() 605 | 606 | with open(add_list_path) as f: 607 | add_list = json.load(f) 608 | ''' 609 | [ 610 | { 611 | "target":"test_token", 612 | "min_score":0.8, 613 | "token": ["lora_name_A", "0.5"], 614 | "type":"lora" 615 | }, 616 | { 617 | "target":"test_token", 618 | "min_score":0.5, 619 | "token": ["bbbb", "score - 0.1"], 620 | "type":"normal" 621 | }, 622 | { 623 | "target":"test_token2", 624 | "min_score":0.8, 625 | "token": ["hypernet_name_A", "score"], 626 | "type":"hypernet" 627 | }, 628 | { 629 | "target":"test_token3", 630 | 
"min_score":0.0, 631 | "token": ["dddd", "score"], 632 | "type":"normal" 633 | } 634 | ] 635 | ''' 636 | result_list = [] 637 | 638 | for token in token_list: 639 | for add_item in add_list: 640 | if token[0] == add_item["target"]: 641 | if token[1] > add_item["min_score"]: 642 | # hit 643 | formula = str(add_item["token"][1]) 644 | formula = formula.replace("score",str(token[1])) 645 | print('Input: %s' % str(add_item["token"][1])) 646 | 647 | try: 648 | score = self.calc_parser.parse(formula) 649 | score = round(score, 3) 650 | except (ParseError, ZeroDivisionError) as e: 651 | print('Input: %s' % str(add_item["token"][1])) 652 | print('Error: %s' % e) 653 | print("ignore this token") 654 | continue 655 | 656 | print("score = " + str(score)) 657 | result_list.append( ( add_item["token"][0], score, add_item["type"] ) ) 658 | 659 | if self.add_tag_replace_underscore: 660 | token_list = [ (x[0].replace("_"," "), x[1], x[2]) for x in token_list ] 661 | 662 | token_list = token_list + result_list 663 | 664 | return token_list 665 | 666 | def create_prompts_dict(self, imgs, masks, auto_tag_mode): 667 | prompts_dict = {} 668 | 669 | if auto_tag_mode == "DeepDanbooru": 670 | raw_dict = self.interrogate_deepdanbooru(imgs, masks) 671 | elif auto_tag_mode == "CLIP": 672 | raw_dict = self.interrogate_clip(imgs, masks) 673 | 674 | repatter = re.compile(r'\((.+)\:([0-9\.]+)\)') 675 | 676 | for key, value_str in raw_dict.items(): 677 | value_list = [x.strip() for x in value_str.split(',')] 678 | 679 | value = [] 680 | for v in value_list: 681 | m = repatter.fullmatch(v) 682 | if m: 683 | value.append((m.group(1), float(m.group(2)), "normal")) 684 | else: 685 | value.append((v, 1, "no_score")) 686 | 687 | # print(value) 688 | value = self.remove_reserved_token(value) 689 | # print(value) 690 | value = self.remove_blacklisted_token(value) 691 | # print(value) 692 | value = self.add_token(value) 693 | # print(value) 694 | 695 | def create_token_str(x): 696 | print(x) 697 | if x[2] == "no_score": 698 | return x[0] 699 | elif x[2] == "lora": 700 | return "\ 25 | If you have trouble entering the video path manually, you can also use drag and drop.For large videos, please enter the path manually. \ 26 |
") 27 | 28 | with gr.TabItem('configuration', elem_id='ebs_configuration'): 29 | with gr.Tabs(elem_id="ebs_configuration_tab"): 30 | with gr.TabItem(label="stage 1",elem_id='ebs_configuration_tab1'): 31 | with gr.Row(): 32 | frame_width = gr.Number(value=-1, label="Frame Width", precision=0, interactive=True) 33 | frame_height = gr.Number(value=-1, label="Frame Height", precision=0, interactive=True) 34 | gr.HTML(value="\ 35 | -1 means that it is calculated automatically. If both are -1, the size will be the same as the source size. \ 36 |
") 37 | 38 | st1_masking_method_index = gr.Radio(label='Masking Method', choices=["transparent-background","clipseg","transparent-background AND clipseg"], value="transparent-background", type="index") 39 | 40 | with gr.Accordion(label="transparent-background options"): 41 | st1_mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.0) 42 | 43 | # https://pypi.org/project/transparent-background/ 44 | gr.HTML(value="\ 45 | configuration for \ 46 | [transparent-background]\ 47 |
") 48 | tb_use_fast_mode = gr.Checkbox(label="Use Fast Mode(It will be faster, but the quality of the mask will be lower.)", value=False) 49 | tb_use_jit = gr.Checkbox(label="Use Jit", value=False) 50 | 51 | with gr.Accordion(label="clipseg options"): 52 | clipseg_mask_prompt = gr.Textbox(label='Mask Target (e.g., girl, cats)', lines=1) 53 | clipseg_exclude_prompt = gr.Textbox(label='Exclude Target (e.g., finger, book)', lines=1) 54 | clipseg_mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.4) 55 | clipseg_mask_blur_size = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size(MedianBlur)', value=11) 56 | clipseg_mask_blur_size2 = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size(GaussianBlur)', value=11) 57 | 58 | with gr.TabItem(label="stage 2", elem_id='ebs_configuration_tab2'): 59 | key_min_gap = gr.Slider(minimum=0, maximum=500, step=1, label='Minimum keyframe gap', value=10) 60 | key_max_gap = gr.Slider(minimum=0, maximum=1000, step=1, label='Maximum keyframe gap', value=300) 61 | key_th = gr.Slider(minimum=0.0, maximum=100.0, step=0.1, label='Threshold of delta frame edge', value=8.5) 62 | key_add_last_frame = gr.Checkbox(label="Add last frame to keyframes", value=True) 63 | 64 | with gr.TabItem(label="stage 3.5", elem_id='ebs_configuration_tab3_5'): 65 | gr.HTML(value="\ 66 | [color-matcher]\ 67 |
") 68 | 69 | color_matcher_method = gr.Radio(label='Color Transfer Method', choices=['default', 'hm', 'reinhard', 'mvgd', 'mkl', 'hm-mvgd-hm', 'hm-mkl-hm'], value="hm-mkl-hm", type="value") 70 | color_matcher_ref_type = gr.Radio(label='Color Matcher Ref Image Type', choices=['original video frame', 'first frame of img2img result'], value="original video frame", type="index") 71 | gr.HTML(value="\ 72 | If an image is specified below, it will be used with highest priority.\ 73 |
") 74 | color_matcher_ref_image = gr.Image(label="Color Matcher Ref Image", source='upload', mirror_webcam=False, type='pil') 75 | st3_5_use_mask = gr.Checkbox(label="Apply mask to the result", value=True) 76 | st3_5_use_mask_ref = gr.Checkbox(label="Apply mask to the Ref Image", value=False) 77 | st3_5_use_mask_org = gr.Checkbox(label="Apply mask to original image", value=False) 78 | #st3_5_number_of_itr = gr.Slider(minimum=1, maximum=10, step=1, label='Number of iterations', value=1) 79 | 80 | with gr.TabItem(label="stage 7", elem_id='ebs_configuration_tab7'): 81 | blend_rate = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Crossfade blend rate', value=1.0) 82 | export_type = gr.Dropdown(choices=["mp4","webm","gif","rawvideo"], value="mp4" ,label="Export type") 83 | 84 | with gr.TabItem(label="stage 8", elem_id='ebs_configuration_tab8'): 85 | bg_src = gr.Textbox(label='Background source(mp4 or directory containing images)', lines=1) 86 | bg_type = gr.Dropdown(choices=["Fit video length","Loop"], value="Fit video length" ,label="Background type") 87 | mask_blur_size = gr.Slider(minimum=0, maximum=150, step=1, label='Mask Blur Kernel Size', value=5) 88 | mask_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Mask Threshold', value=0.0) 89 | #is_transparent = gr.Checkbox(label="Is Transparent", value=True, visible = False) 90 | fg_transparency = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Foreground Transparency', value=0.0) 91 | 92 | with gr.TabItem(label="etc", elem_id='ebs_configuration_tab_etc'): 93 | mask_mode = gr.Dropdown(choices=["Normal","Invert","None"], value="Normal" ,label="Mask Mode") 94 | with gr.TabItem('info', elem_id='ebs_info'): 95 | gr.HTML(value="\
    The process of creating a video can be divided into the following stages.
    (Stages 3, 4, and 6 only show a guide and do no actual processing.)

    stage 1
    Extract frames from the original video.
    Generate a mask image.

    stage 2
    Select keyframes to be given to ebsynth.

    stage 3
    Run img2img on the keyframes.

    stage 3.5
    (Optional) Perform color correction on the img2img results to reduce flickering, or simply change the color tone of the generated results.

    stage 4
    Scale the img2img results up or down to the size of the original video.

    stage 5
    Rename the keyframes.
    Generate the .ebs file (ebsynth project file).

    stage 6
    Run ebsynth yourself.
    Open the generated .ebs under the project directory and press the [Run All] button.
    If an "out-*" directory already exists in the project directory, delete it manually before executing.
    If multiple .ebs files are generated, run them all.

    stage 7
    Concatenate each frame while crossfading. (A minimal crossfade sketch follows at the end of this overview.)
    Composite the audio extracted from the original video onto the concatenated video.

    stage 8
    This is an extra stage.
    You can put any image, images, or video you like in the background.
    You can specify it in this field -> [Ebsynth Utility]->[configuration]->[stage 8]->[Background source]
    If you have already created a background video in Invert Mask Mode ([Ebsynth Utility]->[configuration]->[etc]->[Mask Mode]),
    you can specify "path_to_project_dir/inv/crossfade_tmp".
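For intuition about stage 7, here is a minimal crossfade sketch. It is not the extension's actual implementation; the frame paths and naming are placeholders, and blend_rate mirrors the "Crossfade blend rate" slider above.

# Minimal crossfade sketch (illustrative only; placeholder paths).
import os
import numpy as np
from PIL import Image

def crossfade(frames_a, frames_b, blend_rate=1.0):
    # Blend two equally long frame sequences: early frames favour frames_a,
    # late frames favour frames_b. blend_rate=1.0 means a full crossfade.
    out = []
    n = len(frames_a)
    for i, (a, b) in enumerate(zip(frames_a, frames_b)):
        alpha = blend_rate * (i + 1) / (n + 1)
        mixed = (1.0 - alpha) * np.asarray(a, dtype=np.float32) + alpha * np.asarray(b, dtype=np.float32)
        out.append(Image.fromarray(mixed.astype(np.uint8)))
    return out

# usage: blend the overlap rendered from keyframe k (fading out) and keyframe k+1 (fading in)
seq_a = [Image.open(f"out-00001/{i:05d}.png") for i in range(1, 11)]   # placeholder paths
seq_b = [Image.open(f"out-00011/{i:05d}.png") for i in range(1, 11)]
os.makedirs("crossfade_tmp", exist_ok=True)
for i, img in enumerate(crossfade(seq_a, seq_b), start=1):
    img.save(f"crossfade_tmp/{i:05d}.png")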