├── wildcards └── lb_sample_wildcard │ ├── lb_blur_weak.txt │ ├── lb_zoom_in.txt │ ├── lb_zoom_out.txt │ ├── lb_center_random.txt │ ├── lb_rot_fast.txt │ ├── lb_rot_slow.txt │ ├── lb_hue.txt │ ├── lb_rot_x_fast.txt │ ├── lb_rot_x_slow.txt │ ├── lb_rot_y_fast.txt │ ├── lb_rot_y_slow.txt │ ├── lb_center_in_corner.txt │ ├── lb_center_in_edge.txt │ ├── lb_vel_slow.txt │ ├── lb_rot_wave.txt │ ├── lb_rot_x_wave.txt │ ├── lb_rot_y_wave.txt │ ├── lb_beat_shake.txt │ ├── lb_beat_shake_weak.txt │ ├── lb_vel_fast.txt │ ├── lb_zoom_wave.txt │ ├── lb_prompt_action.txt │ ├── lb_center_wave.txt │ ├── lb_prompt_face.txt │ ├── lb_beat_other.txt │ ├── lb_vel_wave.txt │ ├── lb_1sec_random.txt │ ├── lb_1sec_pendulum.txt │ ├── lb_slide_x.txt │ └── lb_slide_y.txt ├── style.css ├── imgs ├── bpm_ng.png ├── bpm_ok.png ├── cheat_sheet.png ├── first_frame.png ├── optical_flow.png ├── prompt_test.png ├── lb_controlnet.png ├── main_wave_list.png ├── promt_test_ok.png ├── promt_test_ok2.png ├── audio_file_path.png ├── audio_analyzer_run.png ├── generate_test_audio.png ├── wave_list_generator.png └── how_to_extract_frame.png ├── wav ├── metronome.wav └── metronome2.wav ├── install.py ├── scripts ├── util_sd_loopback_music_sync_wave │ ├── regex.py │ ├── wave_list_test.py │ ├── controlnet_web.py │ ├── sam.py │ ├── bpm.py │ ├── upscale.py │ ├── wave_generator.py │ ├── prompt_test.py │ ├── affine.py │ ├── other_effect.py │ ├── controlnet.py │ ├── slide.py │ ├── frame_extractor.py │ ├── perlin.py │ ├── audio_analyzer.py │ ├── raft.py │ └── sync_effect.py ├── loopback_music_sync_wave_ui.py └── loopback_music_sync_wave.py └── README.md /wildcards/lb_sample_wildcard/lb_blur_weak.txt: -------------------------------------------------------------------------------- 1 | #blur(5) 2 | -------------------------------------------------------------------------------- /style.css: -------------------------------------------------------------------------------- 1 | #lmsw_wave_plot>div{ 2 | height: 250px; 3 | } 4 | 5 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_zoom_in.txt: -------------------------------------------------------------------------------- 1 | #zoom(@random(1.1,1.7)) 2 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_zoom_out.txt: -------------------------------------------------------------------------------- 1 | #zoom(@random(0.5,0.9)) 2 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_center_random.txt: -------------------------------------------------------------------------------- 1 | #center(@random(0,1),@random(0,1)) 2 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_fast.txt: -------------------------------------------------------------------------------- 1 | #rot(@random(70,90)) 2 | #rot(@random(-70,-90)) 3 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_slow.txt: -------------------------------------------------------------------------------- 1 | #rot(@random(20,30)) 2 | #rot(@random(-20,-30)) 3 | -------------------------------------------------------------------------------- /imgs/bpm_ng.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/bpm_ng.png 
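[Editor's note on the wildcard files above — an inference from the file layout, not stated in this dump] Each lb_*.txt under wildcards/lb_sample_wildcard lists one effect candidate per line, and in the usual A1111 wildcard convention a single line is chosen at random each time the wildcard is expanded, so a multi-line file like lb_rot_fast.txt yields either a clockwise or a counter-clockwise rotation per wave. @random(a,b) resolves to a uniform value between a and b before the #name(args) directive is parsed (see regex.py further down). A minimal hypothetical wildcard file in the same style:

    #zoom(@random(1.1,1.3))
    #rot(@random(-20,20))

Picking the first line yields a zoom directive with a factor between 1.1 and 1.3; picking the second, a rotation between -20 and +20 degrees.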
-------------------------------------------------------------------------------- /imgs/bpm_ok.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/bpm_ok.png -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_hue.txt: -------------------------------------------------------------------------------- 1 | #hue(@random(0,10.99),10) 2 | #hue(@random(0,10.99),-10) 3 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_x_fast.txt: -------------------------------------------------------------------------------- 1 | #rot_x(@random(70,90)) 2 | #rot_x(@random(-70,-90)) 3 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_x_slow.txt: -------------------------------------------------------------------------------- 1 | #rot_x(@random(20,30)) 2 | #rot_x(@random(-20,-30)) 3 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_y_fast.txt: -------------------------------------------------------------------------------- 1 | #rot_y(@random(70,90)) 2 | #rot_y(@random(-70,-90)) 3 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_y_slow.txt: -------------------------------------------------------------------------------- 1 | #rot_y(@random(20,30)) 2 | #rot_y(@random(-20,-30)) 3 | -------------------------------------------------------------------------------- /wav/metronome.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/wav/metronome.wav -------------------------------------------------------------------------------- /wav/metronome2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/wav/metronome2.wav -------------------------------------------------------------------------------- /imgs/cheat_sheet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/cheat_sheet.png -------------------------------------------------------------------------------- /imgs/first_frame.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/first_frame.png -------------------------------------------------------------------------------- /imgs/optical_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/optical_flow.png -------------------------------------------------------------------------------- /imgs/prompt_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/prompt_test.png -------------------------------------------------------------------------------- /imgs/lb_controlnet.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/lb_controlnet.png -------------------------------------------------------------------------------- /imgs/main_wave_list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/main_wave_list.png -------------------------------------------------------------------------------- /imgs/promt_test_ok.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/promt_test_ok.png -------------------------------------------------------------------------------- /imgs/promt_test_ok2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/promt_test_ok2.png -------------------------------------------------------------------------------- /imgs/audio_file_path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/audio_file_path.png -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_center_in_corner.txt: -------------------------------------------------------------------------------- 1 | #center(0,0) 2 | #center(0,1) 3 | #center(1,0) 4 | #center(1,1) 5 | -------------------------------------------------------------------------------- /imgs/audio_analyzer_run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/audio_analyzer_run.png -------------------------------------------------------------------------------- /imgs/generate_test_audio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/generate_test_audio.png -------------------------------------------------------------------------------- /imgs/wave_list_generator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/wave_list_generator.png -------------------------------------------------------------------------------- /imgs/how_to_extract_frame.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/s9roll7/sd_loopback_music_sync_wave/main/imgs/how_to_extract_frame.png -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_center_in_edge.txt: -------------------------------------------------------------------------------- 1 | #center(@random(0,1),0) 2 | #center(@random(0,1),1) 3 | #center(0,@random(0,1)) 4 | #center(1,@random(0,1)) 5 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_vel_slow.txt: -------------------------------------------------------------------------------- 1 | #vel_x(@random(-0.5,0.5)) 2 | #vel_x(@random(-0.5,0.5)),#vel_y(@random(-0.5,0.5)) 3 | #vel_y(@random(-0.5,0.5)) 4 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_wave.txt: 
-------------------------------------------------------------------------------- 1 | #rot(@wave_amplitude(0,@random(-30,30))) 2 | #rot(@wave_amplitude(0,@random(30,60))) 3 | #rot(@wave_amplitude(0,@random(-30,-60))) 4 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_x_wave.txt: -------------------------------------------------------------------------------- 1 | #rot_x(@wave_amplitude(0,@random(-30,30))) 2 | #rot_x(@wave_amplitude(0,@random(30,60))) 3 | #rot_x(@wave_amplitude(0,@random(-30,-60))) 4 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_rot_y_wave.txt: -------------------------------------------------------------------------------- 1 | #rot_y(@wave_amplitude(0,@random(-30,30))) 2 | #rot_y(@wave_amplitude(0,@random(30,60))) 3 | #rot_y(@wave_amplitude(0,@random(-30,-60))) 4 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_beat_shake.txt: -------------------------------------------------------------------------------- 1 | $shake_x(250, 0.03) 2 | $shake_y(250, 0.03) 3 | $shake_rot(250, 5) 4 | $shake_rot_x(250, 10) 5 | $shake_rot_y(250, 15) 6 | $shake_zoom(250, 1.05) 7 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_beat_shake_weak.txt: -------------------------------------------------------------------------------- 1 | $shake_x(250, 0.01) 2 | $shake_y(250, 0.01) 3 | $shake_rot(250, 1.5) 4 | $shake_rot_x(250, 5) 5 | $shake_rot_y(250, 12) 6 | $shake_zoom(250, 1.025) 7 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_vel_fast.txt: -------------------------------------------------------------------------------- 1 | #vel_x(@random(1.0,2.0)) 2 | #vel_x(@random(1.0,2.0)),#vel_y(@random(1.0,2.0)) 3 | #vel_y(@random(1.0,2.0)) 4 | #vel_x(@random(-1.0,-2.0)) 5 | #vel_x(@random(-1.0,-2.0)),#vel_y(@random(-1.0,-2.0)) 6 | #vel_y(@random(-1.0,-2.0)) 7 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_zoom_wave.txt: -------------------------------------------------------------------------------- 1 | #zoom(@wave_amplitude(@random(0.8,1.0),@random(1.3,1.6))) 2 | #zoom(@wave_amplitude(@random(1.0,1.1),@random(0.5,0.8))) 3 | #zoom(@wave_amplitude(@random(1.3,1.6),@random(0.5,0.8))) 4 | #zoom(@wave_amplitude(@random(0.5,0.8),@random(1.3,1.6))) 5 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_prompt_action.txt: -------------------------------------------------------------------------------- 1 | (sitting:@wave_amplitude(1.5,0)) 2 | (dancing:@wave_amplitude(1.5,0)) 3 | (jump:@wave_amplitude(1.5,0)) 4 | (stylish posing:@wave_amplitude(1.5,0)) 5 | (folding arms:@wave_amplitude(1.5,0)) 6 | (V-sign in front of face:@wave_amplitude(1.5,0)) 7 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_center_wave.txt: -------------------------------------------------------------------------------- 1 | #center(@wave_amplitude(0.5,@random(0,1)),@wave_amplitude(0.5,@random(0,1))) 2 | #center(@wave_amplitude(@random(0,1),0.5),@wave_amplitude(@random(0,1),0.5)) 3 | #center(@wave_amplitude(@random(0,1),@random(0,1)),@wave_amplitude(@random(0,1),@random(0,1))) 4 | 
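[Editor's note on the wave-synced directives above — a hedged reading of the samples] Two time-varying forms appear. @wave_amplitude(a,b) interpolates a parameter between a and b as the wave envelope moves between its extremes (which endpoint maps to which argument is not stated here), so (dancing:@wave_amplitude(1.5,0)) in lb_prompt_action.txt pulses a prompt weight in time with the music. The $-prefixed effects take a leading period argument that appears to be in milliseconds: the lb_1sec_* files use 1000, matching the "1sec" in their names, while the lb_beat_* files use 250. A hypothetical candidate line combining both styles:

    $shake_rot(500, 8),#zoom(@wave_amplitude(1.0,1.2))

i.e. a rotation shake with a 500 ms period layered on a zoom whose factor swings between 1.0 and 1.2 with the wave.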
-------------------------------------------------------------------------------- /install.py: -------------------------------------------------------------------------------- 1 | import launch 2 | 3 | 4 | if not launch.is_installed("librosa"): 5 | launch.run_pip("install librosa", "requirements for sd_loopback_music_sync_wave") 6 | 7 | if not launch.is_installed("pydub"): 8 | launch.run_pip("install pydub", "requirements for sd_loopback_music_sync_wave") 9 | 10 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_prompt_face.txt: -------------------------------------------------------------------------------- 1 | (open mouth:@wave_amplitude(1.0,0)),(close mouth:@wave_amplitude(0,1.0)) 2 | (looking at viewer:@wave_amplitude(1.0,0)),(face in profile:@wave_amplitude(0,1.0)) 3 | (tongue out:@wave_amplitude(1.0,0)) 4 | (smile:@wave_amplitude(1.0,0)) 5 | (angry:@wave_amplitude(1.0,0)) 6 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_beat_other.txt: -------------------------------------------------------------------------------- 1 | $vibration(250, 1.05) 2 | $vibration(250, 0.95) 3 | $beat_blur(250, 30) 4 | $beat_slide_x(250, 0, 0.05, @random(0.2,0.8)) 5 | $beat_slide_x(250, 0, 0.05, @random(0.2,0.8)) 6 | $beat_slide_x(250, 1, -0.05, @random(0.2,0.8)) 7 | $beat_slide_x(250, 1, -0.05, @random(0.2,0.8)) 8 | $beat_slide_y(250, 0, 0.05, @random(0.2,0.8)) 9 | $beat_slide_y(250, 0, -0.05, @random(0.2,0.8)) 10 | $beat_slide_y(250, 1, 0.05, @random(0.2,0.8)) 11 | $beat_slide_y(250, 1, -0.05, @random(0.2,0.8)) 12 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_vel_wave.txt: -------------------------------------------------------------------------------- 1 | #vel_x(@wave_amplitude(0,@random(0.3,0.5))) 2 | #vel_x(@wave_amplitude(0,@random(0.3,0.5))),#vel_y(@wave_amplitude(0,@random(0.3,0.5))) 3 | #vel_x(@wave_amplitude(0,@random(0.3,0.5))),#vel_y(@wave_amplitude(0,@random(-0.3,-0.5))) 4 | #vel_y(@wave_amplitude(0,@random(0.3,0.5))) 5 | #vel_x(@wave_amplitude(0,@random(-0.3,-0.5))) 6 | #vel_x(@wave_amplitude(0,@random(-0.3,-0.5))),#vel_y(@wave_amplitude(0,@random(0.3,0.5))) 7 | #vel_x(@wave_amplitude(0,@random(-0.3,-0.5))),#vel_y(@wave_amplitude(0,@random(-0.3,-0.5))) 8 | #vel_y(@wave_amplitude(0,@random(-0.3,-0.5))) 9 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_1sec_random.txt: -------------------------------------------------------------------------------- 1 | $random_xy(1000, 0.2,0.2, 3000) 2 | $random_zoom(1000, 1.25, 3000) 3 | $random_rot(1000, 30, 3000) 4 | $random_rot_x(1000, 30, 3000) 5 | $random_rot_y(1000, 30, 3000) 6 | $random_zoom(1000, 1.25, 3000),$random_center(1000, 0.25, 0.25,0.5,0.5,3000) 7 | $random_rot(1000, 30, 3000),$random_center(1000, 0.25, 0.25,0.5,0.5,3000) 8 | $random_rot_x(1000, 30, 3000),$random_center(1000, 0.25, 0.25,0.5,0.5,3000) 9 | $random_rot_y(1000, 30, 3000),$random_center(1000, 0.25, 0.25,0.5,0.5,3000) 10 | $random_blur(1000, 10, 3000) 11 | $random_hue(1000, 9, 0, 60,3000) 12 | $random_slide_x(1000,0,0.25,0.5,0.5,3000) 13 | $random_slide_y(1000,0,0.25,0.5,0.5,3000) 14 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/regex.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | arg_regex 
= r'([\-]?[0-9]*\.?[0-9]+)' 5 | opt_regex = r'(?:\s*,\s*([\-]?[0-9]*\.?[0-9]+))?' 6 | 7 | 8 | def create_regex(prefix, name, num_of_args, num_of_opts=0): 9 | args = [arg_regex for x in range(num_of_args)] 10 | opts = [opt_regex for x in range(num_of_opts)] 11 | return prefix + name + r'\(\s*' +\ 12 | r'\s*,\s*'.join(args) +\ 13 | r''.join(opts) +\ 14 | r'\s*\)' 15 | 16 | arg_text_regex = r'\"([^\"]+)\"' 17 | opt_text_regex = r'(?:\s*,\s*\"([^\"]+)\")?' 18 | 19 | def create_regex_text(prefix, name, num_of_args, num_of_opts=0): 20 | args = [arg_text_regex for x in range(num_of_args)] 21 | opts = [opt_text_regex for x in range(num_of_opts)] 22 | return prefix + name + r'\(\s*' +\ 23 | r'\s*,\s*'.join(args) +\ 24 | r''.join(opts) +\ 25 | r'\s*\)' 26 | 27 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_1sec_pendulum.txt: -------------------------------------------------------------------------------- 1 | $pendulum_xy(1000, @random(0.1,0.2),@random(-0.1,-0.2),@random(0.1,0.2),@random(-0.1,-0.2)) 2 | $pendulum_rot(1000, @random(10,30),@random(-10,-30)) 3 | $pendulum_zoom(1000, @random(1.1,1.5),@random(0.5,0.9)) 4 | $pendulum_rot_x(1000, @random(10,30),@random(-10,-30)) 5 | $pendulum_rot_y(1000, @random(30,60),@random(-30,-60)) 6 | $pendulum_rot(1000, @random(10,30),@random(-10,-30)),$pendulum_center(1000, @random(0,1),@random(0,1),@random(0,1),@random(0,1)) 7 | $pendulum_zoom(1000, @random(1.1,1.5),@random(0.5,0.9)),$pendulum_center(1000, @random(0,1),@random(0,1),@random(0,1),@random(0,1)) 8 | $pendulum_rot_x(1000, @random(10,30),@random(-10,-30)),$pendulum_center(1000, @random(0,1),@random(0,1),@random(0,1),@random(0,1)) 9 | $pendulum_rot_y(1000, @random(30,60),@random(-30,-60)),$pendulum_center(1000, @random(0,1),@random(0,1),@random(0,1),@random(0,1)) 10 | $pendulum_hue(1000, 9, 0,@random(30,60)) 11 | $pendulum_hue(1000, 9, @random(-30,-60),@random(30,60)),#post_process(1) 12 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_slide_x.txt: -------------------------------------------------------------------------------- 1 | #slide_x(0,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_amplitude(0,1)) 2 | #slide_x(0,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_progress(0,1)) 3 | #slide_x(0,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_amplitude(1,0)) 4 | #slide_x(0,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_progress(1,0)) 5 | #slide_x(1,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_amplitude(0,1)) 6 | #slide_x(1,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_progress(0,1)) 7 | #slide_x(1,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_amplitude(1,0)) 8 | #slide_x(1,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_progress(1,0)) 9 | #slide_x(1,@wave_amplitude(@random(0.15,0.1),@random(-0.5,-0.3)),@wave_amplitude(0,1)) 10 | #slide_x(1,@wave_amplitude(@random(0.15,0.1),@random(-0.5,-0.3)),@wave_progress(0,1)) 11 | #slide_x(1,@wave_amplitude(@random(0.15,0.1),@random(-0.5,-0.3)),@wave_amplitude(1,0)) 12 | #slide_x(1,@wave_amplitude(@random(0.15,0.1),@random(-0.5,-0.3)),@wave_progress(1,0)) 13 | -------------------------------------------------------------------------------- /wildcards/lb_sample_wildcard/lb_slide_y.txt: -------------------------------------------------------------------------------- 1 | 
#slide_y(0,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_amplitude(0,1)) 2 | #slide_y(0,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_progress(0,1)) 3 | #slide_y(0,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_amplitude(1,0)) 4 | #slide_y(0,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_progress(1,0)) 5 | #slide_y(1,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_amplitude(0,1)) 6 | #slide_y(1,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_progress(0,1)) 7 | #slide_y(1,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_amplitude(1,0)) 8 | #slide_y(1,@wave_amplitude(@random(-0.15,-0.1),@random(0.5,0.3)),@wave_progress(1,0)) 9 | #slide_y(1,@wave_amplitude(@random(0.15,0.1),@random(-0.5,-0.3)),@wave_amplitude(0,1)) 10 | #slide_y(1,@wave_amplitude(@random(0.15,0.1),@random(-0.5,-0.3)),@wave_progress(0,1)) 11 | #slide_y(1,@wave_amplitude(@random(0.15,0.1),@random(-0.5,-0.3)),@wave_amplitude(1,0)) 12 | #slide_y(1,@wave_amplitude(@random(0.15,0.1),@random(-0.5,-0.3)),@wave_progress(1,0)) 13 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/wave_list_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from pydub import AudioSegment 4 | 5 | from scripts.loopback_music_sync_wave import str_to_wave_list 6 | 7 | 8 | def get_test_wav_dir(): 9 | t= os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") 10 | t= os.path.join(t, "..") 11 | t = os.path.join(t, "wav") 12 | return os.path.normpath(t) 13 | 14 | def wave_list_test_process(audio_file:str, wave_list_str:str): 15 | #start_time,type,(strength) 16 | 17 | if (not audio_file) or (not os.path.isfile(audio_file)): 18 | print("File not found : ", audio_file) 19 | return None, " " 20 | 21 | wave_list = str_to_wave_list(wave_list_str) 22 | 23 | audio_name = os.path.splitext(os.path.basename(audio_file))[0] 24 | 25 | base_audio = AudioSegment.from_file(audio_file) 26 | test_audio = AudioSegment.from_file(os.path.join( get_test_wav_dir(), "metronome.wav" )) 27 | 28 | for w in wave_list: 29 | if w["start_msec"] == 0 or w["type"] == "end": 30 | continue 31 | base_audio = base_audio.overlay(test_audio, position=w["start_msec"]) 32 | 33 | audio_tmp_dir = os.path.join(get_test_wav_dir(), "tmp") 34 | os.makedirs(audio_tmp_dir, exist_ok=True) 35 | 36 | audio_tmp_file_path = os.path.join(audio_tmp_dir, audio_name + "_" + time.strftime("%Y%m%d-%H%M%S")+".mp3") 37 | 38 | base_audio.export(audio_tmp_file_path, format="mp3") 39 | 40 | 41 | return audio_tmp_file_path, " " 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/controlnet_web.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | from PIL import Image 4 | from io import BytesIO 5 | 6 | cn_detect_url = "http://127.0.0.1:7860/controlnet/detect" 7 | 8 | 9 | 10 | 11 | 12 | debug_c = 0 13 | def debug_save_img(img,comment): 14 | global debug_c 15 | img.save( f"scripts/testpngs/{debug_c}_{comment}.png") 16 | 17 | debug_c += 1 18 | 19 | 20 | 21 | def image_to_base64(img_path: str) -> str: 22 | with open(img_path, "rb") as img_file: 23 | img_base64 = base64.b64encode(img_file.read()).decode() 24 | return img_base64 25 | 26 | def pil_to_base64(img:Image, format="png") -> str: 27 | buffer = BytesIO() 28 | 
img.save(buffer, format) 29 | img_str = base64.b64encode(buffer.getvalue()).decode("ascii") 30 | 31 | return img_str 32 | 33 | def get_detectmap(preprocess_module_name, img:Image): 34 | 35 | payload = { 36 | "controlnet_module": preprocess_module_name, 37 | "controlnet_input_images": [pil_to_base64(img)], 38 | # "controlnet_processor_res": res, 39 | # "controlnet_threshold_a": th_a, 40 | # "controlnet_threshold_b": th_b, 41 | } 42 | res = requests.post(cn_detect_url, json=payload) 43 | 44 | # print("res from cn : ",res) 45 | 46 | reply = res.json() 47 | 48 | if res.status_code == 200: 49 | print(reply["info"]) 50 | img64 = reply["images"][0] 51 | image_data = base64.b64decode(img64) 52 | image = Image.open(BytesIO(image_data)) 53 | return image 54 | else: 55 | return None 56 | 57 | 58 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/sam.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | from PIL import Image 4 | from io import BytesIO 5 | 6 | sam_url = "http://127.0.0.1:7860/sam/sam-predict" 7 | 8 | 9 | 10 | 11 | 12 | debug_c = 0 13 | def debug_save_img(img,comment): 14 | global debug_c 15 | img.save( f"scripts/testpngs/{debug_c}_{comment}.png") 16 | 17 | debug_c += 1 18 | 19 | 20 | 21 | def image_to_base64(img_path: str) -> str: 22 | with open(img_path, "rb") as img_file: 23 | img_base64 = base64.b64encode(img_file.read()).decode() 24 | return img_base64 25 | 26 | def pil_to_base64(img:Image, format="png") -> str: 27 | buffer = BytesIO() 28 | img.save(buffer, format) 29 | img_str = base64.b64encode(buffer.getvalue()).decode("ascii") 30 | 31 | return img_str 32 | 33 | def get_mask_from_sam(img:Image, prompt, box_th=0.3, padding=30): 34 | payload = { 35 | "input_image": pil_to_base64(img), 36 | "dino_enabled": True, 37 | "dino_text_prompt": prompt, 38 | "dino_preview_checkbox": False, 39 | "dino_box_threshold": box_th, 40 | } 41 | res = requests.post(sam_url, json=payload) 42 | 43 | print("res from sam : ",res) 44 | 45 | reply = res.json() 46 | 47 | print(reply["msg"]) 48 | 49 | if res.status_code == 200: 50 | masks = [] 51 | for img64 in reply["masks"]: 52 | image_data = base64.b64decode(img64) 53 | image = Image.open(BytesIO(image_data)) 54 | masks.append(image.convert("L")) 55 | #debug_save_img(image, "sam") 56 | return masks 57 | else: 58 | return None 59 | 60 | 61 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/bpm.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numpy as np 3 | 4 | # @@bpm123@12345[] 5 | bpm_regex = r'@@bpm([0-9\.]+)@([0-9]+)\[(.*?)\]' 6 | 7 | 8 | class BpmEvent: 9 | def __init__(self, fps, total_time): 10 | self.fps = fps 11 | self.total_time = int(total_time) 12 | self.events = {} 13 | 14 | def add_event(self, match_obj, start_time): 15 | bpm = 0 16 | prompt = "" 17 | #start_time = int(start_time) 18 | last_msec = self.total_time 19 | 20 | if match_obj.group(1) is not None: 21 | bpm = float(match_obj.group(1)) 22 | if match_obj.group(2) is not None: 23 | last_msec = min( start_time + int(match_obj.group(2)) , last_msec) 24 | if match_obj.group(3) is not None: 25 | prompt = match_obj.group(3) 26 | 27 | if bpm < 1 or not prompt: 28 | return "" 29 | 30 | msec_per_beat = (60 * 1000 / bpm) 31 | 32 | frames = [int(t * self.fps / 1000) for t in np.arange(start_time, last_msec,
msec_per_beat)] 33 | 34 | print("bpm event : ",frames) 35 | 36 | for i in frames: 37 | if i in self.events: 38 | self.events[i] = self.events[i] + "," + prompt 39 | else: 40 | self.events[i] = prompt 41 | 42 | return "" 43 | 44 | def get_current_prompt(self, cur_time): 45 | 46 | cur_frame = int(cur_time * self.fps / 1000) 47 | prompt = "" 48 | if cur_frame in self.events: 49 | prompt = self.events[cur_frame] 50 | print("bpm prompt : ", prompt) 51 | 52 | return prompt 53 | 54 | def parse_prompt(self, prompt, cur_time): 55 | prompt = re.sub(bpm_regex, lambda x: self.add_event(x, cur_time), prompt) 56 | return prompt 57 | 58 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/upscale.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import PIL.Image 4 | from modules import processing,images 5 | from modules.processing import Processed 6 | 7 | def upscale(p, us_map, output_path, us_width, us_height, us_method, us_denoising_strength): 8 | 9 | if us_width == -1 and us_height == -1: 10 | return 11 | if us_method == 'None': 12 | return 13 | 14 | print("upscale start") 15 | 16 | calc_time_start = time.perf_counter() 17 | 18 | os.makedirs(output_path, exist_ok=True) 19 | 20 | org_w = p.width 21 | org_h = p.height 22 | 23 | if us_width != -1: 24 | us_width = us_width // 8 * 8 25 | if us_height != -1: 26 | us_height = us_height // 8 * 8 27 | 28 | if us_width == -1: 29 | us_width = int((us_height * org_w / org_h) // 8 * 8) 30 | elif us_height == -1: 31 | us_height = int((us_width * org_h / org_w) // 8 * 8) 32 | 33 | print("({0},{1}) upscale to ({2},{3})".format(org_w, org_h, us_width, us_height)) 34 | 35 | total = len(us_map) 36 | 37 | for i,img_no in enumerate(us_map): 38 | img_path = os.path.join(p.outpath_samples, f"{str(img_no).zfill(5)}.png") 39 | if not os.path.isfile(img_path): 40 | print("warning file not found : ",img_path) 41 | continue 42 | 43 | im = PIL.Image.open(img_path) 44 | _seed = us_map[img_no]["seed"] 45 | _prompt = us_map[img_no]["prompt"] 46 | _info = us_map[img_no]["info"] 47 | 48 | if us_method != 'latent': 49 | resized_img = images.resize_image(0, im, us_width, us_height, us_method ) 50 | else: 51 | p.resize_mode = 3 52 | p.width = us_width 53 | p.height = us_height 54 | p.init_images = [im] 55 | p.seed = _seed 56 | p.prompt = _prompt 57 | p.denoising_strength = us_denoising_strength 58 | 59 | processed = processing.process_images(p) 60 | 61 | resized_img = processed.images[0] 62 | 63 | images.save_image(resized_img, output_path, "", _seed, _prompt, info=_info, save_to_dirs=False, forced_filename=str(img_no).zfill(5), p=p) 64 | 65 | print(f"{i}/{total}") 66 | 67 | 68 | calc_time_end = time.perf_counter() 69 | print("upscale elapsed_time (sec) : ", calc_time_end - calc_time_start) 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/wave_generator.py: -------------------------------------------------------------------------------- 1 | 2 | from PIL import Image 3 | 4 | import scripts.util_sd_loopback_music_sync_wave.audio_analyzer 5 | 6 | def wave_generator_process(bpm: float, beat_per_wave:int, start_msec:int, end_msec:int, default_type:str, default_strength:float): 7 | #start_time,type,(strength) 8 | 9 | if start_msec >= end_msec: 10 | print("Error start_msec >= end_msec") 11 | return "",None," " 12 | 13 | wave_list = [] 14 | 15 | 
msec_per_beat = 60 * 1000 / bpm 16 | msec_per_wave = msec_per_beat * beat_per_wave 17 | 18 | print("msec_per_beat : ", msec_per_beat) 19 | print("msec_per_wave : ", msec_per_wave) 20 | 21 | cur = 0 22 | 23 | if cur < start_msec: 24 | wave_list.append( ( cur, "zero", 1.0 ) ) 25 | 26 | cur = start_msec 27 | 28 | while True: 29 | if cur + msec_per_wave >= end_msec: 30 | if cur + msec_per_wave > end_msec: 31 | wave_list.append( ( int(cur), "zero", 1.0 ) ) 32 | else: 33 | wave_list.append( ( int(cur), default_type, default_strength ) ) 34 | 35 | wave_list.append( ( end_msec, "end", 1.0 ) ) 36 | break 37 | 38 | wave_list.append( ( int(cur), default_type, default_strength ) ) 39 | 40 | cur += msec_per_wave 41 | 42 | wave_str_list=[] 43 | 44 | for w in wave_list: 45 | if w[1] in ("zero", "end") or w[2] == 1.0: 46 | wave_str_list.append( f"{w[0]},{w[1]}" ) 47 | else: 48 | wave_str_list.append( f"{w[0]},{w[1]},{w[2]}" ) 49 | 50 | print(wave_str_list) 51 | 52 | fig = scripts.util_sd_loopback_music_sync_wave.audio_analyzer.create_figure([x[0]/1000 for x in wave_list]) 53 | 54 | return "\n".join(wave_str_list), fig, " " 55 | 56 | 57 | def f2w_generator_process(fps:int, default_type:str, default_strength:float, f2w_frame_list_txt:str): 58 | f2w_frame_list_txt = f2w_frame_list_txt.strip() 59 | 60 | frames = f2w_frame_list_txt.split(",") 61 | 62 | wave_list=[] 63 | 64 | for i,f in enumerate( frames ): 65 | msec = int(f) * 1000 / fps 66 | 67 | if i == 0 and msec != 0: 68 | wave_list.append( ( 0, default_type, default_strength ) ) 69 | 70 | wave_list.append( ( int(msec), default_type, default_strength ) ) 71 | 72 | wave_list[-1] = (wave_list[-1][0],"end") 73 | 74 | wave_str_list=[] 75 | 76 | for w in wave_list: 77 | if w[1] in ("zero", "end") or w[2] == 1.0: 78 | wave_str_list.append( f"{w[0]},{w[1]}" ) 79 | else: 80 | wave_str_list.append( f"{w[0]},{w[1]},{w[2]}" ) 81 | 82 | return "\n".join(wave_str_list)," " 83 | 84 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/prompt_test.py: -------------------------------------------------------------------------------- 1 | 2 | import statistics 3 | from PIL import Image 4 | 5 | import scripts.util_sd_loopback_music_sync_wave.audio_analyzer 6 | import scripts.loopback_music_sync_wave 7 | 8 | 9 | def prompt_test_process(wave_list_str:str, extend_prompts:str): 10 | 11 | wave_list_str = wave_list_str.strip() 12 | if not wave_list_str: 13 | print("Error wave_list_str empty") 14 | return None, " " 15 | extend_prompts = extend_prompts.strip() 16 | if not extend_prompts: 17 | print("Error extend_prompts empty") 18 | return None, " " 19 | 20 | fps = 24 21 | initial_denoising_strength=0 22 | denoising_strength_change_amplitude=1.0 23 | 24 | stat_map = scripts.loopback_music_sync_wave.fake_run( wave_list_str, extend_prompts, fps, initial_denoising_strength, denoising_strength_change_amplitude ) 25 | 26 | times = [] 27 | dstrs = [] 28 | affine_velx = [] 29 | affine_vely = [] 30 | affine_rot = [] 31 | affine_zoom = [] 32 | affine_cx = [] 33 | affine_cy = [] 34 | affine_rot_x = [] 35 | affine_rot_y = [] 36 | 37 | slide_x_spd = [] 38 | slide_x_pos = [] 39 | slide_y_spd = [] 40 | slide_y_pos = [] 41 | 42 | other_blur = [] 43 | other_hue = [] 44 | 45 | ''' 46 | "prompt":new_prompt, 47 | "denoising_strength":denoising_strength, 48 | "affine_input":affine_input, #affine_input = [_velx,_vely,_rot,_zoom,_cx,_cy, _rot_x, _rot_y] 49 | "slide_inputs":slide_inputs, #slide_inputs = [(int(slide_x_type[-1]), 
slide_x_speed[-1]/ fps, slide_x_border[-1]), (int(slide_y_type[-1]), slide_y_speed[-1]/ fps, slide_y_border[-1]) ] 50 | "other_effect_input":other_effect_input, #other_effect_input = [_blur_str,_hue_type,_hue_angle] 51 | ''' 52 | 53 | for t in stat_map: 54 | times.append(t) 55 | item = stat_map[t] 56 | dstrs.append(item["denoising_strength"]) 57 | affine_velx.append(item["affine_input"][0] * fps) 58 | affine_vely.append(item["affine_input"][1] * fps) 59 | affine_rot.append(item["affine_input"][2] * fps) 60 | affine_zoom.append( 1.0 + (item["affine_input"][3] - 1.0) * fps) 61 | affine_cx.append(item["affine_input"][4]) 62 | affine_cy.append(item["affine_input"][5]) 63 | affine_rot_x.append(item["affine_input"][6] * fps) 64 | affine_rot_y.append(item["affine_input"][7] * fps) 65 | 66 | slide_x_spd.append(item["slide_inputs"][0][1] * fps) 67 | slide_x_pos.append(item["slide_inputs"][0][2]) 68 | slide_y_spd.append(item["slide_inputs"][1][1] * fps) 69 | slide_y_pos.append(item["slide_inputs"][1][2]) 70 | 71 | other_blur.append(item["other_effect_input"][0]) 72 | other_hue.append(item["other_effect_input"][2]) 73 | 74 | def standardization(l): 75 | l_mean = statistics.mean(l) 76 | l_stdev = statistics.stdev(l) 77 | if l_stdev == 0: 78 | return [0 for i in l] 79 | return [(i - l_mean) / l_stdev for i in l] 80 | def normalization(l): 81 | l_min = min(l) 82 | l_max = max(l) 83 | if l_max == l_min: 84 | return [0 for i in l] 85 | return [(i - l_min) / (l_max - l_min) for i in l] 86 | 87 | affine_velx = normalization(affine_velx) 88 | affine_vely = normalization(affine_vely) 89 | affine_rot = normalization(affine_rot) 90 | affine_zoom = normalization(affine_zoom) 91 | affine_cx = normalization(affine_cx) 92 | affine_cy = normalization(affine_cy) 93 | affine_rot_x = normalization(affine_rot_x) 94 | affine_rot_y = normalization(affine_rot_y) 95 | 96 | slide_x_spd = normalization(slide_x_spd) 97 | slide_x_pos = normalization(slide_x_pos) 98 | slide_y_spd = normalization(slide_y_spd) 99 | slide_y_pos = normalization(slide_y_pos) 100 | 101 | other_blur = normalization(other_blur) 102 | other_hue = normalization(other_hue) 103 | 104 | wave_list = scripts.loopback_music_sync_wave.str_to_wave_list(wave_list_str) 105 | 106 | plot_data = { 107 | "time" : times, 108 | "wave" : [x["start_msec"]/1000 for x in wave_list], 109 | "data" : { 110 | "denoising_strength" : dstrs, 111 | "vel x" : affine_velx, 112 | "vel y" : affine_vely, 113 | "rotate" : affine_rot, 114 | "zoom" : affine_zoom, 115 | "center x" : affine_cx, 116 | "center y" : affine_cy, 117 | "rotate_x" : affine_rot_x, 118 | "rotate_y" : affine_rot_y, 119 | 120 | "slide_x_vel" : slide_x_spd, 121 | "slide_x_pos" : slide_x_pos, 122 | "slide_y_vel" : slide_y_spd, 123 | "slide_y_pos" : slide_y_pos, 124 | 125 | "other_blur_str" : other_blur, 126 | "other_hue_angle" : other_hue, 127 | } 128 | } 129 | 130 | fig = scripts.util_sd_loopback_music_sync_wave.audio_analyzer.create_prompt_figure(plot_data) 131 | 132 | return fig, " " 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/affine.py: -------------------------------------------------------------------------------- 1 | # https://imagingsolution.net/program/python/opencv-python/opencv_python_affine_transformation/ 2 | 3 | import cv2 4 | import numpy as np 5 | from PIL import Image 6 | 7 | def scaleMatrix(scale): 8 | mat = identityMatrix() 9 | mat[0,0] = scale 10 | mat[1,1] = scale 11 | 12 | return mat 13 | 14 | def 
scaleXYMatrix(sx, sy): 15 | mat = identityMatrix() 16 | mat[0,0] = sx 17 | mat[1,1] = sy 18 | 19 | return mat 20 | 21 | def translateMatrix(tx, ty): 22 | mat = identityMatrix() 23 | mat[0,2] = tx 24 | mat[1,2] = ty 25 | 26 | return mat 27 | 28 | def rotateMatrix(deg): 29 | mat = identityMatrix() 30 | rad = np.deg2rad(deg) 31 | sin = np.sin(rad) 32 | cos = np.cos(rad) 33 | 34 | mat[0,0] = cos 35 | mat[0,1] = -sin 36 | mat[1,0] = sin 37 | mat[1,1] = cos 38 | 39 | return mat 40 | 41 | def scaleAtMatrix(scale, cx, cy): 42 | mat = translateMatrix(-cx, -cy) 43 | mat = scaleMatrix(scale).dot(mat) 44 | mat = translateMatrix(cx, cy).dot(mat) 45 | 46 | return mat 47 | 48 | def rotateAtMatrix(deg, cx, cy): 49 | mat = translateMatrix(-cx, -cy) 50 | mat = rotateMatrix(deg).dot(mat) 51 | mat = translateMatrix(cx, cy).dot(mat) 52 | 53 | return mat 54 | 55 | def afiinePoint(mat, px, py): 56 | 57 | srcPoint = np.array([px, py, 1]) 58 | 59 | return mat.dot(srcPoint)[:2] 60 | 61 | def inverse(mat): 62 | return np.linalg.inv(mat) 63 | 64 | def identityMatrix(): 65 | return np.eye(3, dtype = np.float32) 66 | 67 | # https://github.com/eborboihuc/rotate_3d/blob/master/image_transformer.py 68 | def getProjectionMatrix(theta, phi, gamma, dx, dy, scale, cx, cy, width, height): 69 | 70 | theta, phi, gamma = np.deg2rad([theta, phi, gamma]) 71 | 72 | d = np.sqrt(height**2 + width**2) 73 | focal = d / (2 * np.sin(gamma) if np.sin(gamma) != 0 else 1) 74 | dz = focal / scale 75 | 76 | w = width 77 | h = height 78 | f = focal 79 | 80 | # Projection 2D -> 3D matrix 81 | A1 = np.array([ [1, 0, -cx], 82 | [0, 1, -cy], 83 | [0, 0, 1], 84 | [0, 0, 1]]) 85 | 86 | # Rotation matrices around the X, Y, and Z axis 87 | RX = np.array([ [1, 0, 0, 0], 88 | [0, np.cos(theta), -np.sin(theta), 0], 89 | [0, np.sin(theta), np.cos(theta), 0], 90 | [0, 0, 0, 1]]) 91 | 92 | RY = np.array([ [np.cos(phi), 0, -np.sin(phi), 0], 93 | [0, 1, 0, 0], 94 | [np.sin(phi), 0, np.cos(phi), 0], 95 | [0, 0, 0, 1]]) 96 | 97 | RZ = np.array([ [np.cos(gamma), -np.sin(gamma), 0, 0], 98 | [np.sin(gamma), np.cos(gamma), 0, 0], 99 | [0, 0, 1, 0], 100 | [0, 0, 0, 1]]) 101 | 102 | # Composed rotation matrix with (RX, RY, RZ) 103 | R = np.dot(np.dot(RX, RY), RZ) 104 | 105 | # Translation matrix 106 | T = np.array([ [1, 0, 0, dx], 107 | [0, 1, 0, dy], 108 | [0, 0, 1, dz], 109 | [0, 0, 0, 1]]) 110 | 111 | # Projection 3D -> 2D matrix 112 | A2 = np.array([ [f, 0, cx, 0], 113 | [0, f, cy, 0], 114 | [0, 0, 1, 0]]) 115 | 116 | # Final transformation matrix 117 | return np.dot(A2, np.dot(T, np.dot(R, A1))) 118 | 119 | 120 | 121 | def AffineImage(img:Image, x,y,angle,scale,cx,cy,angle_x,angle_y): 122 | if x == 0 and y == 0 and angle == 0 and scale == 1 and angle_x == 0 and angle_y == 0: 123 | return img 124 | 125 | img_array = np.asarray(img) 126 | h, w, c = img_array.shape 127 | 128 | x = w * x 129 | y = h * y 130 | cx = w * cx 131 | cy = h * (1-cy) 132 | ''' 133 | matAffine = translateMatrix(-cx, -cy) 134 | matAffine = scaleMatrix(scale).dot(matAffine) 135 | matAffine = rotateMatrix(-angle).dot(matAffine) 136 | matAffine = translateMatrix(cx, cy).dot(matAffine) 137 | matAffine = translateMatrix(x, -y).dot(matAffine) 138 | ''' 139 | matAffine = getProjectionMatrix(angle_x, angle_y, angle, x, -y, scale, cx,cy, w, h) 140 | 141 | if scale >= 1.0: 142 | interpolation = interpolation=cv2.INTER_CUBIC 143 | else: 144 | interpolation = interpolation=cv2.INTER_AREA 145 | 146 | # img_array = cv2.warpAffine(img_array, matAffine[:2,], (w, h), borderMode=cv2.BORDER_CONSTANT, 
flags=interpolation) 147 | img_array = cv2.warpPerspective(img_array, matAffine, (w, h), borderMode=cv2.BORDER_CONSTANT, flags=interpolation) 148 | 149 | return Image.fromarray(img_array) 150 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/other_effect.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import cv2 3 | import numpy as np 4 | 5 | debug_c = 0 6 | def debug_save_img(img,comment): 7 | global debug_c 8 | im = Image.fromarray(img) 9 | im.save( f"scripts/testpngs/{debug_c}_{comment}.png") 10 | 11 | debug_c += 1 12 | 13 | # https://note.nkmk.me/python-numpy-generate-gradation-image/ 14 | def get_gradient_2d(start, stop, width, height, is_horizontal): 15 | if is_horizontal: 16 | return np.tile(np.linspace(start, stop, width), (height, 1)) 17 | else: 18 | return np.tile(np.linspace(start, stop, height), (width, 1)).T 19 | 20 | def get_gradient_3d(width, height, start_list, stop_list, is_horizontal_list): 21 | result = np.zeros((height, width, len(start_list)), dtype=float) 22 | 23 | for i, (start, stop, is_horizontal) in enumerate(zip(start_list, stop_list, is_horizontal_list)): 24 | result[:, :, i] = get_gradient_2d(start, stop, width, height, is_horizontal) 25 | 26 | return result 27 | 28 | def get_gradient_circular_2d(start, stop, width, height): 29 | x_axis = np.linspace(-1, 1, width)[None,:] 30 | y_axis = np.linspace(-1, 1, height)[:,None] 31 | arr = np.sqrt(x_axis ** 2 + y_axis ** 2) 32 | 33 | inner = np.array([start])[None, None, :] 34 | outer = np.array([stop])[None, None, :] 35 | arr /= arr.max() 36 | arr = arr[:, :, None] 37 | arr = arr * outer + (1 - arr) * inner 38 | 39 | return arr 40 | 41 | _gradiention_mask = None 42 | _gradiention_mask_stat = [-1,0,0] 43 | 44 | def initialize_cache(): 45 | global _gradiention_mask 46 | global _gradiention_mask_stat 47 | _gradiention_mask = None 48 | _gradiention_mask_stat = [-1,0,0] 49 | 50 | 51 | def get_gradient_mask(mask_type, w, h): 52 | global _gradiention_mask 53 | global _gradiention_mask_stat 54 | 55 | if mask_type != _gradiention_mask_stat[0] or \ 56 | w != _gradiention_mask_stat[1] or \ 57 | h != _gradiention_mask_stat[2] or \ 58 | _gradiention_mask is None: 59 | 60 | _gradiention_mask_stat = [mask_type, w, h] 61 | 62 | # "None","R","L","D","U","RD","LD","RU","LU","C Out","C In" 63 | if mask_type == 1: # R 64 | mask = get_gradient_2d(0.0,1.0,w,h,True) 65 | mask = mask.reshape(*mask.shape, 1) 66 | elif mask_type == 2: # L 67 | mask = get_gradient_2d(1.0,0.0,w,h,True) 68 | mask = mask.reshape(*mask.shape, 1) 69 | elif mask_type == 3: # D 70 | mask = get_gradient_2d(0.0,1.0,w,h,False) 71 | mask = mask.reshape(*mask.shape, 1) 72 | elif mask_type == 4: # U 73 | mask = get_gradient_2d(1.0,0.0,w,h,False) 74 | mask = mask.reshape(*mask.shape, 1) 75 | elif mask_type == 5: # RD 76 | mask1 = get_gradient_2d(0.0,1.0,w,h,True) 77 | mask2 = get_gradient_2d(0.0,1.0,w,h,False) 78 | mask1 = mask1 * mask2 79 | mask1 /= mask1.max() 80 | mask = mask1.reshape(*mask1.shape, 1) 81 | elif mask_type == 6: # LD 82 | mask1 = get_gradient_2d(1.0,0.0,w,h,True) 83 | mask2 = get_gradient_2d(0.0,1.0,w,h,False) 84 | mask1 = mask1 * mask2 85 | mask1 /= mask1.max() 86 | mask = mask1.reshape(*mask1.shape, 1) 87 | elif mask_type == 7: # RU 88 | mask1 = get_gradient_2d(0.0,1.0,w,h,True) 89 | mask2 = get_gradient_2d(1.0,0.0,w,h,False) 90 | mask1 = mask1 * mask2 91 | mask1 /= mask1.max() 92 | mask =
mask1.reshape(*mask1.shape, 1) 93 | elif mask_type == 8: # LU 94 | mask1 = get_gradient_2d(1.0,0.0,w,h,True) 95 | mask2 = get_gradient_2d(1.0,0.0,w,h,False) 96 | mask1 = mask1 * mask2 97 | mask1 /= mask1.max() 98 | mask = mask1.reshape(*mask1.shape, 1) 99 | elif mask_type == 9: # C Out 100 | mask = get_gradient_circular_2d(0.0,1.0,w,h) 101 | mask = mask * mask 102 | else: # C In 103 | mask = get_gradient_circular_2d(1.0,0.0,w,h) 104 | mask = mask * mask 105 | 106 | _gradiention_mask = mask 107 | 108 | return _gradiention_mask 109 | 110 | 111 | 112 | 113 | def apply_blur(img_array, blur_str): 114 | blur_str = max( int(blur_str), 0) 115 | if blur_str == 0: 116 | return img_array 117 | blur_str = (blur_str//2)*2 + 1 118 | 119 | img_array = cv2.GaussianBlur(img_array,(blur_str,blur_str),0,cv2.BORDER_DEFAULT) 120 | 121 | return img_array 122 | 123 | 124 | def apply_hue_gradiation(img_array, gradiation_type, hue): 125 | gradiation_type = max(int(gradiation_type), 0) 126 | hue = int(hue) 127 | # 0 <= hue < 180 128 | hue %= 180 129 | 130 | hsv = cv2.cvtColor(img_array, cv2.COLOR_RGB2HSV) 131 | hsv = hsv.astype(np.uint16) 132 | hsv[:,:,0] = (hsv[:,:,0]+hue)%180 133 | hsv = hsv.astype(np.uint8) 134 | colored = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) 135 | if gradiation_type == 0: 136 | return colored.astype(np.uint8) 137 | 138 | h,w,_ = img_array.shape 139 | 140 | mask = get_gradient_mask(gradiation_type, w, h) 141 | 142 | #output = np.zeros((h,w,3), np.uint16) 143 | output = img_array * (1 - mask) + colored * mask 144 | return output.astype(np.uint8) 145 | 146 | 147 | def apply_other_effect(img:Image, blur_str, hue_type, hue ): 148 | 149 | img_array = np.array(img) 150 | 151 | if blur_str != 0: 152 | img_array = apply_blur(img_array, blur_str) 153 | 154 | if hue_type != -1: 155 | img_array = apply_hue_gradiation(img_array, hue_type, hue) 156 | #debug_save_img(img_array,"hue") 157 | 158 | return Image.fromarray(img_array) 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/controlnet.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import numpy as np 3 | import json 4 | import os 5 | from PIL import Image 6 | 7 | from scripts.util_sd_loopback_music_sync_wave.controlnet_web import get_detectmap 8 | 9 | cn_stat={ 10 | "initialized" : False, 11 | "external_code" : None, 12 | "controlnet_units" : None, 13 | "current_stat" : True, 14 | "cache_dir" : "", 15 | "controlnet_modules" : [], 16 | "controlnet_images" : [] 17 | } 18 | 19 | reference_only_list = [ 20 | "reference_only", 21 | "reference_adain", 22 | "reference_adain+attn", 23 | ] 24 | 25 | 26 | def get_cache(module_name, img_path): 27 | if not cn_stat["cache_dir"]: 28 | return None 29 | 30 | if module_name == "none": 31 | return None 32 | 33 | basename = os.path.basename(img_path) 34 | module_path = os.path.join(cn_stat["cache_dir"], module_name) 35 | cache_path = os.path.join(module_path, basename) 36 | 37 | if not os.path.isfile(cache_path): 38 | os.makedirs(module_path, exist_ok=True) 39 | det = get_detectmap( module_name, Image.open(img_path) ) 40 | det.save(cache_path) 41 | 42 | return Image.open(cache_path) 43 | 44 | 45 | def get_external_code(): 46 | if cn_stat["external_code"]: 47 | return cn_stat["external_code"] 48 | try: 49 | if importlib.util.find_spec('extensions.sd-webui-controlnet.scripts.external_code'): 50 | cn_stat["external_code"] = 
importlib.import_module('extensions.sd-webui-controlnet.scripts.external_code', 'external_code') 51 | except Exception as e: 52 | print(e) 53 | print("import controlnet failed.") 54 | return cn_stat["external_code"] 55 | 56 | def load_unit(path): 57 | external_code = get_external_code() 58 | if not external_code: 59 | return 60 | 61 | params = {} 62 | with open(path, "r") as f: 63 | params = json.load(f) 64 | 65 | try: 66 | for i,(key,c) in enumerate( zip(params,cn_stat["controlnet_units"])): 67 | cn_stat["controlnet_units"][i] = external_code.ControlNetUnit(**params[key]) 68 | except Exception as e: 69 | print(e) 70 | print("load controlnet unit failed.") 71 | 72 | 73 | 74 | def initialize(p, cache_dir, dump_path): 75 | cn_stat["current_stat"] = True 76 | 77 | external_code = get_external_code() 78 | if not external_code: 79 | return 80 | 81 | cn_stat["controlnet_units"] = external_code.get_all_units_in_processing(p) 82 | 83 | if dump_path and os.path.isfile(dump_path): 84 | load_unit(dump_path) 85 | 86 | cn_stat["cache_dir"] = cache_dir 87 | 88 | if cache_dir: 89 | os.makedirs(cache_dir, exist_ok=True) 90 | 91 | if cn_stat["controlnet_units"]: 92 | cn_stat["controlnet_modules"] = [i.module for i in cn_stat["controlnet_units"]] 93 | cn_stat["controlnet_images"] = [i.image for i in cn_stat["controlnet_units"]] 94 | cn_stat["initialized"] = True 95 | 96 | print("controlnet found : ", cn_stat["initialized"]) 97 | 98 | def dump(path): 99 | if not cn_stat["initialized"]: 100 | return 101 | 102 | d = {} 103 | for i, c in enumerate(cn_stat["controlnet_units"]): 104 | d[i] = vars(c) 105 | d[i]["image"] = None 106 | 107 | def default_func(o): 108 | return str(o) 109 | 110 | with open(path, 'w') as f: 111 | json.dump(d, f, indent=4, default=default_func) 112 | 113 | 114 | def enable_controlnet(p, input_info): 115 | if not cn_stat["initialized"]: 116 | return 117 | 118 | img_path, input_for_ref_only = input_info 119 | 120 | external_code = get_external_code() 121 | if not external_code: 122 | return 123 | 124 | for i,c in enumerate(cn_stat["controlnet_units"]): 125 | if c.enabled: 126 | if cn_stat["controlnet_modules"][i] in reference_only_list: 127 | # c.image priority list for reference_*** 128 | # 1. Image specified in UI 129 | # 2. 1st input img or prev frame img 130 | 131 | c.module = cn_stat["controlnet_modules"][i] 132 | if cn_stat["controlnet_images"][i]: 133 | c.image = cn_stat["controlnet_images"][i] 134 | else: 135 | c.image = np.array(input_for_ref_only) 136 | c.resize_mode = 0 137 | else: 138 | # c.image priority list 139 | # 1. Image specified in UI 140 | # 2. preprocessed cache 141 | # 3. video frame (for preprocessor "none") 142 | # 4. 
None 143 | if cn_stat["controlnet_images"][i]: 144 | c.image = cn_stat["controlnet_images"][i] 145 | c.module = cn_stat["controlnet_modules"][i] 146 | 147 | else: 148 | if img_path is not None: 149 | cache = get_cache( cn_stat["controlnet_modules"][i], img_path) 150 | 151 | if cache: 152 | img = cache 153 | c.module = "none" 154 | else: 155 | img = Image.open(img_path) 156 | c.module = cn_stat["controlnet_modules"][i] 157 | 158 | c.image = np.array(img) 159 | c.resize_mode = 0 160 | else: 161 | c.image = None 162 | c.module = cn_stat["controlnet_modules"][i] 163 | 164 | print("enable_controlnet") 165 | 166 | external_code.update_cn_script_in_processing(p, cn_stat["controlnet_units"]) 167 | 168 | cn_stat["current_stat"] = True 169 | 170 | def disable_controlnet(p): 171 | if not cn_stat["initialized"]: 172 | return 173 | 174 | if cn_stat["current_stat"] == False: 175 | return 176 | 177 | external_code = get_external_code() 178 | if not external_code: 179 | return 180 | 181 | print("disable_controlnet") 182 | 183 | external_code.update_cn_script_in_processing(p, []) 184 | 185 | cn_stat["current_stat"] = False 186 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/slide.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from PIL import Image 4 | 5 | debug_c = 0 6 | 7 | def debug_save_img_array(img_array, comment): 8 | debug_save_img( Image.fromarray(img_array), comment) 9 | 10 | def debug_save_img(img:Image,comment): 11 | global debug_c 12 | img.save( f"scripts/testpngs/{debug_c}_{comment}.png") 13 | 14 | debug_c += 1 15 | 16 | def slide_image(img_array, slide_type, slide_val, slide_direction, border, is_y ): 17 | h, w, c = img_array.shape 18 | output = output = np.zeros((h, w, 4), np.uint8) 19 | 20 | if slide_type == 0: 21 | # open / close 22 | if slide_direction > 0: 23 | dst_start = 0 24 | dst_end = border - slide_val 25 | src_start = slide_val 26 | src_end = border 27 | if src_start < src_end: 28 | if is_y: 29 | output[:,dst_start:dst_end] = img_array[:,src_start:src_end] 30 | else: 31 | output[dst_start:dst_end,:] = img_array[src_start:src_end,:] 32 | 33 | dst_start = border + slide_val 34 | dst_end = w if is_y else h 35 | src_start = border 36 | src_end = (w if is_y else h) - slide_val 37 | if src_start < src_end: 38 | if is_y: 39 | output[:,dst_start:dst_end] = img_array[:,src_start:src_end] 40 | else: 41 | output[dst_start:dst_end,:] = img_array[src_start:src_end,:] 42 | 43 | else: 44 | dst_start = slide_val 45 | dst_end = border 46 | src_start = 0 47 | src_end = border - slide_val 48 | if src_start < src_end: 49 | if is_y: 50 | output[:,dst_start:dst_end] = img_array[:,src_start:src_end] 51 | else: 52 | output[dst_start:dst_end,:] = img_array[src_start:src_end,:] 53 | 54 | dst_start = border 55 | dst_end = (w if is_y else h) - slide_val 56 | src_start = border + slide_val 57 | src_end = w if is_y else h 58 | if src_start < src_end: 59 | if is_y: 60 | output[:,dst_start:dst_end] = img_array[:,src_start:src_end] 61 | else: 62 | output[dst_start:dst_end,:] = img_array[src_start:src_end,:] 63 | else: 64 | # cross 65 | if slide_direction > 0: 66 | dst_start = 0 67 | dst_end = (h if is_y else w) - slide_val 68 | src_start = slide_val 69 | src_end = (h if is_y else w) 70 | if src_start < src_end: 71 | if is_y: 72 | output[dst_start:dst_end,:border] = img_array[src_start:src_end,:border] 73 | else: 74 | output[:border,dst_start:dst_end] = 
img_array[:border,src_start:src_end] 75 | 76 | dst_start = slide_val 77 | dst_end = (h if is_y else w) 78 | src_start = 0 79 | src_end = (h if is_y else w) - slide_val 80 | if src_start < src_end: 81 | if is_y: 82 | output[dst_start:dst_end,border:] = img_array[src_start:src_end,border:] 83 | else: 84 | output[border:,dst_start:dst_end] = img_array[border:,src_start:src_end] 85 | 86 | else: 87 | dst_start = slide_val 88 | dst_end = (h if is_y else w) 89 | src_start = 0 90 | src_end = (h if is_y else w) - slide_val 91 | if src_start < src_end: 92 | if is_y: 93 | output[dst_start:dst_end,:border] = img_array[src_start:src_end,:border] 94 | else: 95 | output[:border,dst_start:dst_end] = img_array[:border,src_start:src_end] 96 | 97 | dst_start = 0 98 | dst_end = (h if is_y else w) - slide_val 99 | src_start = slide_val 100 | src_end = (h if is_y else w) 101 | if src_start < src_end: 102 | if is_y: 103 | output[dst_start:dst_end,border:] = img_array[src_start:src_end,border:] 104 | else: 105 | output[border:,dst_start:dst_end] = img_array[border:,src_start:src_end] 106 | 107 | return output 108 | 109 | 110 | def slide_y_image(img_array, slide_type, slide_val, slide_border_pos): 111 | h, w, c = img_array.shape 112 | 113 | if slide_type == -1: 114 | return img_array 115 | 116 | slide_border_pos = min(max(0, slide_border_pos), 1) 117 | 118 | border = int(slide_border_pos * w) 119 | slide_direction = 1 if slide_val > 0 else -1 120 | 121 | if slide_type == 0: 122 | # open / close 123 | if slide_val > 0: 124 | slide_val = int(w * slide_val) 125 | 126 | else: 127 | slide_val = -1 * int(w * slide_val) 128 | 129 | else: 130 | # up /down 131 | if slide_val > 0: 132 | slide_val = int(h * slide_val) 133 | 134 | else: 135 | slide_val = -1 * int(h * slide_val) 136 | 137 | output = slide_image(img_array, slide_type, slide_val, slide_direction, border, True) 138 | 139 | #debug_save_img_array(img_array, "y_input") 140 | #debug_save_img_array(output, "y_slide") 141 | return output 142 | 143 | def slide_x_image(img_array, slide_type, slide_val, slide_border_pos): 144 | h, w, c = img_array.shape 145 | 146 | if slide_type == -1: 147 | return img_array 148 | 149 | slide_border_pos = min(max(0, slide_border_pos), 1) 150 | 151 | border = int(slide_border_pos * h) 152 | slide_direction = 1 if slide_val > 0 else -1 153 | 154 | if slide_type == 0: 155 | # open / close 156 | if slide_val > 0: 157 | slide_val = int(h * slide_val) 158 | 159 | else: 160 | slide_val = -1 * int(h * slide_val) 161 | 162 | else: 163 | # cross 164 | if slide_val > 0: 165 | slide_val = int(w * slide_val) 166 | 167 | else: 168 | slide_val = -1 * int(w * slide_val) 169 | 170 | output = slide_image(img_array, slide_type, slide_val, slide_direction, border, False) 171 | 172 | #debug_save_img_array(img_array, "x_input") 173 | #debug_save_img_array(output, "x_slide") 174 | return output 175 | 176 | def SlideImage(img:Image, slide_x_inputs, slide_y_inputs): 177 | 178 | img_array = np.asarray(img) 179 | h, w, c = img_array.shape 180 | 181 | img_array = slide_x_image(img_array, *slide_x_inputs) 182 | 183 | img_array = slide_y_image(img_array, *slide_y_inputs) 184 | 185 | return Image.fromarray(img_array) 186 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/frame_extractor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | import platform 5 | import shutil 6 | 7 | from 
scripts.loopback_music_sync_wave import str_to_wave_list,run_cmd 8 | import scripts.util_sd_loopback_music_sync_wave.raft 9 | 10 | def remove_pngs_in_dir(path): 11 | if not os.path.isdir(path): 12 | return 13 | pngs = glob.glob( os.path.join(path, "*.png") ) 14 | for png in pngs: 15 | os.remove(png) 16 | 17 | def frame_extract_all(fe_project_dir:str, fe_movie_path:str, fe_ffmpeg_path:str, all_extract_fps:int): 18 | 19 | if (not fe_project_dir) or (not os.path.isdir(fe_project_dir)): 20 | print("Directory not found : ", fe_project_dir) 21 | return " " 22 | 23 | if (not fe_movie_path) or (not os.path.isfile(fe_movie_path)): 24 | print("File not found : ", fe_movie_path) 25 | return " " 26 | 27 | extract_dir = os.path.join(os.path.join(fe_project_dir, "video_frame"), f"{all_extract_fps}") 28 | 29 | os.makedirs(extract_dir, exist_ok=True) 30 | 31 | remove_pngs_in_dir(extract_dir) 32 | 33 | args = [ 34 | "-i", fe_movie_path, 35 | "-start_number", 0, 36 | "-vf", 37 | f"fps={all_extract_fps}", 38 | os.path.join(extract_dir, "%05d.png") 39 | ] 40 | 41 | if(fe_ffmpeg_path == ""): 42 | fe_ffmpeg_path = "ffmpeg" 43 | if(platform.system() == "Windows"): 44 | fe_ffmpeg_path += ".exe" 45 | 46 | run_cmd([fe_ffmpeg_path] + args) 47 | 48 | return " " 49 | 50 | def frame_extract_one(fe_project_dir:str, fe_movie_path:str, fe_ffmpeg_path:str, fe_fps:int): 51 | 52 | if (not fe_project_dir) or (not os.path.isdir(fe_project_dir)): 53 | print("Directory not found : ", fe_project_dir) 54 | return " " 55 | 56 | if (not fe_movie_path) or (not os.path.isfile(fe_movie_path)): 57 | print("File not found : ", fe_movie_path) 58 | return " " 59 | 60 | extract_dir = fe_project_dir 61 | 62 | args = [ 63 | "-i", fe_movie_path, 64 | "-frames:v",1, 65 | os.path.join(extract_dir, "00000.png") 66 | ] 67 | 68 | if(fe_ffmpeg_path == ""): 69 | fe_ffmpeg_path = "ffmpeg" 70 | if(platform.system() == "Windows"): 71 | fe_ffmpeg_path += ".exe" 72 | 73 | run_cmd([fe_ffmpeg_path] + args) 74 | 75 | if fe_fps > 0: 76 | #extract all frame 77 | return frame_extract_all(fe_project_dir, fe_movie_path, fe_ffmpeg_path, fe_fps) 78 | 79 | return " " 80 | 81 | def frame_extract_per_wave(fe_project_dir:str, fe_movie_path:str, fe_ffmpeg_path:str, wave_list_str:str): 82 | 83 | if (not fe_project_dir) or (not os.path.isdir(fe_project_dir)): 84 | print("Directory not found : ", fe_project_dir) 85 | return " " 86 | 87 | if (not fe_movie_path) or (not os.path.isfile(fe_movie_path)): 88 | print("File not found : ", fe_movie_path) 89 | return " " 90 | 91 | wave_list = str_to_wave_list(wave_list_str) 92 | 93 | time_list=[] 94 | for w in wave_list: 95 | if w["type"] == "end": 96 | continue 97 | time_list.append(w["start_msec"]/1000) 98 | 99 | extract_dir = os.path.join(fe_project_dir , "video_frame_per_wave") 100 | os.makedirs(extract_dir, exist_ok=True) 101 | 102 | remove_pngs_in_dir(extract_dir) 103 | 104 | if(fe_ffmpeg_path == ""): 105 | fe_ffmpeg_path = "ffmpeg" 106 | if(platform.system() == "Windows"): 107 | fe_ffmpeg_path += ".exe" 108 | 109 | for i, t in enumerate( time_list ): 110 | args = [ 111 | "-ss",t, 112 | "-i", fe_movie_path, 113 | "-frames:v",1, 114 | os.path.join(extract_dir, f"{str(i).zfill(5)}.png") 115 | ] 116 | run_cmd([fe_ffmpeg_path] + args, True) 117 | 118 | 119 | return " " 120 | 121 | def frame_extract_scene_change(fe_project_dir:str, fe_movie_path:str, fe_ffmpeg_path:str, sc_fe_fps:int, sc_use_optical_flow_cache:bool, sc_flow_occ_detect_th:float, sc_sd_threshold:float): 122 | 123 | if (not fe_project_dir) or (not
os.path.isdir(fe_project_dir)): 124 | print("Directory not found : ", fe_project_dir) 125 | return " " 126 | 127 | if (not fe_movie_path) or (not os.path.isfile(fe_movie_path)): 128 | print("File not found : ", fe_movie_path) 129 | return " " 130 | 131 | print("create video frame") 132 | frame_extract_all(fe_project_dir, fe_movie_path, fe_ffmpeg_path, sc_fe_fps) 133 | 134 | def get_video_frame_path(project_dir, i, fps): 135 | path = os.path.join(os.path.join(project_dir, "video_frame"), f"{fps}") 136 | path = os.path.join(path, f"{str(i).zfill(5)}.png") 137 | return path 138 | 139 | print("create optical flow") 140 | sample_frame_path = get_video_frame_path(fe_project_dir, 0, sc_fe_fps) 141 | if sample_frame_path and os.path.isfile(sample_frame_path): 142 | v_path = os.path.join(os.path.join(fe_project_dir, "video_frame"), f"{sc_fe_fps}") 143 | o_path = os.path.join(os.path.join(fe_project_dir, "optical_flow"), f"{sc_fe_fps}") 144 | m_path = os.path.join(os.path.join(fe_project_dir, "occ_mask"), f"{sc_fe_fps}") 145 | scripts.util_sd_loopback_music_sync_wave.raft.create_optical_flow(v_path, o_path, m_path, sc_use_optical_flow_cache, None, sc_flow_occ_detect_th) 146 | else: 147 | print("video frame not found") 148 | return " " 149 | 150 | print("scene detection list") 151 | m_path = os.path.join(os.path.join(fe_project_dir, "occ_mask"), f"{sc_fe_fps}") 152 | mask_path_list = sorted(glob.glob( os.path.join(m_path ,"[0-9]*.png"), recursive=False)) 153 | scene_detection_list = scripts.util_sd_loopback_music_sync_wave.raft.get_scene_detection_list(sc_sd_threshold, 1, mask_path_list) 154 | 155 | dst_dir_path = os.path.join(os.path.join(fe_project_dir, "scene_change_frame"), f"{sc_fe_fps}") 156 | os.makedirs(dst_dir_path, exist_ok=True) 157 | remove_pngs_in_dir(dst_dir_path) 158 | 159 | src_dir_path = os.path.join(os.path.join(fe_project_dir, "video_frame"), f"{sc_fe_fps}") 160 | 161 | scene_detection_list[0] = True 162 | 163 | for i, sd in enumerate(scene_detection_list): 164 | if sd: 165 | src_path = os.path.join(src_dir_path, f"{str(i).zfill(5)}.png") 166 | dst_path = os.path.join(dst_dir_path, f"{str(i).zfill(5)}.png") 167 | shutil.copyfile(src_path, dst_path) 168 | 169 | 170 | print("finished") 171 | return " " 172 | 173 | 174 | 175 | 176 | 177 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/perlin.py: -------------------------------------------------------------------------------- 1 | #https://eev.ee/blog/2016/05/29/perlin-noise/ 2 | """Perlin noise implementation.""" 3 | # Licensed under ISC 4 | from itertools import product 5 | import math 6 | import random 7 | 8 | 9 | def smoothstep(t): 10 | """Smooth curve with a zero derivative at 0 and 1, making it useful for 11 | interpolating. 12 | """ 13 | return t * t * (3. - 2. * t) 14 | 15 | 16 | def lerp(t, a, b): 17 | """Linear interpolation between a and b, given a fraction t.""" 18 | return a + t * (b - a) 19 | 20 | 21 | class PerlinNoiseFactory(object): 22 | """Callable that produces Perlin noise for an arbitrary point in an 23 | arbitrary number of dimensions. The underlying grid is aligned with the 24 | integers. 25 | 26 | There is no limit to the coordinates used; new gradients are generated on 27 | the fly as necessary. 28 | """ 29 | 30 | def __init__(self, dimension, octaves=1, tile=(), unbias=False, random_seed=None): 31 | """Create a new Perlin noise factory in the given number of dimensions, 32 | which should be an integer and at least 1. 
33 | 34 | More octaves create a foggier and more-detailed noise pattern. More 35 | than 4 octaves is rather excessive. 36 | 37 | ``tile`` can be used to make a seamlessly tiling pattern. For example: 38 | 39 | pnf = PerlinNoiseFactory(2, tile=(0, 3)) 40 | 41 | This will produce noise that tiles every 3 units vertically, but never 42 | tiles horizontally. 43 | 44 | If ``unbias`` is true, the smoothstep function will be applied to the 45 | output before returning it, to counteract some of Perlin noise's 46 | significant bias towards the center of its output range. 47 | """ 48 | self.dimension = dimension 49 | self.octaves = octaves 50 | self.tile = tile + (0,) * dimension 51 | self.unbias = unbias 52 | 53 | # For n dimensions, the range of Perlin noise is ±sqrt(n)/2; multiply 54 | # by this to scale to ±1 55 | self.scale_factor = 2 * dimension ** -0.5 56 | 57 | self.gradient = {} 58 | 59 | self.rand = random.Random(random_seed) 60 | 61 | def _generate_gradient(self): 62 | # Generate a random unit vector at each grid point -- this is the 63 | # "gradient" vector, in that the grid tile slopes towards it 64 | 65 | # 1 dimension is special, since the only unit vector is trivial; 66 | # instead, use a slope between -1 and 1 67 | if self.dimension == 1: 68 | return (self.rand.uniform(-1, 1),) 69 | 70 | # Generate a random point on the surface of the unit n-hypersphere; 71 | # this is the same as a random unit vector in n dimensions. Thanks 72 | # to: http://mathworld.wolfram.com/SpherePointPicking.html 73 | # Pick n normal random variables with stddev 1 74 | random_point = [self.rand.gauss(0, 1) for _ in range(self.dimension)] 75 | # Then scale the result to a unit vector 76 | scale = sum(n * n for n in random_point) ** -0.5 77 | return tuple(coord * scale for coord in random_point) 78 | 79 | def get_plain_noise(self, *point): 80 | """Get plain noise for a single point, without taking into account 81 | either octaves or tiling. 82 | """ 83 | if len(point) != self.dimension: 84 | raise ValueError("Expected {} values, got {}".format( 85 | self.dimension, len(point))) 86 | 87 | # Build a list of the (min, max) bounds in each dimension 88 | grid_coords = [] 89 | for coord in point: 90 | min_coord = math.floor(coord) 91 | max_coord = min_coord + 1 92 | grid_coords.append((min_coord, max_coord)) 93 | 94 | # Compute the dot product of each gradient vector and the point's 95 | # distance from the corresponding grid point. This gives you each 96 | # gradient's "influence" on the chosen point. 97 | dots = [] 98 | for grid_point in product(*grid_coords): 99 | if grid_point not in self.gradient: 100 | self.gradient[grid_point] = self._generate_gradient() 101 | gradient = self.gradient[grid_point] 102 | 103 | dot = 0 104 | for i in range(self.dimension): 105 | dot += gradient[i] * (point[i] - grid_point[i]) 106 | dots.append(dot) 107 | 108 | # Interpolate all those dot products together. The interpolation is 109 | # done with smoothstep to smooth out the slope as you pass from one 110 | # grid cell into the next. 111 | # Due to the way product() works, dot products are ordered such that 112 | # the last dimension alternates: (..., min), (..., max), etc. So we 113 | # can interpolate adjacent pairs to "collapse" that last dimension. Then 114 | # the results will alternate in their second-to-last dimension, and so 115 | # forth, until we only have a single value left. 
116 | dim = self.dimension 117 | while len(dots) > 1: 118 | dim -= 1 119 | s = smoothstep(point[dim] - grid_coords[dim][0]) 120 | 121 | next_dots = [] 122 | while dots: 123 | next_dots.append(lerp(s, dots.pop(0), dots.pop(0))) 124 | 125 | dots = next_dots 126 | 127 | return dots[0] * self.scale_factor 128 | 129 | def __call__(self, *point): 130 | """Get the value of this Perlin noise function at the given point. The 131 | number of values given should match the number of dimensions. 132 | """ 133 | ret = 0 134 | for o in range(self.octaves): 135 | o2 = 1 << o 136 | new_point = [] 137 | for i, coord in enumerate(point): 138 | coord *= o2 139 | if self.tile[i]: 140 | coord %= self.tile[i] * o2 141 | new_point.append(coord) 142 | ret += self.get_plain_noise(*new_point) / o2 143 | 144 | # Need to scale n back down since adding all those extra octaves has 145 | # probably expanded it beyond ±1 146 | # 1 octave: ±1 147 | # 2 octaves: ±1½ 148 | # 3 octaves: ±1¾ 149 | ret /= 2 - 2 ** (1 - self.octaves) 150 | 151 | if self.unbias: 152 | # The output of the plain Perlin noise algorithm has a fairly 153 | # strong bias towards the center due to the central limit theorem 154 | # -- in fact the top and bottom 1/8 virtually never happen. That's 155 | # a quarter of our entire output range! If only we had a function 156 | # in [0..1] that could introduce a bias towards the endpoints... 157 | r = (ret + 1) / 2 158 | # Doing it this many times is a completely made-up heuristic. 159 | for _ in range(int(self.octaves / 2 + 0.5)): 160 | r = smoothstep(r) 161 | ret = r * 2 - 1 162 | 163 | return ret -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sd_loopback_music_sync_wave 2 | 3 | ## Overview 4 | #### AUTOMATIC1111 UI extension for creating videos using img2img. 5 | #### This extension was created based on [Loopback Wave Script](https://github.com/FizzleDorf/Loopback-Wave-for-A1111-Webui) 6 | #### The major changes are that the wave length can be set one by one in milliseconds and that wildcard can be used. 7 | #### In addition, I have added various @function. 8 | 9 | ## Example 10 | - The following sample is raw output of this extension.(The file was too large, so I compressed it.) 11 | #### sample 1 12 | ``` 13 | Extend Prompts: 14 | 1::#zoom(@wave_amplitude(0.8,1.6)) 15 | 2::$random_xy(3434, 0.2,0.2, 3000) 16 | 3::#rot_y(@wave_amplitude(60,0)) 17 | 18 | Sub Extend Prompts: 19 | 1::@@bpm139@1700[#slide_x(1, @random(1.5,2), @random(0.2,0.8))] 20 | 3::@@bpm139@1700[#slide_y(1, @random(1.5,2), @random(0.2,0.8))] 21 | 5::@@bpm139@1700[#slide_x(1, @random(-1.5,-2), @random(0.2,0.8))] 22 | 7::$random_xy(99999, 1,1, 3000),$random_slide_x(99999,1,0.5,0.5,0.5,3000),$random_slide_y(99999,1,0.5,0.5,0.5,3000) 23 | -1::(__expression__:@wave_shape(1.0,0)) 24 | ``` 25 |
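(A note on reading the Extend Prompts blocks in these samples, inferred from the samples themselves and the bundled wildcards: each line has the form `index::content`, where the index selects the wave(s) the line applies to. A range such as `4-15::` covers waves 4 through 15, `-1::` appears to act as a default applied to every wave, and `__name__` expands a wildcard file of that name. The `@@bpm139@1700[...]` form above appears to re-trigger the bracketed effect on the beat, given a BPM and a millisecond offset. The `#`/`@`/`$` function prefixes are summarized under [@#$function list] below.)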
26 |
27 | 28 | #### sample 2 29 | - Loopback mode(default) + Controlnet(open pose) 30 | - Everything except the main prompt is taken from sample 1. 31 |
32 |
33 | 34 | #### sample 3 35 | - img2img mode + Controlnet(open pose) 36 | - The settings are exactly the same as sample 2, except the mode is changed to img2img. 37 |
38 |
39 | 40 | #### sample 4 41 | - SAM + Controlnet(open pose) 42 | - Uses [SAM](https://github.com/continue-revolution/sd-webui-segment-anything) to dynamically generate a mask from text, then inpaints with that mask 43 | ``` 44 | 4-15::(__hair-female__: 1.2) 45 | 4-15::$inpaint("hair") 46 | 20-31::(__clothing__: 1.0) 47 | 20-31::$inpaint("clothing") 48 | 32::(spiderman:1.5) 49 | 33::(wonder woman:1.5) 50 | 34::(storm:1.5) 51 | 35::(harley_quinn:1.5) 52 | 32-35::$inpaint("face") 53 | ``` 54 |
55 | 56 |
57 | 58 | #### sample 5 59 | - Loopback mode(with low denoising strength) + optical flow + Controlnet(open pose + normalbae) 60 | - openpose / weight 1.0 / "My prompt is more important" 61 | - normalbae / weight 0.5 / "My prompt is more important" 62 | - (controlnet ip2p also seemed to work well with loopback) 63 |
64 | 65 |
66 | 67 | #### sample 6 68 | - openpose_full / weight 1.0 / "My prompt is more important" 69 | - reference_adain / weight 1.0 / "Balanced" / threshold_a 0.5 70 | - softedge_pidisafe / weight 0.7 / "My prompt is more important" 71 | - Fps 8 / Interpolation Multiplier 3 72 | ``` 73 | 0:: cyberpunk city 74 | 1-100::(__location__: 1.0) 75 | 1-100::(__clothing__: 1.0) 76 | -1::(__expression__:@wave_shape(1.0,0)) 77 | ``` 78 |
79 | 80 |
81 | 82 | ## Installation 83 | - Use the Extensions tab of the webui to [Install from URL] 84 | 85 |
86 |
87 | 88 | ## Basic Usage 1 (For making a loopback video without a source video) 89 | - Go to the [txt2img] tab. 90 | - Generate an image. (I recommend Euler a / 20 steps / CFG 7) 91 | - Press the [Send to img2img] button 92 | - Go to the [img2img] tab. 93 | - Lower the [Denoising strength]. (I recommend 0.25) 94 | - Select [Loopback Music Sync Wave] in the Script drop-down list 95 | - Copy the following text into [Wave List (Main)] (the entry format is described after this section) 96 | ``` 97 | 0,wave 98 | 1000,wave 99 | 2000,wave 100 | 3000,wave 101 | 3500,wave 102 | 4000,wave 103 | 4500,wave 104 | 5000,end 105 | ``` 106 | - Copy the following text into [Extend Prompt (Main)]. The wildcards used below are those provided by default. 107 | ``` 108 | -1::__lb_vel_slow__ 109 | -1::__lb_zoom_wave__ 110 | -1::__lb_prompt_face__ 111 | ``` 112 | - Press [Generate] 113 | - (The default file output location, video encoding settings, etc. are the same as in the original script) 114 | 115 |
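The wave list entries above follow the pattern `start_msec,wave_type[,strength]`. This matches the format the Audio Analyzer generates: the third field appears to be optional with a default of 1.0, and the list is terminated by an `end` entry. A sketch with explicit strengths:

```
0,wave
1000,wave,1.5
2000,wave,0.5
3000,end
```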
116 |
117 | 118 | ## Basic Usage 2 (For changing the character or style of a source video using the loopback technique) 119 | - First, prepare your source video (SD or HD resolution, approx. 10 seconds in length) 120 | - Configure the controlnet settings. 121 | - Extract the first frame of the source video, then create the first input image with img2img based on it. 122 | - Set [Project Directory(optional)] / [Video File Path(optional)] / [Mode Settings] / [Optical Flow Setting] 123 | (I recommend [Frames per second] = 8 and [Interpolation Multiplier] = 3; see the worked example below) 124 | - ***For this purpose, there is no need to create waves, so [Max additional denoise] should be set to 0! I think [Denoising strength] should be around 0.3*** 125 | - Press [Generate] 126 | See [Here](https://github.com/s9roll7/sd_loopback_music_sync_wave/wiki/loopback---controlnet) and [Here](https://github.com/s9roll7/sd_loopback_music_sync_wave/wiki/loopback---controlnet---optical-flow) for more information. 127 | 128 |
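To make the frame math concrete, the UI states that [Frames per second] x [Optical Flow Settings -> Interpolation Multiplier] = FPS including interpolated frames. With the recommended settings:

```
8 fps generated x 3 interpolation multiplier = 24 fps in the output video
10 s of source video -> 10 x 8 = 80 frames to img2img -> 240 frames after interpolation
```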
129 |
130 | 131 | ## Advanced Usage 132 | 133 | ### How to generate video synchronized to music 134 | See [Here](https://github.com/s9roll7/sd_loopback_music_sync_wave/wiki/How-to-generate-video-synchronized-to-music) 135 |
136 | 137 | 138 | ### How to replace the initial image in the middle of the process 139 | See [Here](https://github.com/s9roll7/sd_loopback_music_sync_wave/wiki/How-to-replace-the-initial-image-in-the-middle-of-the-process) 140 |
141 | 142 | ### @#$function list 143 | - The list of functions and how to write the wave list are described in [Cheat Sheet]. 144 | ![Cheat Sheet](imgs/cheat_sheet.png "Cheat Sheet") 145 |
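Before opening the Cheat Sheet, here is a rough orientation inferred from the samples and the bundled wildcards (treat it as a sketch, not the authoritative definition): `#` functions apply a per-frame effect such as `#zoom`, `#rot` or `#blur`; `@` functions produce a time-varying value and are used as arguments, e.g. `@random` or `@wave_amplitude`; `$` functions expand into a whole per-frame sequence of `#` effects, e.g. `$random_xy`. For example, from sample 1:

```
3::#rot_y(@wave_amplitude(60,0))
7::$random_xy(99999, 1,1, 3000)
```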
146 | 147 | ### loopback + controlnet (sample 2) 148 | See [Here](https://github.com/s9roll7/sd_loopback_music_sync_wave/wiki/loopback---controlnet) 149 |
150 | 151 | ### loopback + controlnet + optical flow (sample 5) 152 | See [Here](https://github.com/s9roll7/sd_loopback_music_sync_wave/wiki/loopback---controlnet---optical-flow) 153 |
154 | 155 | ### How to generate mask from text (sample 4) 156 | - You need [SAM Extension](https://github.com/continue-revolution/sd-webui-segment-anything) 157 | - It is necessary to be able to use SAM and GroundingDINO together 158 | - Refer to sample 4 for $inpaint function usage. 159 | 160 |
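A minimal sketch of the flow, reusing two lines from sample 4: the string passed to `$inpaint` is the text that SAM/GroundingDINO turn into a mask, and the prompt line with the same wave range supplies what gets painted into the masked region:

```
20-31::(__clothing__: 1.0)
20-31::$inpaint("clothing")
```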
161 | 162 | ### img2img mode (sample 3) 163 | - As an added bonus, img2img mode is implemented. You can switch between loopback and img2img in [Mode Settings]. 164 | - ~~The original frame required for img2img must be generated by the following procedure. (If you want to use controlnet in loopback mode as in sample2, you should also generate frames using this procedure.)~~ 165 |
166 | 167 | 168 | ### Other Tips 169 | - If you specify the *-inputs.txt that is created at the same time as creating the video in [Load inputs txt Path], you can create the video again with the same input as last time 170 | - If you want to reuse only some of the inputs, open *-prompt.txt and copy only where you need it 171 | - If you want to add more wildcards yourself, 172 | Put them in [extensions/sd_loopback_music_sync_wave/wildcards]. If you are too lazy to make your own, you can pick them up at civitai. 173 | - If you want to make a video only for the first 5 seconds in the test, temporarily add "5000, end" to the wave list entered in [Wave List (Main)] 174 | - The unit of velocity for function parameters is the number of screens per second. For example, a speed of 1.0 in the x-axis direction means that if the screen moves at the same speed for one second, it will scroll one horizontal screen. In the case of rotation speed, it is the degree per second. 175 | - There are three ways to increase the resolution. 176 | 1. Do img2img with a higher resolution from the beginning. 177 | 2. Use [Upscale Settings]. 178 | 3. Upscale the generated video using an external tool. 179 | 1 is probably the best way to get the best results, but it also takes the most processing time. 180 | - There are three ways to increase fps for smooth animation 181 | 1. Put a larger value in [Frames per second] 182 | 2. Use interpolation with Optical Flow(When using the source video to generate) 183 | 3. Interpolate with an external tool 184 | 1 is very effective, but the processing time will be proportionally longer. 185 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/audio_analyzer.py: -------------------------------------------------------------------------------- 1 | import librosa 2 | import numpy as np 3 | import os 4 | from PIL import Image 5 | 6 | 7 | class AudioAnalyzer: 8 | def __init__(self, path, band_min, band_max, hpss_type, onset_th): 9 | 10 | aggregate_is_median = True 11 | hop_length = 512 12 | hpss_margin = 3.0 13 | max_size = 1 14 | onset_detect_normalize = True 15 | 16 | self.path = path 17 | self.band_min = band_min 18 | self.band_max = band_max 19 | self.hpss_type = hpss_type # none(0) , H(1) , P(2) 20 | self.hop_length = hop_length 21 | self.hpss_margin = hpss_margin 22 | self.max_size = max_size 23 | self.aggregate = np.median if aggregate_is_median else np.mean 24 | self.onset_detect_normalize = onset_detect_normalize 25 | self.onset_th = onset_th 26 | self.is_backtrack_reqested = False 27 | 28 | self.onset_env = None 29 | self.wave = None 30 | self.sr = None 31 | self.onset_backtrack = None 32 | 33 | self.result_onset = None 34 | self.result_beat = None 35 | self.result_beat_plp = None 36 | self.times = None 37 | self.result_bpm = -1 38 | self.length = -1 39 | 40 | print("hop_length = {}".format(self.hop_length)) 41 | print("hpss_margin = {}".format(self.hpss_margin)) 42 | print("max_size = {}".format(self.max_size)) 43 | print("aggregate = {}".format(self.aggregate)) 44 | print("onset_detect_normalize = {}".format(self.onset_detect_normalize)) 45 | 46 | self.is_loaded = False 47 | 48 | self._analyze() 49 | 50 | 51 | def GetResult(self, is_backtrack): 52 | if not self.is_loaded: 53 | return None 54 | 55 | if is_backtrack: 56 | self.is_backtrack_reqested = True 57 | return self.times[self.onset_backtrack] 58 | else: 59 | self.is_backtrack_reqested = False 60 | return self.times[self.result_beat] 61 | 62 | def 
GetBPM(self): 63 | return self.result_bpm 64 | 65 | def GetLength(self): 66 | return self.length 67 | 68 | def IsSuccess(self): 69 | return self.is_loaded 70 | 71 | def CreateFig(self, wave_list): 72 | import matplotlib.pyplot as plt 73 | 74 | fig = plt.Figure(dpi=100, figsize=( (self.length/1000)*4 ,3*2)) 75 | ax1 = fig.add_subplot(2, 1, 1) 76 | ax1.plot(self.times, self.onset_env/self.onset_env.max(), label='onset envelope') 77 | 78 | if self.is_backtrack_reqested: 79 | ax1.vlines(self.times[self.onset_backtrack], 0, 1, color='r', linestyle='--', label='backtrack') 80 | else: 81 | ax1.vlines(self.times[self.result_onset], 0, 1, color='r', linestyle='--', label='onsets') 82 | ax1.legend(frameon=True, framealpha=0.75) 83 | 84 | ax2 = fig.add_subplot(2, 1, 2, sharex=ax1) 85 | librosa.display.waveshow(self.wave, sr=self.sr, ax=ax2) 86 | 87 | if wave_list: 88 | ax2.vlines( wave_list, -1,1,color='r', label='wave list') 89 | 90 | ax1.set_xlim(0, self.length/1000 + 1) 91 | x_labels = np.arange(0,self.length/1000 + 1,0.5) 92 | ax1.set_xticks(x_labels,x_labels) 93 | ax1.minorticks_on() 94 | ax1.grid(which="major", color="gray", linestyle=":", axis="x") 95 | ax1.grid(which="minor", color="gray", linestyle=":", axis="x") 96 | 97 | ax2.legend(frameon=True, framealpha=0.75) 98 | ax2.minorticks_on() 99 | ax2.grid(which="major", color="gray", linestyle=":", axis="x") 100 | ax2.grid(which="minor", color="gray", linestyle=":", axis="x") 101 | 102 | plt.tight_layout() 103 | #fig.savefig(img_path) 104 | return fig 105 | 106 | def CreatePromptFig(self, plot_data): 107 | import matplotlib.pyplot as plt 108 | import matplotlib.gridspec as gridspec 109 | 110 | plt.rcParams["axes.prop_cycle"] = plt.cycler("color", plt.get_cmap("tab20").colors) 111 | 112 | fig = plt.Figure(dpi=100, figsize=( (self.length/1000)*4 ,3*2)) 113 | gs = gridspec.GridSpec(3, 1, figure=fig) 114 | ax1 = fig.add_subplot(gs[0, :]) 115 | # ax1 = fig.add_subplot(2, 1, 1) 116 | ax1.plot(self.times, self.onset_env/self.onset_env.max(), label='onset envelope') 117 | 118 | if self.is_backtrack_reqested: 119 | ax1.vlines(self.times[self.onset_backtrack], 0, 1, color='r', linestyle='--', label='backtrack') 120 | else: 121 | ax1.vlines(self.times[self.result_onset], 0, 1, color='r', linestyle='--', label='onsets') 122 | ax1.legend(frameon=True, framealpha=0.75) 123 | 124 | ax2 = fig.add_subplot(gs[1:, :],sharex=ax1) 125 | # ax2 = fig.add_subplot(2, 1, 2, sharex=ax1) 126 | librosa.display.waveshow(self.wave, sr=self.sr, ax=ax2) 127 | 128 | ax2.vlines( plot_data["wave"], -1,1,color='r', label='wave list') 129 | 130 | # plot data 131 | for d in plot_data["data"]: 132 | ax2.plot(plot_data["time"], plot_data["data"][d], label=d) 133 | 134 | ax1.set_xlim(0, self.length/1000 + 1) 135 | x_labels = np.arange(0,self.length/1000 + 1,0.5) 136 | ax1.set_xticks(x_labels,x_labels) 137 | ax1.minorticks_on() 138 | ax1.grid(which="major", color="gray", linestyle=":", axis="x") 139 | ax1.grid(which="minor", color="gray", linestyle=":", axis="x") 140 | 141 | ax2.legend(frameon=True, framealpha=0.75) 142 | ax2.minorticks_on() 143 | ax2.grid(which="major", color="gray", linestyle=":", axis="x") 144 | ax2.grid(which="minor", color="gray", linestyle=":", axis="x") 145 | 146 | plt.tight_layout() 147 | return fig 148 | 149 | 150 | def _get_onset_env_multi(self, wave, sr): 151 | channels = [self.band_min, self.band_max] 152 | onset_envelope_multi = librosa.onset.onset_strength_multi(y=wave, sr=sr, channels=channels, hop_length=self.hop_length, aggregate=self.aggregate, 
max_size=self.max_size) 153 | return onset_envelope_multi[0] 154 | 155 | def _get_onset_env(self, wave, sr): 156 | return librosa.onset.onset_strength(y=wave, sr=sr, hop_length=self.hop_length, aggregate=self.aggregate, max_size=self.max_size) 157 | 158 | def _get_wave(self): 159 | wave, sr = librosa.load(self.path) 160 | 161 | self.length = librosa.get_duration(y=wave, sr=sr) 162 | self.length = int(self.length * 1000) 163 | 164 | if self.hpss_type == 1: 165 | y_harm, y_perc = librosa.effects.hpss(wave, margin=self.hpss_margin) 166 | return y_harm,sr 167 | elif self.hpss_type == 2: 168 | y_harm, y_perc = librosa.effects.hpss(wave, margin=self.hpss_margin) 169 | return y_perc,sr 170 | else: 171 | return wave,sr 172 | 173 | 174 | 175 | def _analyze(self): 176 | if (not self.path) or (not os.path.isfile(self.path)): 177 | print("File not found : ", self.path) 178 | return 179 | 180 | wave, sr = self.wave, self.sr = self._get_wave() 181 | 182 | 183 | if self.band_min == -1 or self.band_max == -1: 184 | self.onset_env = self._get_onset_env(wave, sr) 185 | else: 186 | self.onset_env = self._get_onset_env_multi(wave, sr) 187 | 188 | self.times = librosa.times_like(self.onset_env, sr=sr, hop_length=self.hop_length) 189 | 190 | self.result_onset = librosa.onset.onset_detect(onset_envelope=self.onset_env, sr=sr, hop_length=self.hop_length, normalize = self.onset_detect_normalize, delta=self.onset_th) 191 | 192 | self.onset_backtrack = librosa.onset.onset_backtrack(self.result_onset, self.onset_env) 193 | 194 | tempo, self.result_beat = librosa.beat.beat_track(onset_envelope=self.onset_env, sr=sr) 195 | 196 | pulse = librosa.beat.plp(onset_envelope=self.onset_env, sr=sr, hop_length = self.hop_length) 197 | 198 | self.result_beat_plp = np.flatnonzero(librosa.util.localmax(pulse)) 199 | 200 | self.result_bpm = tempo 201 | 202 | self.is_loaded = True 203 | 204 | 205 | 206 | def create_onset_wave_list(onset_timing, length, default_type, default_strength, offset_time): 207 | #start_time,type,(strength) 208 | 209 | onset_timing = [int(i*1000) for i in onset_timing] 210 | print("onset_timing : ",onset_timing) 211 | onset_timing = [ i + offset_time for i in onset_timing if 0 < (i + offset_time) < length ] 212 | print("onset_timing + offset : ",onset_timing) 213 | 214 | wave_list = [] 215 | 216 | if 0 < onset_timing[0]: 217 | wave_list.append( ( 0, "zero", 1.0 ) ) 218 | 219 | for cur in onset_timing: 220 | wave_list.append( ( cur, default_type, default_strength ) ) 221 | 222 | wave_list.append( ( length, "end", 1.0 ) ) 223 | 224 | 225 | wave_str_list=[] 226 | 227 | for w in wave_list: 228 | if w[1] in ("zero", "end") or w[2] == 1.0: 229 | wave_str_list.append( f"{w[0]},{w[1]}" ) 230 | else: 231 | wave_str_list.append( f"{w[0]},{w[1]},{w[2]}" ) 232 | 233 | print(wave_str_list) 234 | 235 | return "\n".join(wave_str_list) 236 | 237 | def create_figure(wave_list): 238 | if not _aa_cache: 239 | return None 240 | return _aa_cache.CreateFig(wave_list) 241 | 242 | def create_prompt_figure(plot_data): 243 | if not _aa_cache: 244 | return None 245 | return _aa_cache.CreatePromptFig(plot_data) 246 | 247 | _aa_cache = None 248 | 249 | def audio_analyzer_process(audio_file:str, offset:int, band_min:int, band_max:int, hpss_type:int, onset_th:float, default_type:str, default_strength:float, is_backtrack:bool): 250 | global _aa_cache 251 | 252 | print("audio_file : ",audio_file) 253 | 254 | aa = None 255 | 256 | if _aa_cache: 257 | if _aa_cache.path == audio_file and\ 258 | _aa_cache.band_min == band_min and\ 259 | 
_aa_cache.band_max == band_max and\ 260 | _aa_cache.hpss_type == hpss_type and\ 261 | _aa_cache.onset_th == onset_th: 262 | print("use cache") 263 | aa = _aa_cache 264 | 265 | if not aa: 266 | aa = AudioAnalyzer(audio_file, band_min, band_max, hpss_type, onset_th) 267 | 268 | if not aa.IsSuccess(): 269 | return -1,-1,"",None, " " 270 | 271 | bpm = aa.GetBPM() 272 | length_msec = aa.GetLength() 273 | list_txt = create_onset_wave_list(aa.GetResult(is_backtrack), length_msec, default_type, default_strength, offset) 274 | 275 | _aa_cache = aa 276 | 277 | fig = create_figure(None) 278 | 279 | return bpm, length_msec, list_txt, fig, " " 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/raft.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | from PIL import Image 4 | import numpy as np 5 | import cv2 6 | import torch 7 | import time 8 | 9 | from torchvision.models.optical_flow import Raft_Large_Weights 10 | from torchvision.models.optical_flow import raft_large 11 | from torchvision.utils import flow_to_image 12 | import torchvision.transforms.functional as F 13 | 14 | img_batch_size = 1 15 | 16 | 17 | debug_c = 0 18 | def debug_save_img(img,comment): 19 | global debug_c 20 | img.save( f"scripts/testpngs/{debug_c}_{comment}.png") 21 | 22 | debug_c += 1 23 | 24 | 25 | def resize_img_array(img_array, w, h): 26 | if img_array.shape[0] + img_array.shape[1] < h + w: 27 | interpolation = interpolation=cv2.INTER_CUBIC 28 | else: 29 | interpolation = interpolation=cv2.INTER_AREA 30 | return cv2.resize(img_array, (w, h), interpolation=interpolation) 31 | 32 | 33 | def pathlist_to_stack(img_path_list): 34 | img_list = [np.array(Image.open(f)) for f in img_path_list] 35 | vframes = torch.as_tensor(np.stack(img_list)) 36 | vframes = vframes.permute(0, 3, 1, 2) 37 | return vframes 38 | 39 | def remove_files_in_dir(path, pattern): 40 | if not os.path.isdir(path): 41 | return 42 | pngs = glob.glob( os.path.join(path, pattern) ) 43 | for png in pngs: 44 | os.remove(png) 45 | 46 | 47 | ############# 48 | # from https://github.com/princeton-vl/RAFT/issues/57 49 | ############# 50 | def bilinear_sampler(img, coords, mode='bilinear', mask=False): 51 | """ Wrapper for grid_sample, uses pixel coordinates """ 52 | H, W = img.shape[-2:] 53 | xgrid, ygrid = coords.split([1,1], dim=-1) 54 | xgrid = 2*xgrid/(W-1) - 1 55 | ygrid = 2*ygrid/(H-1) - 1 56 | 57 | grid = torch.cat([xgrid, ygrid], dim=-1) 58 | img = torch.nn.functional.grid_sample(img, grid, align_corners=True) 59 | 60 | if mask: 61 | mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1) 62 | return img, mask.float() 63 | 64 | return img 65 | 66 | def coords_grid(batch, ht, wd, device="cpu"): 67 | coords = torch.meshgrid(torch.arange(ht, device=device), torch.arange(wd, device=device)) 68 | coords = torch.stack(coords[::-1], dim=0).float() 69 | return coords[None].repeat(batch, 1, 1, 1) 70 | 71 | 72 | def create_occ_mask(v1,v2,size,device,thresh = 2.0): 73 | H,W = size 74 | 75 | coords0 = coords_grid(1, H, W, device) 76 | coords1 = coords0 + v1 77 | coords2 = coords1 + bilinear_sampler(v2, coords1.permute(0,2,3,1)) 78 | 79 | err = (coords0 - coords2).norm(dim=1) 80 | occ = (err[0] > thresh).float().cpu().numpy() 81 | 82 | return occ * 255 83 | ############# 84 | 85 | 86 | 87 | def create_optical_flow(v_path, o_path, m_path, use_cache, size_hw=None, 
occ_th=2.0): 88 | from modules import devices 89 | 90 | os.makedirs(o_path, exist_ok=True) 91 | 92 | if m_path: 93 | os.makedirs(m_path, exist_ok=True) 94 | 95 | if use_cache: 96 | npys = glob.glob( os.path.join(o_path ,"[0-9]*.npy"), recursive=False) 97 | if npys: 98 | print("npy file found. skip create optical flow") 99 | return 100 | else: 101 | remove_files_in_dir(o_path, "*.npy") 102 | if m_path: 103 | remove_files_in_dir(m_path, "*.png") 104 | 105 | pngs = glob.glob( os.path.join(v_path ,"[0-9]*.png"), recursive=False) 106 | 107 | devices.torch_gc() 108 | 109 | weights = Raft_Large_Weights.DEFAULT 110 | transforms = weights.transforms() 111 | 112 | print("create_optical_flow") 113 | 114 | if size_hw: 115 | h, w= size_hw 116 | else: 117 | h, w, _ = np.array(Image.open(pngs[0])).shape 118 | H = h//8*8 119 | W = w//8*8 120 | 121 | def preprocess(img1_path, img2_path): 122 | img1_batch = pathlist_to_stack(img1_path) 123 | img2_batch = pathlist_to_stack(img2_path) 124 | 125 | img1_batch = F.resize(img1_batch, size=[H, W], antialias=False) 126 | img2_batch = F.resize(img2_batch, size=[H, W], antialias=False) 127 | 128 | return transforms(img1_batch, img2_batch) 129 | 130 | # If you can, run this example on a GPU, it will be a lot faster. 131 | device = "cuda" if torch.cuda.is_available() else "cpu" 132 | model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device) 133 | model = model.eval() 134 | 135 | for i in range(0, len(pngs)-1 ,img_batch_size): 136 | print("i = ",i) 137 | img_path_1 = pngs[i:i+img_batch_size] 138 | img_path_2 = pngs[i+1:i+1+img_batch_size] 139 | 140 | img1_batch, img2_batch = preprocess(img_path_1, img_path_2) 141 | 142 | with torch.no_grad(): 143 | list_of_flows = model(img1_batch.to(device), img2_batch.to(device)) 144 | 145 | predicted_flows = list_of_flows[-1] 146 | 147 | if m_path: 148 | with torch.no_grad(): 149 | rev_list_of_flows = model(img2_batch.to(device), img1_batch.to(device)) 150 | 151 | rev_predicted_flows = rev_list_of_flows[-1] 152 | 153 | for j in range(len(predicted_flows)): 154 | fl = predicted_flows[j] 155 | rev_fl = rev_predicted_flows[j] 156 | 157 | fl = fl.permute(0, 1, 2).unsqueeze(0).contiguous() 158 | rev_fl = rev_fl.permute(0, 1, 2).unsqueeze(0).contiguous() 159 | 160 | occ = create_occ_mask(fl,rev_fl,(H,W), device, occ_th) 161 | out_path = os.path.join(m_path, f"{str(i+j + 1).zfill(5)}.png") 162 | Image.fromarray(occ).convert("L").save(out_path) 163 | 164 | predicted_flows = predicted_flows.to("cpu").detach().numpy() 165 | 166 | for j in range(len(predicted_flows)): 167 | fl = predicted_flows[j] 168 | out_path = os.path.join(o_path, f"{str(i+j + 1).zfill(5)}.npy") 169 | np.save(out_path, fl) 170 | print("output : ", out_path) 171 | 172 | devices.torch_gc() 173 | 174 | 175 | def warp_img(processed_array, flow): 176 | flow = flow.transpose(1,2,0) 177 | 178 | h = flow.shape[0] 179 | w = flow.shape[1] 180 | flow = -flow 181 | flow[:,:,0] += np.arange(w) 182 | flow[:,:,1] += np.arange(h)[:,np.newaxis] 183 | 184 | org_h,org_w,_ = processed_array.shape 185 | 186 | processed_array = resize_img_array(processed_array, w, h) 187 | 188 | result = cv2.remap(processed_array, flow, None, cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101) 189 | 190 | return resize_img_array(result, org_w, org_h) 191 | 192 | def apply_flow_single(processed_img, o_path, is_reverse = False, add_flow_path = None, rate = 0.0, use_inpaint = True): 193 | if use_inpaint: 194 | img = processed_img.convert('RGBA') 195 | img.putalpha(255) 196 | else: 197 | img 
= processed_img 198 | 199 | if not o_path: 200 | print("o_path is empty") 201 | return processed_img 202 | 203 | flow = np.load(o_path) 204 | 205 | if is_reverse: 206 | flow = -flow 207 | 208 | if add_flow_path: 209 | add_flow = np.load(add_flow_path) 210 | if is_reverse: 211 | add_flow = -add_flow 212 | add_flow = add_flow * rate 213 | 214 | # flow = flow * 0.5 + add_flow * 0.5 215 | th = np.mean(np.abs(flow)) / 2 216 | mask = (np.abs(flow) < th) 217 | np.putmask( flow, mask, add_flow) 218 | 219 | # debug_save_img(img, "pre") 220 | a_array = warp_img(np.array(img), flow) 221 | 222 | if use_inpaint: 223 | org_mask_array = a_array[:, :, 3] 224 | org_mask_array = 255 - org_mask_array 225 | 226 | a_array = cv2.inpaint(a_array[:, :, 0:3],org_mask_array,3,cv2.INPAINT_TELEA) 227 | 228 | return Image.fromarray(a_array) 229 | 230 | 231 | def apply_flow(base_img, flow_path_list, mask_path_list): 232 | 233 | img = base_img 234 | W, H = img.size 235 | mask_array = None 236 | 237 | for f,m in zip(flow_path_list, mask_path_list): 238 | if not os.path.isfile(f): 239 | return img, cv2.resize( mask_array, (W,H), interpolation = cv2.INTER_CUBIC) if mask_array is not None else None 240 | img = apply_flow_single(img, f, False, None, 0, False) 241 | 242 | if mask_array is None: 243 | mask_array = np.array(Image.open(m)) 244 | else: 245 | mask_array = mask_array + np.array(Image.open(m)) 246 | 247 | return img, cv2.resize( mask_array, (W,H), interpolation = cv2.INTER_CUBIC) if mask_array is not None else None 248 | 249 | def get_scene_detection_list(detection_th, interpolation_multi, mask_path_list): 250 | result = [False] 251 | all_pixels = -1 252 | value_list = [0] 253 | 254 | for i in range(0, len(mask_path_list), interpolation_multi): 255 | v = 0 256 | for m in mask_path_list[i:i+interpolation_multi]: 257 | mask_array = np.array(Image.open(m)) 258 | bad_pixels = np.count_nonzero(mask_array > 0) 259 | if all_pixels == -1: 260 | all_pixels = (mask_array.shape[0] * mask_array.shape[1]) 261 | bad_rate = bad_pixels / all_pixels 262 | if bad_rate > v: 263 | v = bad_rate 264 | result.append( v > detection_th ) 265 | value_list.append(v) 266 | 267 | 268 | for i, r in enumerate(result): 269 | if r: 270 | print(f"{i} : {r} ({value_list[i]})") 271 | 272 | return result 273 | 274 | 275 | 276 | def interpolate_frame(head_img, tail_img, cur_flow, add_flow): 277 | list_a = [] 278 | list_b = [] 279 | 280 | img = head_img 281 | i = 0 282 | for f in cur_flow[:-1]: 283 | img = apply_flow_single(img, f, False, add_flow, (i+1)/ len(cur_flow)) 284 | list_a.append(img) 285 | 286 | img = tail_img 287 | i = 0 288 | for f in cur_flow[:0:-1]: 289 | img = apply_flow_single(img, f, True, add_flow, (i+1)/ len(cur_flow)) 290 | list_b.append(img) 291 | 292 | result = [head_img] 293 | i = 0 294 | 295 | for h,t in zip(list_a, list_b[::-1]): 296 | i_frame = Image.blend(h,t, (i+1)/ len(cur_flow) ) 297 | result.append( i_frame ) 298 | i+=1 299 | 300 | return result 301 | 302 | def interpolate_frame2(head_img, tail_img, num_of_frames): 303 | result = [] 304 | 305 | for i in range(num_of_frames): 306 | i_frame = Image.blend(head_img,tail_img, (i)/ num_of_frames ) 307 | result.append( i_frame ) 308 | i+=1 309 | 310 | return result 311 | 312 | def interpolate(org_frame_path, out_frame_path, flow_interpolation_multi, flow_path, scene_changed_list ): 313 | 314 | print("interpolate start") 315 | 316 | calc_time_start = time.perf_counter() 317 | 318 | org_frames = sorted(glob.glob( os.path.join(org_frame_path ,"[0-9]*.png"), recursive=False)) 319 | 
flows = sorted(glob.glob( os.path.join(flow_path ,"[0-9]*.npy"), recursive=False)) 320 | _, FLOW_H, FLOW_W = np.load(flows[0]).shape # 2, H, W 321 | 322 | flows = iter(flows) 323 | 324 | tmp_flow_path = os.path.join(org_frame_path ,"tmp_flow") 325 | create_optical_flow(org_frame_path, tmp_flow_path, None, False, (FLOW_H, FLOW_W) ) 326 | 327 | tmp_flows = sorted(glob.glob( os.path.join(tmp_flow_path ,"[0-9]*.npy"), recursive=False)) 328 | 329 | tmp_flows = iter(tmp_flows) 330 | 331 | os.makedirs(out_frame_path, exist_ok=True) 332 | 333 | i = 0 334 | tail_img=None 335 | 336 | for org_i, (head, tail) in enumerate(zip(org_frames, org_frames[1:])): 337 | cur_flow = [next(flows, None) for x in range(flow_interpolation_multi)] 338 | 339 | cur_flow = [ x for x in cur_flow if x is not None] 340 | 341 | tmp_flow = next(tmp_flows, None) 342 | 343 | head_img = Image.open( head ) 344 | tail_img = Image.open( tail ) 345 | 346 | if scene_changed_list[org_i+1]: 347 | # result_imgs = [ (head_img if f<(flow_interpolation_multi/2) else tail_img) for f in range(flow_interpolation_multi)] 348 | result_imgs = interpolate_frame2(head_img, tail_img, flow_interpolation_multi) 349 | else: 350 | result_imgs = interpolate_frame(head_img, tail_img, cur_flow, tmp_flow) 351 | 352 | for f in result_imgs: 353 | output_img_path = os.path.join(out_frame_path, f"{str(i).zfill(5)}.png" ) 354 | f.save( output_img_path ) 355 | 356 | print("output : ",i) 357 | 358 | i += 1 359 | 360 | output_img_path = os.path.join(out_frame_path, f"{str(i).zfill(5)}.png" ) 361 | tail_img.save( output_img_path ) 362 | 363 | print("output : ",i) 364 | 365 | calc_time_end = time.perf_counter() 366 | print("interpolate elapsed_time (sec) : ", calc_time_end - calc_time_start) 367 | 368 | 369 | 370 | -------------------------------------------------------------------------------- /scripts/loopback_music_sync_wave_ui.py: -------------------------------------------------------------------------------- 1 | 2 | import gradio as gr 3 | 4 | from modules import script_callbacks 5 | from modules.call_queue import wrap_gradio_gpu_call 6 | 7 | from scripts.loopback_music_sync_wave import get_wave_type_list,str_to_wave_list,wave_list_to_str,merge_wave_list 8 | from scripts.util_sd_loopback_music_sync_wave.wave_generator import wave_generator_process,f2w_generator_process 9 | from scripts.util_sd_loopback_music_sync_wave.audio_analyzer import audio_analyzer_process 10 | from scripts.util_sd_loopback_music_sync_wave.wave_list_test import wave_list_test_process 11 | from scripts.util_sd_loopback_music_sync_wave.frame_extractor import frame_extract_one,frame_extract_per_wave,frame_extract_scene_change 12 | from scripts.util_sd_loopback_music_sync_wave.prompt_test import prompt_test_process 13 | 14 | def on_ui_tabs(): 15 | 16 | with gr.Blocks(analytics_enabled=False) as wave_generation_interface: 17 | with gr.Tabs(): 18 | with gr.TabItem('Wave List Generate'): 19 | with gr.Row().style(equal_height=True): 20 | with gr.Column(variant='panel'): 21 | with gr.Accordion(label="Common Input", open=True): 22 | input_audio_path = gr.Textbox(label='Audio File Path', lines=1) 23 | 24 | input_audio = gr.Audio(interactive=True, mirror_webcam=False, type="filepath") 25 | def fn_upload_org_audio(a): 26 | return a 27 | input_audio.upload(fn_upload_org_audio, input_audio, input_audio_path) 28 | gr.HTML(value="

<br><br>\ 29 | If you have trouble entering the audio file path manually, you can also use drag and drop.\ 30 | <br><br>
") 31 | 32 | with gr.Row(): 33 | with gr.Tabs(elem_id="lmsw_settings"): 34 | with gr.TabItem('Wave List Generator'): 35 | 36 | with gr.Accordion(label="Input", open=True): 37 | wave_bpm = gr.Slider(minimum=1, maximum=300, step=0.01, label='BPM', value=120.0) 38 | 39 | wave_beat_per_wave = gr.Slider(minimum=1, maximum=32, step=1, label='Beat Per Wave', value=4) 40 | 41 | wave_start_msec = gr.Number(value=0, label="Start Time (millisecond)", precision=0, interactive=True) 42 | wave_end_msec = gr.Number(value=5000, label="End Time (millisecond)", precision=0, interactive=True) 43 | 44 | sels = get_wave_type_list() 45 | wave_default_type = gr.Radio(label='Default Wave Type', choices=sels, value=sels[2], type="value") 46 | wave_default_strength = gr.Slider(minimum=0, maximum=3.0, step=0.1, label='Default Wave Strength', value=1.0) 47 | 48 | with gr.Row(): 49 | wave_generate_btn = gr.Button('Generate', variant='primary') 50 | 51 | with gr.Accordion(label="Result", open=True): 52 | with gr.Row(): 53 | wave_list_txt = gr.Textbox(label='Wave List', lines=30, interactive=True) 54 | with gr.Row(): 55 | test_generate_btn = gr.Button('Generate Test Audio', variant='primary') 56 | send_to_extract_btn = gr.Button('Send to Frame Extract', variant='primary') 57 | with gr.Row(): 58 | prompt_test_txt = gr.Textbox(label='Input Prompt you want to test(Extend Prompt format)', lines=5, interactive=True) 59 | with gr.Row(): 60 | prompt_test_btn = gr.Button('Prompt Test', variant='primary') 61 | 62 | with gr.TabItem('Frame List To Wave List'): 63 | with gr.Accordion(label="Input", open=True): 64 | f2w_fps = gr.Slider(minimum=1, maximum=240, step=1, label='FPS including interpolated frames', value=24) 65 | gr.HTML(value="

<br><br>\ 66 | [Frames per second] x [Optical Flow Settings -> Interpolation Multiplier] = FPS including interpolated frames \ 67 | <br><br>
") 68 | sels = get_wave_type_list() 69 | f2w_default_type = gr.Radio(label='Default Wave Type', choices=sels, value=sels[2], type="value") 70 | f2w_default_strength = gr.Slider(minimum=0, maximum=3.0, step=0.1, label='Default Wave Strength', value=1.0) 71 | with gr.Row(): 72 | f2w_frame_list_txt = gr.Textbox(label='Frame List', lines=5, interactive=True) 73 | gr.HTML(value="

<br><br>\ 74 | Example: if you want the 160th, 354th, 1125th, and 1650th frames to each start a wave in a video that has 2000 frames in total at the FPS specified above, enter: \ 75 | <br>\ 76 | 160,354,1125,1650,2000\ 77 | <br><br>
") 78 | with gr.Row(): 79 | f2w_generate_btn = gr.Button('Generate', variant='primary') 80 | 81 | with gr.Accordion(label="Result", open=True): 82 | with gr.Row(): 83 | f2w_wave_list_txt = gr.Textbox(label='Wave List', lines=30, interactive=True) 84 | 85 | 86 | with gr.Tabs(elem_id="lmsw_settings2"): 87 | with gr.TabItem('Audio Analyzer'): 88 | 89 | with gr.Accordion(label="Input", open=True): 90 | aa_offset = gr.Slider(minimum=-1000, maximum=1000, step=1, label='Offset Time', value=0) 91 | 92 | 93 | with gr.Accordion(label="Advanced Settings", open=False): 94 | aa_band_min = gr.Slider(minimum=-1, maximum=128, step=1, label='Min Band', value=-1) 95 | aa_band_max = gr.Slider(minimum=-1, maximum=128, step=1, label='Max Band', value=-1) 96 | aa_hpss_type = gr.Radio(label='HPSS', choices=["none","H","P"], value="none", type="index") 97 | aa_onset_th = gr.Slider(minimum=0.01, maximum=1.0, step=0.01, label='Onset Threshold', value=0.07) 98 | aa_is_backtrack = gr.Checkbox(label='Get Backtracked Onsets', value=True) 99 | 100 | with gr.Row(): 101 | aa_generate_btn = gr.Button('Run', variant='primary') 102 | 103 | with gr.Accordion(label="Result", open=True): 104 | aa_bpm = gr.Slider(minimum=1, maximum=300, step=0.5, label='BPM', value=1, interactive=False) 105 | aa_length_msec = gr.Number(value=0, label="End Time (millisecond)", precision=0, interactive=False) 106 | 107 | with gr.Row(): 108 | aa_list_txt = gr.Textbox(label='Wave List(Generated from Onset Time)', lines=30, interactive=True) 109 | with gr.Row(): 110 | merge_start = gr.Number(value=0, label="Merge Start Time (millisecond)", precision=0, interactive=True) 111 | merge_end = gr.Number(value=0, label="Merge End Time (millisecond)", precision=0, interactive=True) 112 | merge_wave_btn = gr.Button('Merge Wave List', variant='primary') 113 | with gr.Row(): 114 | test_generate_btn2 = gr.Button('Generate Test Audio', variant='primary') 115 | 116 | 117 | with gr.Column(variant='panel'): 118 | test_result_audio = gr.Audio(interactive=False, mirror_webcam=False, type="filepath") 119 | 120 | wave_plt = gr.Plot(elem_id='lmsw_wave_plot') 121 | html_info = gr.HTML(visible=False) 122 | 123 | with gr.TabItem('Frame Extract'): 124 | with gr.Accordion(label="Input", open=True): 125 | fe_project_dir = gr.Textbox(label='Project directory', lines=1) 126 | fe_movie_path = gr.Textbox(label='Movie Path', lines=1) 127 | 128 | fe_video = gr.Video(interactive=True, mirror_webcam=False) 129 | def fn_upload_org_video(video): 130 | return video 131 | fe_video.upload(fn_upload_org_video, fe_video, fe_movie_path) 132 | gr.HTML(value="

<br><br>\ 133 | If you have trouble entering the video path manually, you can also use drag and drop. For large videos, please enter the path manually. \ 134 | <br><br>
") 135 | 136 | fe_ffmpeg_path = gr.Textbox(label="ffmpeg binary. Only set this if it fails otherwise.", lines=1, value="") 137 | 138 | with gr.Tabs(): 139 | with gr.TabItem('Extract first frame'): 140 | fe_fps = gr.Slider(minimum=-1, maximum=240, step=1, label='FPS including interpolated frames(Optional)', value=-1) 141 | with gr.Row(): 142 | extract_one_btn = gr.Button('Extract', variant='primary') 143 | with gr.TabItem('Frame Extract For Initial image switching per wave'): 144 | with gr.Row(): 145 | per_wave_extract_list_txt = gr.Textbox(label='Wave List', lines=30, interactive=True) 146 | with gr.Row(): 147 | per_wave_extract_btn = gr.Button('Extract', variant='primary') 148 | with gr.TabItem('Extract Scene Change frame'): 149 | sc_fe_fps = gr.Slider(minimum=1, maximum=240, step=1, label='FPS including interpolated frames', value=24) 150 | sc_use_optical_flow_cache = gr.Checkbox(label='Use Optical Flow Cache', value=True) 151 | sc_flow_occ_detect_th = gr.Slider(minimum=0.1, maximum=5.0, step=0.01, label='Occlusion area detection threshold.', value=1.0) 152 | sc_sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Scene Detection threshold', value=0.85) 153 | with gr.Row(): 154 | sc_extract_btn = gr.Button('Extract', variant='primary') 155 | 156 | 157 | def send_to_extract(list_txt): 158 | return list_txt 159 | 160 | send_to_extract_btn.click(fn=send_to_extract, inputs=wave_list_txt, outputs=per_wave_extract_list_txt) 161 | 162 | def merge_btn_func(org_list,add_list,start,end): 163 | start,end = min(start,end),max(start,end) 164 | a = str_to_wave_list(org_list) 165 | b = str_to_wave_list(add_list) 166 | c = merge_wave_list(a,b,start,end) 167 | return wave_list_to_str(c) 168 | 169 | merge_wave_btn.click(fn=merge_btn_func, inputs=[wave_list_txt, aa_list_txt, merge_start, merge_end], outputs=wave_list_txt) 170 | 171 | wave_gen_args = dict( 172 | fn=wrap_gradio_gpu_call(wave_generator_process), 173 | inputs=[ 174 | wave_bpm, 175 | wave_beat_per_wave, 176 | wave_start_msec, 177 | wave_end_msec, 178 | wave_default_type, 179 | wave_default_strength, 180 | 181 | ], 182 | outputs=[ 183 | wave_list_txt, 184 | wave_plt, 185 | html_info 186 | ], 187 | show_progress=False, 188 | ) 189 | wave_generate_btn.click(**wave_gen_args) 190 | 191 | aa_gen_args = dict( 192 | fn=wrap_gradio_gpu_call(audio_analyzer_process), 193 | inputs=[ 194 | input_audio_path, 195 | aa_offset, 196 | aa_band_min, 197 | aa_band_max, 198 | aa_hpss_type, 199 | aa_onset_th, 200 | wave_default_type, 201 | wave_default_strength, 202 | aa_is_backtrack, 203 | ], 204 | outputs=[ 205 | aa_bpm, 206 | aa_length_msec, 207 | aa_list_txt, 208 | wave_plt, 209 | html_info 210 | ], 211 | show_progress=False, 212 | ) 213 | aa_generate_btn.click(**aa_gen_args) 214 | 215 | test_gen_args = dict( 216 | fn=wrap_gradio_gpu_call(wave_list_test_process), 217 | inputs=[ 218 | input_audio_path, 219 | wave_list_txt, 220 | ], 221 | outputs=[ 222 | test_result_audio, 223 | html_info 224 | ], 225 | show_progress=False, 226 | ) 227 | test_generate_btn.click(**test_gen_args) 228 | 229 | test_gen2_args = dict( 230 | fn=wrap_gradio_gpu_call(wave_list_test_process), 231 | inputs=[ 232 | input_audio_path, 233 | aa_list_txt, 234 | ], 235 | outputs=[ 236 | test_result_audio, 237 | html_info 238 | ], 239 | show_progress=False, 240 | ) 241 | test_generate_btn2.click(**test_gen2_args) 242 | 243 | f2w_gen_args = dict( 244 | fn=wrap_gradio_gpu_call(f2w_generator_process), 245 | inputs=[ 246 | f2w_fps, 247 | f2w_default_type, 248 | f2w_default_strength, 249 
| f2w_frame_list_txt, 250 | ], 251 | outputs=[ 252 | f2w_wave_list_txt, 253 | html_info 254 | ], 255 | show_progress=False, 256 | ) 257 | f2w_generate_btn.click(**f2w_gen_args) 258 | 259 | 260 | fe_one_gen_args = dict( 261 | fn=wrap_gradio_gpu_call(frame_extract_one), 262 | inputs=[ 263 | fe_project_dir, 264 | fe_movie_path, 265 | fe_ffmpeg_path, 266 | fe_fps, 267 | ], 268 | outputs=[ 269 | html_info 270 | ], 271 | show_progress=False, 272 | ) 273 | extract_one_btn.click(**fe_one_gen_args) 274 | 275 | fe_per_wave_gen_args = dict( 276 | fn=wrap_gradio_gpu_call(frame_extract_per_wave), 277 | inputs=[ 278 | fe_project_dir, 279 | fe_movie_path, 280 | fe_ffmpeg_path, 281 | per_wave_extract_list_txt, 282 | ], 283 | outputs=[ 284 | html_info 285 | ], 286 | show_progress=False, 287 | ) 288 | per_wave_extract_btn.click(**fe_per_wave_gen_args) 289 | 290 | sc_fe_gen_args = dict( 291 | fn=wrap_gradio_gpu_call(frame_extract_scene_change), 292 | inputs=[ 293 | fe_project_dir, 294 | fe_movie_path, 295 | fe_ffmpeg_path, 296 | sc_fe_fps, 297 | sc_use_optical_flow_cache, 298 | sc_flow_occ_detect_th, 299 | sc_sd_threshold, 300 | ], 301 | outputs=[ 302 | html_info 303 | ], 304 | show_progress=False, 305 | ) 306 | sc_extract_btn.click(**sc_fe_gen_args) 307 | 308 | prompt_test_args = dict( 309 | fn=wrap_gradio_gpu_call(prompt_test_process), 310 | inputs=[ 311 | wave_list_txt, 312 | prompt_test_txt, 313 | ], 314 | outputs=[ 315 | wave_plt, 316 | html_info 317 | ], 318 | show_progress=False, 319 | ) 320 | prompt_test_btn.click(**prompt_test_args) 321 | 322 | return (wave_generation_interface, "Loopback Music Sync Wave", "wave_generation_interface"), 323 | 324 | 325 | script_callbacks.on_ui_tabs(on_ui_tabs) 326 | 327 | -------------------------------------------------------------------------------- /scripts/util_sd_loopback_music_sync_wave/sync_effect.py: -------------------------------------------------------------------------------- 1 | import re 2 | from scripts.util_sd_loopback_music_sync_wave import perlin 3 | from scripts.util_sd_loopback_music_sync_wave.regex import create_regex, create_regex_text 4 | 5 | def quadratic_generator(init,top,count): 6 | count = int(count) 7 | b = 4*(top - init)/count 8 | a = -1*b / count 9 | c = init 10 | 11 | i = 0 12 | while count > i: 13 | i += 1 14 | # y = ax^2 + bx + c 15 | v = a * i*i + b*i + c 16 | 17 | yield v 18 | 19 | def warped_quadratic_generator(init,top,count,ratio): 20 | first_half = max(1, int(count * ratio)) 21 | second_half = max(1, int(count - first_half)) 22 | 23 | lst = list(quadratic_generator(init, top, first_half*2)) 24 | for i in range(first_half): 25 | yield lst[i] 26 | 27 | lst = list(quadratic_generator(top, init, second_half*2)) 28 | for i in range(second_half): 29 | yield lst[i] 30 | 31 | 32 | def pendulum_generator(init,first,second,count): 33 | first_half = max(1, int(count * 0.5)) 34 | second_half = max(1, int(count - first_half)) 35 | 36 | lst = list(quadratic_generator(init, first, first_half)) 37 | for i in range(first_half): 38 | yield lst[i] 39 | 40 | lst = list(quadratic_generator(init, second, second_half)) 41 | for i in range(second_half): 42 | yield lst[i] 43 | 44 | def shake_generator(init, top, count, attenuation_rate=0.7): 45 | count = int(count) 46 | cur = top 47 | for i in range(count-1): 48 | yield cur + init 49 | cur *= -1 * attenuation_rate 50 | 51 | yield init 52 | 53 | def pos_2_vel(pos_list, init): 54 | vel_list = [] 55 | 56 | cur = init 57 | for pos in pos_list: 58 | vel = pos - cur 59 | vel_list.append(vel) 60 | cur = pos 
61 | 62 | return vel_list 63 | 64 | def norm_pnf(pnf): 65 | return abs( (pnf + 0.866) / (0.866*2) ) 66 | def norm_pnf2(pnf): 67 | return pnf / 0.866 68 | 69 | 70 | ######################### 71 | 72 | def shake_x_generator(frames, fps, _, amp): 73 | if frames < 2: 74 | yield -1 75 | 76 | amp *= fps 77 | 78 | gen = shake_generator(0,amp,frames) 79 | result_pos = list(gen) 80 | result_vel = pos_2_vel(result_pos, 0) 81 | 82 | print("shake_x : ",result_vel) 83 | 84 | for r in result_vel: 85 | yield f"#vel_x({r:.3f})" 86 | yield -1 87 | 88 | 89 | def shake_y_generator(frames, fps, _, amp): 90 | if frames < 2: 91 | yield -1 92 | 93 | amp *= fps 94 | 95 | gen = shake_generator(0,amp,frames) 96 | result_pos = list(gen) 97 | result_vel = pos_2_vel(result_pos, 0) 98 | 99 | print("shake_y : ",result_vel) 100 | 101 | for r in result_vel: 102 | yield f"#vel_y({r:.3f})" 103 | yield -1 104 | 105 | def shake_rot_generator(frames, fps, _, amp): 106 | if frames < 2: 107 | yield -1 108 | 109 | amp *= fps 110 | 111 | gen = shake_generator(0,amp,frames) 112 | result_pos = list(gen) 113 | result_vel = pos_2_vel(result_pos, 0) 114 | 115 | print("shake_rot : ",result_vel) 116 | 117 | for r in result_vel: 118 | yield f"#rot({r:.3f})" 119 | yield -1 120 | 121 | def shake_rot_x_generator(frames, fps, _, amp): 122 | if frames < 2: 123 | yield -1 124 | 125 | amp *= fps 126 | 127 | gen = shake_generator(0,amp,frames) 128 | result_pos = list(gen) 129 | result_vel = pos_2_vel(result_pos, 0) 130 | 131 | print("shake_rot_x : ",result_vel) 132 | 133 | for r in result_vel: 134 | yield f"#rot_x({r:.3f})" 135 | yield -1 136 | 137 | def shake_rot_y_generator(frames, fps, _, amp): 138 | if frames < 2: 139 | yield -1 140 | 141 | amp *= fps 142 | 143 | gen = shake_generator(0,amp,frames) 144 | result_pos = list(gen) 145 | result_vel = pos_2_vel(result_pos, 0) 146 | 147 | print("shake_rot_Y : ",result_vel) 148 | 149 | for r in result_vel: 150 | yield f"#rot_y({r:.3f})" 151 | yield -1 152 | 153 | def shake_zoom_generator(frames, fps, _, amp): 154 | if frames < 2: 155 | yield -1 156 | 157 | amp = (amp-1)*fps + 1 158 | 159 | gen = shake_generator(0,amp-1.0,frames) 160 | result_pos = list(gen) 161 | result_vel = pos_2_vel(result_pos, 0) 162 | 163 | result_vel = [x+1.0 for x in result_vel] 164 | 165 | print("shake_zoom : ",result_vel) 166 | 167 | for r in result_vel: 168 | yield f"#zoom({r:.3f})" 169 | yield -1 170 | 171 | def vibration_generator(frames, fps, _, max): 172 | if frames < 2: 173 | yield -1 174 | 175 | max = (max-1)*fps + 1 176 | 177 | gen = quadratic_generator(0, max - 1.0, frames) 178 | 179 | result_pos = list(gen) 180 | result_vel = pos_2_vel(result_pos, 0) 181 | result_vel = [x+1.0 for x in result_vel] 182 | 183 | print("vibration : ",result_vel) 184 | 185 | for r in result_vel: 186 | yield f"#zoom({r:.3f})" 187 | yield -1 188 | 189 | def random_xy_generator(frames, fps, _, amp_x, amp_y, resolution_msec): 190 | amp_x *= fps 191 | amp_y *= fps 192 | 193 | resolution = int(resolution_msec * fps / 1000) 194 | pnf = perlin.PerlinNoiseFactory(2, octaves=4, tile=(frames//resolution, frames//resolution), unbias = True) 195 | 196 | result_pos = [norm_pnf2(pnf(0, x/resolution)) for x in range(frames)] 197 | result_vel = pos_2_vel(result_pos, 0) 198 | result_pos2 = [norm_pnf2(pnf(x/resolution,0)) for x in range(frames)] 199 | result_vel2 = pos_2_vel(result_pos2, 0) 200 | 201 | print("random_xy : ",result_vel) 202 | print("random_xy : ",result_vel2) 203 | 204 | for x, y in zip( result_vel, result_vel2): 205 | yield f"#vel_x({x * 
208 | def random_z_generator(frames, fps, _, amp_z, resolution_msec): 209 | amp_z = (amp_z-1)*fps + 1 210 | 211 | resolution = int(resolution_msec * fps / 1000) 212 | pnf = perlin.PerlinNoiseFactory(1, octaves=4, tile=(frames//resolution,), unbias = True) 213 | 214 | result_pos = [norm_pnf2(pnf(x/resolution)) for x in range(frames)] 215 | result_vel = pos_2_vel(result_pos, 0) 216 | 217 | print("random_z : ",result_vel) 218 | 219 | for z in result_vel: 220 | yield f"#zoom({z * (amp_z-1) + 1:.3f})" 221 | yield -1 222 | 223 | def random_rot_generator(frames, fps, _, amp_r, resolution_msec): 224 | amp_r *= fps 225 | 226 | resolution = int(resolution_msec * fps / 1000) 227 | pnf = perlin.PerlinNoiseFactory(1, octaves=4, tile=(frames//resolution,), unbias = True) 228 | 229 | result_pos = [norm_pnf2(pnf(x/resolution)) for x in range(frames)] 230 | result_vel = pos_2_vel(result_pos, 0) 231 | 232 | print("random_rot : ",result_vel) 233 | 234 | for r in result_vel: 235 | yield f"#rot({amp_r * r:.3f})" 236 | yield -1 237 | 238 | def random_rot_x_generator(frames, fps, _, amp_r, resolution_msec): 239 | amp_r *= fps 240 | 241 | resolution = int(resolution_msec * fps / 1000) 242 | pnf = perlin.PerlinNoiseFactory(1, octaves=4, tile=(frames//resolution,), unbias = True) 243 | 244 | result_pos = [norm_pnf2(pnf(x/resolution)) for x in range(frames)] 245 | result_vel = pos_2_vel(result_pos, 0) 246 | 247 | print("random_rot_x : ",result_vel) 248 | 249 | for r in result_vel: 250 | yield f"#rot_x({amp_r * r:.3f})" 251 | yield -1 252 | 253 | def random_rot_y_generator(frames, fps, _, amp_r, resolution_msec): 254 | amp_r *= fps 255 | 256 | resolution = int(resolution_msec * fps / 1000) 257 | pnf = perlin.PerlinNoiseFactory(1, octaves=4, tile=(frames//resolution,), unbias = True) 258 | 259 | result_pos = [norm_pnf2(pnf(x/resolution)) for x in range(frames)] 260 | result_vel = pos_2_vel(result_pos, 0) 261 | 262 | print("random_rot_y : ",result_vel) 263 | 264 | for r in result_vel: 265 | yield f"#rot_y({amp_r * r:.3f})" 266 | yield -1 267 | 268 | 269 | def random_c_generator(frames, fps, _, amp_x, amp_y, cx, cy, resolution_msec): 270 | resolution = int(resolution_msec * fps / 1000) 271 | pnf = perlin.PerlinNoiseFactory(2, octaves=4, tile=(frames//resolution, frames//resolution), unbias = True) 272 | 273 | result_pos = [norm_pnf2(pnf(0, x/resolution)) for x in range(frames)] 274 | # result_vel = pos_2_vel(result_pos, 0) 275 | result_pos2 = [norm_pnf2(pnf(x/resolution,0)) for x in range(frames)] 276 | # result_vel2 = pos_2_vel(result_pos2, 0) 277 | 278 | # print("random_c : ",result_vel) 279 | # print("random_c : ",result_vel2) 280 | print("random_c : ",result_pos) 281 | print("random_c : ",result_pos2) 282 | 283 | for x, y in zip( result_pos, result_pos2): 284 | yield f"#center({cx + x * amp_x:.3f},{cy + y * amp_y:.3f})" 285 | yield -1 286 | 287 | 288 | def pendulum_xy_generator(frames, fps, _, x1, x2, y1, y2): 289 | x1 *= fps 290 | x2 *= fps 291 | y1 *= fps 292 | y2 *= fps 293 | 294 | gen = pendulum_generator( 0 ,x1,x2,frames) 295 | result_pos = list(gen) 296 | result_vel = pos_2_vel(result_pos, 0) 297 | gen = pendulum_generator( 0, y1,y2,frames) 298 | result_pos2 = list(gen) 299 | result_vel2 = pos_2_vel(result_pos2, 0) 300 | 301 | print("pendulum_xy : ",result_vel) 302 | print("pendulum_xy : ",result_vel2) 303 | 304 | yield "" # first frame 305 | 306 | for x, y in zip( result_vel, result_vel2): 307 | yield f"#vel_x({x:.3f}),#vel_y({y:.3f})" 308 | yield -1
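# Note (illustrative sketch, with assumed extremes): the pendulum_* generators
# yield "" for the first frame so the starting pose is left untouched, then
# swing to the first extreme and back, then to the second. The underlying
# curve, assuming init=0, extremes of 1.0 and -1.0, and count=4:
#
#   list(pendulum_generator(0, 1.0, -1.0, 4))   # -> [1.0, 0.0, -1.0, 0.0]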
309 | 310 | def pendulum_rot_generator(frames, fps, _, angle1, angle2): 311 | angle1 *= fps 312 | angle2 *= fps 313 | 314 | gen = pendulum_generator(0,angle1,angle2,frames) 315 | result_pos = list(gen) 316 | result_vel = pos_2_vel(result_pos, 0) 317 | 318 | print("pendulum_rot : ",result_vel) 319 | 320 | yield "" # first frame 321 | 322 | for r in result_vel: 323 | yield f"#rot({r:.3f})" 324 | yield -1 325 | 326 | def pendulum_rot_x_generator(frames, fps, _, angle1, angle2): 327 | angle1 *= fps 328 | angle2 *= fps 329 | 330 | gen = pendulum_generator(0,angle1,angle2,frames) 331 | result_pos = list(gen) 332 | result_vel = pos_2_vel(result_pos, 0) 333 | 334 | print("pendulum_rot_x : ",result_vel) 335 | 336 | yield "" # first frame 337 | 338 | for r in result_vel: 339 | yield f"#rot_x({r:.3f})" 340 | yield -1 341 | 342 | def pendulum_rot_y_generator(frames, fps, _, angle1, angle2): 343 | angle1 *= fps 344 | angle2 *= fps 345 | 346 | gen = pendulum_generator(0,angle1,angle2,frames) 347 | result_pos = list(gen) 348 | result_vel = pos_2_vel(result_pos, 0) 349 | 350 | print("pendulum_rot_y : ",result_vel) 351 | 352 | yield "" # first frame 353 | 354 | for r in result_vel: 355 | yield f"#rot_y({r:.3f})" 356 | yield -1 357 | 358 | def pendulum_zoom_generator(frames, fps, _, z1, z2): 359 | z1 = (z1-1)*fps + 1 360 | z2 = (z2-1)*fps + 1 361 | 362 | gen = pendulum_generator(0,z1-1.0,z2-1.0,frames) 363 | result_pos = list(gen) 364 | result_vel = pos_2_vel(result_pos, 0) 365 | 366 | result_vel = [x+1.0 for x in result_vel] 367 | 368 | print("pendulum_zoom : ",result_vel) 369 | 370 | yield "" # first frame 371 | 372 | for r in result_vel: 373 | yield f"#zoom({r:.3f})" 374 | yield -1 375 | 376 | def pendulum_center_generator(frames, fps, _, cx1, cx2, cy1, cy2): 377 | 378 | gen = pendulum_generator((cx1+cx2)/2,cx1,cx2,frames) 379 | result_pos = list(gen) 380 | gen = pendulum_generator((cy1+cy2)/2,cy1,cy2,frames) 381 | result_pos2 = list(gen) 382 | 383 | print("pendulum_center : ",result_pos) 384 | print("pendulum_center : ",result_pos2) 385 | 386 | yield "" # first frame 387 | 388 | for cx, cy in zip( result_pos, result_pos2): 389 | yield f"#center({cx:.3f},{cy:.3f})" 390 | yield -1 391 | 392 | 393 | def beat_blur_generator(frames, fps, _, amp_str): 394 | 395 | gen = warped_quadratic_generator(0, amp_str, frames, 0.1) 396 | result_pos = list(gen) 397 | 398 | print("beat_blur : ",result_pos) 399 | 400 | yield "" # first frame 401 | 402 | for s in result_pos: 403 | yield f"#blur({s:.3f})" 404 | yield -1 405 | 406 | def random_blur_generator(frames, fps, _, amp_str, resolution_msec): 407 | 408 | resolution = int(resolution_msec * fps / 1000) 409 | pnf = perlin.PerlinNoiseFactory(1, octaves=4, tile=(frames//resolution,), unbias = True) 410 | 411 | result_pos = [norm_pnf2(pnf(x/resolution)) for x in range(frames)] 412 | 413 | print("random_blur : ",result_pos) 414 | 415 | for r in result_pos: 416 | yield f"#blur({amp_str * r:.3f})" 417 | yield -1 418 | 419 | def pendulum_hue_generator(frames, fps, _,type,angle1,angle2): 420 | 421 | gen = pendulum_generator((angle1+angle2)/2, angle1, angle2, frames) 422 | result_pos = list(gen) 423 | 424 | print("pendulum_hue : ",result_pos) 425 | 426 | yield "" # first frame 427 | 428 | for s in result_pos: 429 | yield f"#hue({type:.3f},{s:.3f})" 430 | yield -1 431 | 432 | def random_hue_generator(frames, fps, _, type, start_angle, amp_angle, resolution_msec): 433 | 434 | resolution = int(resolution_msec * fps / 1000) 435 | pnf = perlin.PerlinNoiseFactory(1, octaves=4, 
tile=(frames//resolution,), unbias = True) 436 | 437 | result_pos = [norm_pnf2(pnf(x/resolution)) for x in range(frames)] 438 | 439 | print("random_hue : ",result_pos) 440 | 441 | for s in result_pos: 442 | yield f"#hue({type:.3f},{start_angle + s * amp_angle:.3f})" 443 | yield -1 444 | 445 | def beat_slide_x_generator(frames, fps, _, type, amp_slide_val, border_pos, amp_border): 446 | 447 | amp_slide_val *= fps 448 | 449 | gen = warped_quadratic_generator(0, amp_slide_val, frames, 0.1) 450 | result_pos = list(gen) 451 | result_vel = pos_2_vel(result_pos, 0) 452 | 453 | gen = warped_quadratic_generator(0, amp_border, frames, 0.1) 454 | result_pos2 = list(gen) 455 | 456 | print("beat_slide_x : ",result_vel) 457 | print("beat_slide_x : ",result_pos2) 458 | 459 | for x, y in zip( result_vel, result_pos2): 460 | yield f"#slide_x({type:.3f},{x:.3f},{border_pos + y:.3f})" 461 | yield -1 462 | 463 | def beat_slide_y_generator(frames, fps, _, type, amp_slide_val, border_pos, amp_border): 464 | 465 | amp_slide_val *= fps 466 | 467 | gen = warped_quadratic_generator(0, amp_slide_val, frames, 0.1) 468 | result_pos = list(gen) 469 | result_vel = pos_2_vel(result_pos, 0) 470 | 471 | gen = warped_quadratic_generator(0, amp_border, frames, 0.1) 472 | result_pos2 = list(gen) 473 | 474 | print("beat_slide_y : ",result_vel) 475 | print("beat_slide_y : ",result_pos2) 476 | 477 | for x, y in zip( result_vel, result_pos2): 478 | yield f"#slide_y({type:.3f},{x:.3f},{border_pos + y:.3f})" 479 | yield -1 480 | 481 | 482 | def random_slide_x_generator(frames, fps, _, type, amp_slide_val, border_pos, amp_border, resolution_msec): 483 | amp_slide_val *= fps 484 | 485 | resolution = int(resolution_msec * fps / 1000) 486 | pnf = perlin.PerlinNoiseFactory(2, octaves=4, tile=(frames//resolution, frames//resolution), unbias = True) 487 | 488 | result_pos = [norm_pnf2(pnf(0, x/resolution)) for x in range(frames)] 489 | result_vel = pos_2_vel(result_pos, 0) 490 | result_pos2 = [norm_pnf2(pnf(x/resolution,0)) for x in range(frames)] 491 | 492 | print("random_slide_x : ",result_vel) 493 | print("random_slide_x : ",result_pos2) 494 | 495 | for x, y in zip( result_vel, result_pos2): 496 | yield f"#slide_x({type:.3f},{x * amp_slide_val:.3f},{border_pos + y*amp_border:.3f})" 497 | yield -1 498 | 499 | def random_slide_y_generator(frames, fps, _, type, amp_slide_val, border_pos, amp_border, resolution_msec): 500 | amp_slide_val *= fps 501 | 502 | resolution = int(resolution_msec * fps / 1000) 503 | pnf = perlin.PerlinNoiseFactory(2, octaves=4, tile=(frames//resolution, frames//resolution), unbias = True) 504 | 505 | result_pos = [norm_pnf2(pnf(0, x/resolution)) for x in range(frames)] 506 | result_vel = pos_2_vel(result_pos, 0) 507 | result_pos2 = [norm_pnf2(pnf(x/resolution,0)) for x in range(frames)] 508 | 509 | print("random_slide_y : ",result_vel) 510 | print("random_slide_y : ",result_pos2) 511 | 512 | for x, y in zip( result_vel, result_pos2): 513 | yield f"#slide_y({type:.3f},{x * amp_slide_val:.3f},{border_pos + y*amp_border:.3f})" 514 | yield -1 515 | 516 | def inpaint_generator(frames, fps, _, mask_prompt, inpaint_prompt): 517 | 518 | print("inpaint mask_prompt : ",mask_prompt) 519 | print("inpaint inpaint_prompt : ",inpaint_prompt) 520 | 521 | if inpaint_prompt: 522 | yield f"#__inpaint(\"{mask_prompt}\",\"{inpaint_prompt}\")" 523 | else: 524 | yield f"#__inpaint(\"{mask_prompt}\")" 525 | 526 | yield -1 527 | 528 | 529 | effect_map={ 530 | "shake_x":shake_x_generator, 531 | "shake_y":shake_y_generator, 532 | 
"shake_rot":shake_rot_generator, 533 | "shake_rot_x":shake_rot_x_generator, 534 | "shake_rot_y":shake_rot_y_generator, 535 | "shake_zoom":shake_zoom_generator, 536 | "vibration":vibration_generator, 537 | "random_xy":random_xy_generator, 538 | "random_z":random_z_generator, 539 | "random_rot":random_rot_generator, 540 | "random_rot_x":random_rot_x_generator, 541 | "random_rot_y":random_rot_y_generator, 542 | "random_c":random_c_generator, 543 | "pendulum_xy":pendulum_xy_generator, 544 | "pendulum_rot":pendulum_rot_generator, 545 | "pendulum_rot_x":pendulum_rot_x_generator, 546 | "pendulum_rot_y":pendulum_rot_y_generator, 547 | "pendulum_zoom":pendulum_zoom_generator, 548 | "pendulum_center":pendulum_center_generator, 549 | 550 | "beat_blur":beat_blur_generator, 551 | "random_blur":random_blur_generator, 552 | 553 | "pendulum_hue":pendulum_hue_generator, 554 | "random_hue":random_hue_generator, 555 | 556 | "beat_slide_x":beat_slide_x_generator, 557 | "beat_slide_y":beat_slide_y_generator, 558 | "random_slide_x":random_slide_x_generator, 559 | "random_slide_y":random_slide_y_generator, 560 | 561 | "inpaint":inpaint_generator, 562 | } 563 | 564 | 565 | 566 | # $func 567 | shake_x_regex = create_regex(r'\$','shake_x', 2) 568 | shake_y_regex = create_regex(r'\$','shake_y', 2) 569 | shake_rot_regex = create_regex(r'\$','shake_rot', 2) 570 | shake_rot_x_regex = create_regex(r'\$','shake_rot_x', 2) 571 | shake_rot_y_regex = create_regex(r'\$','shake_rot_y', 2) 572 | shake_zoom_regex = create_regex(r'\$','shake_zoom', 2) 573 | 574 | vibration_regex = create_regex(r'\$','vibration', 2) 575 | 576 | random_xy_regex = create_regex(r'\$','random_xy', 3,1) 577 | random_z_regex = create_regex(r'\$','random_zoom', 2,1) 578 | random_rot_regex = create_regex(r'\$','random_rot', 2,1) 579 | random_rot_x_regex = create_regex(r'\$','random_rot_x', 2,1) 580 | random_rot_y_regex = create_regex(r'\$','random_rot_y', 2,1) 581 | random_c_regex = create_regex(r'\$','random_center', 3,3) 582 | 583 | pendulum_xy_regex = create_regex(r'\$','pendulum_xy', 5) 584 | pendulum_rot_regex = create_regex(r'\$','pendulum_rot', 3) 585 | pendulum_rot_x_regex = create_regex(r'\$','pendulum_rot_x', 3) 586 | pendulum_rot_y_regex = create_regex(r'\$','pendulum_rot_y', 3) 587 | pendulum_zoom_regex = create_regex(r'\$','pendulum_zoom', 3) 588 | pendulum_center_regex = create_regex(r'\$','pendulum_center', 5) 589 | 590 | beat_blur_regex = create_regex(r'\$','beat_blur', 2) 591 | random_blur_regex = create_regex(r'\$','random_blur', 2,1) 592 | 593 | pendulum_hue_regex = create_regex(r'\$','pendulum_hue', 4) 594 | random_hue_regex = create_regex(r'\$','random_hue', 4,1) 595 | 596 | beat_slide_x_regex = create_regex(r'\$','beat_slide_x', 3,2) 597 | beat_slide_y_regex = create_regex(r'\$','beat_slide_y', 3,2) 598 | 599 | random_slide_x_regex = create_regex(r'\$','random_slide_x', 3,3) 600 | random_slide_y_regex = create_regex(r'\$','random_slide_y', 3,3) 601 | 602 | 603 | inpaint_regex = create_regex_text(r'\$',"inpaint",1,1) 604 | 605 | 606 | def effect_create(match_obj, eff, eff_name, default_vals): 607 | dur = 0 608 | # first token is duration 609 | if match_obj.group(1) is not None: 610 | dur = int(match_obj.group(1)) 611 | 612 | for i in range(1, len(match_obj.groups())): 613 | if match_obj.group(i+1) is not None: 614 | default_vals[i-1] = float(match_obj.group(i+1)) 615 | 616 | eff.add_effect( dur, eff_name, *default_vals) 617 | 618 | return "" 619 | 620 | 621 | def effect_create2(match_obj, eff, eff_name, default_vals): 622 | for i in 
range(0, len(match_obj.groups())): 623 | if match_obj.group(i+1) is not None: 624 | default_vals[i] = str(match_obj.group(i+1)) 625 | 626 | eff.add_effect2( eff_name, *default_vals) 627 | 628 | return "" 629 | 630 | 631 | # $shake_x(dur, amp) 632 | # $shake_y(dur, amp) 633 | # $shake_rot(dur, amp) 634 | # $shake_zoom(dur, amp) 635 | # $vibration(dur, amp) 636 | # $random_xy(dur, x_amp, y_amp) 637 | # $random_zoom(dur, z_amp) 638 | # $random_rot(dur, r_amp) 639 | # $random_center(dur, amp_x, amp_y, cx, cy) 640 | 641 | 642 | 643 | 644 | class SyncEffect: 645 | def __init__(self, fps): 646 | self.fps = fps 647 | self.effect_list = [] 648 | 649 | def add_effect(self, duration, *effect_param): 650 | frames = self.fps * duration / 1000 651 | self.effect_list.append(effect_map[effect_param[0]](int(frames), self.fps, *effect_param)) 652 | 653 | def add_effect2(self, *effect_param): 654 | self.effect_list.append(effect_map[effect_param[0]](int(1), self.fps, *effect_param)) 655 | 656 | def get_current_prompt(self): 657 | remove_index = [] 658 | prompt = "" 659 | 660 | for i, ef in enumerate( self.effect_list ): 661 | result = next(ef) 662 | if result == -1: 663 | remove_index.append(i) 664 | else: 665 | if result: 666 | if prompt: 667 | prompt += "," + result 668 | else: 669 | prompt += result 670 | 671 | for i in reversed(remove_index): 672 | self.effect_list.pop(i) 673 | 674 | print("effect prompt : ", prompt) 675 | return prompt 676 | 677 | 678 | 679 | 680 | def parse_prompt(self, prompt): 681 | 682 | # $shake_x(dur, amp) 683 | prompt = re.sub(shake_x_regex, lambda x: effect_create(x, self, "shake_x", [-1]), prompt) 684 | # $shake_y(dur, amp) 685 | prompt = re.sub(shake_y_regex, lambda x: effect_create(x, self, "shake_y", [-1]), prompt) 686 | # $shake_rot(dur, amp) 687 | prompt = re.sub(shake_rot_regex, lambda x: effect_create(x, self, "shake_rot", [-1]), prompt) 688 | # $shake_rot_x(dur, amp) 689 | prompt = re.sub(shake_rot_x_regex, lambda x: effect_create(x, self, "shake_rot_x", [-1]), prompt) 690 | # $shake_rot_y(dur, amp) 691 | prompt = re.sub(shake_rot_y_regex, lambda x: effect_create(x, self, "shake_rot_y", [-1]), prompt) 692 | # $shake_zoom(dur, amp) 693 | prompt = re.sub(shake_zoom_regex, lambda x: effect_create(x, self, "shake_zoom", [-1]), prompt) 694 | # $vibration(dur, amp) 695 | prompt = re.sub(vibration_regex, lambda x: effect_create(x, self, "vibration", [-1]), prompt) 696 | 697 | # $random_xy(dur, x_amp, y_amp, resolution_msec=1000) 698 | prompt = re.sub(random_xy_regex, lambda x: effect_create(x, self, "random_xy", [-1,-1,1000]), prompt) 699 | # $random_zoom(dur, z_amp, resolution_msec=1000) 700 | prompt = re.sub(random_z_regex, lambda x: effect_create(x, self, "random_z", [-1,1000]), prompt) 701 | # $random_rot(dur, r_amp, resolution_msec=1000) 702 | prompt = re.sub(random_rot_regex, lambda x: effect_create(x, self, "random_rot", [-1,1000]), prompt) 703 | # $random_rot_x(dur, r_amp, resolution_msec=1000) 704 | prompt = re.sub(random_rot_x_regex, lambda x: effect_create(x, self, "random_rot_x", [-1,1000]), prompt) 705 | # $random_rot_y(dur, r_amp, resolution_msec=1000) 706 | prompt = re.sub(random_rot_y_regex, lambda x: effect_create(x, self, "random_rot_y", [-1,1000]), prompt) 707 | # $random_center(dur, amp_x, amp_y, cx=0.5, cy=0.5, resolution_msec=1000 ) 708 | prompt = re.sub(random_c_regex, lambda x: effect_create(x, self, "random_c", [-1,-1,0.5,0.5,1000]), prompt) 709 | 710 | # $pendulum_xy(dur, x1, x2, y1, y2 ) 711 | prompt = re.sub(pendulum_xy_regex, lambda x: 
effect_create(x, self, "pendulum_xy", [-1,-1,-1,-1]), prompt) 712 | # $pendulum_rot(dur, angle1, angle2 ) 713 | prompt = re.sub(pendulum_rot_regex, lambda x: effect_create(x, self, "pendulum_rot", [-1,-1]), prompt) 714 | # $pendulum_rot_x(dur, angle1, angle2 ) 715 | prompt = re.sub(pendulum_rot_x_regex, lambda x: effect_create(x, self, "pendulum_rot_x", [-1,-1]), prompt) 716 | # $pendulum_rot_y(dur, angle1, angle2 ) 717 | prompt = re.sub(pendulum_rot_y_regex, lambda x: effect_create(x, self, "pendulum_rot_y", [-1,-1]), prompt) 718 | # $pendulum_zoom(dur, z1, z2 ) 719 | prompt = re.sub(pendulum_zoom_regex, lambda x: effect_create(x, self, "pendulum_zoom", [-1,-1]), prompt) 720 | # $pendulum_center(dur, cx1, cx2, cy1, cy2 ) 721 | prompt = re.sub(pendulum_center_regex, lambda x: effect_create(x, self, "pendulum_center", [-1,-1,-1,-1]), prompt) 722 | 723 | # $beat_blur(dur, amp) 724 | prompt = re.sub(beat_blur_regex, lambda x: effect_create(x, self, "beat_blur", [-1]), prompt) 725 | # $random_blur(dur, amp, resolution_msec=1000) 726 | prompt = re.sub(random_blur_regex, lambda x: effect_create(x, self, "random_blur", [-1,1000]), prompt) 727 | # $pendulum_hue(dur, type, angle1, angle2) 728 | prompt = re.sub(pendulum_hue_regex, lambda x: effect_create(x, self, "pendulum_hue", [-1,-1,-1]), prompt) 729 | # $random_hue(dur, type, start_angle, amp_angle, resolution_msec=1000) 730 | prompt = re.sub(random_hue_regex, lambda x: effect_create(x, self, "random_hue", [-1,-1,-1,1000]), prompt) 731 | 732 | # $beat_slide_x(dur, type, amp_slide_val, border_pos=0.5, amp_border=0) 733 | prompt = re.sub(beat_slide_x_regex, lambda x: effect_create(x, self, "beat_slide_x", [-1,-1,0.5,0]), prompt) 734 | # $beat_slide_y(dur, type, amp_slide_val, border_pos=0.5, amp_border=0) 735 | prompt = re.sub(beat_slide_y_regex, lambda x: effect_create(x, self, "beat_slide_y", [-1,-1,0.5,0]), prompt) 736 | # $random_slide_x(dur, type, amp_slide_val, border_pos=0.5, amp_border=0, resolution_msec=1000) 737 | prompt = re.sub(random_slide_x_regex, lambda x: effect_create(x, self, "random_slide_x", [-1,-1,0.5,0,1000]), prompt) 738 | # $random_slide_y(dur, type, amp_slide_val, border_pos=0.5, amp_border=0, resolution_msec=1000) 739 | prompt = re.sub(random_slide_y_regex, lambda x: effect_create(x, self, "random_slide_y", [-1,-1,0.5,0,1000]), prompt) 740 | 741 | # $inpaint(mask_prompt, inpaint_prompt) 742 | prompt = re.sub(inpaint_regex, lambda x: effect_create2(x, self, "inpaint", [-1,""]), prompt) 743 | 744 | 745 | return prompt 746 | 747 | 748 | 749 | 750 | 751 | 752 | 753 | -------------------------------------------------------------------------------- /scripts/loopback_music_sync_wave.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import math 4 | import subprocess as sp 5 | import random 6 | import re 7 | import glob 8 | import time 9 | from PIL import Image 10 | import json 11 | import numpy as np 12 | import cv2 13 | import copy 14 | 15 | import modules.scripts 16 | import gradio as gr 17 | 18 | from modules import processing,images 19 | from modules.processing import Processed 20 | from modules.shared import opts, cmd_opts, state 21 | import modules.shared 22 | 23 | import scripts.util_sd_loopback_music_sync_wave.affine 24 | import scripts.util_sd_loopback_music_sync_wave.slide 25 | import scripts.util_sd_loopback_music_sync_wave.sync_effect 26 | import scripts.util_sd_loopback_music_sync_wave.bpm 27 | import 
scripts.util_sd_loopback_music_sync_wave.other_effect 28 | import scripts.util_sd_loopback_music_sync_wave.sam 29 | import scripts.util_sd_loopback_music_sync_wave.controlnet 30 | import scripts.util_sd_loopback_music_sync_wave.upscale 31 | from scripts.util_sd_loopback_music_sync_wave.regex import create_regex, create_regex_text 32 | import scripts.util_sd_loopback_music_sync_wave.raft 33 | 34 | skip_process_for_debug = False 35 | 36 | debug_c = 0 37 | 38 | def debug_save_img_array(img_array, comment): 39 | debug_save_img( Image.fromarray(img_array), comment) 40 | 41 | def debug_save_img(img:Image,comment): 42 | global debug_c 43 | img.save( f"scripts/testpngs/{debug_c}_{comment}.png") 44 | 45 | debug_c += 1 46 | 47 | 48 | 49 | 50 | # @func 51 | wave_completed_regex = create_regex(r'@','wave_completed',2) 52 | wave_remaining_regex = create_regex(r'@','wave_remaining',2) 53 | wave_amplitude_regex = create_regex(r'@','wave_amplitude',2) 54 | wave_shape_regex = create_regex(r'@','wave_shape',2) 55 | wave_progress_regex = create_regex(r'@','wave_progress',2) 56 | total_progress_regex = create_regex(r'@','total_progress',2) 57 | random_regex = create_regex(r'@','random',1,1) 58 | 59 | # #func 60 | vel_x_regex = create_regex(r'#','vel_x',1) 61 | vel_y_regex = create_regex(r'#','vel_y',1) 62 | rot_regex = create_regex(r'#','rot',1) 63 | zoom_regex = create_regex(r'#','zoom',1) 64 | center_regex = create_regex(r'#','center',2) 65 | rot_x_regex = create_regex(r'#','rot_x',1) 66 | rot_y_regex = create_regex(r'#','rot_y',1) 67 | 68 | blur_regex = create_regex(r'#','blur',1) 69 | hue_regex = create_regex(r'#','hue',2) 70 | 71 | inpaint_regex = create_regex_text(r'#','__inpaint',1,1) 72 | 73 | slide_x_regex = create_regex(r'#','slide_x',2,1) 74 | slide_y_regex = create_regex(r'#','slide_y',2,1) 75 | 76 | postprocess_regex = create_regex(r'#','post_process',1) 77 | 78 | 79 | # @@bpm[] 80 | # -> bpm.py 81 | 82 | # $func 83 | # -> sync_effect.py 84 | 85 | extend_prompt_range_regex = r'([0-9]+)\-([0-9]+)' 86 | #wild_card_regex = r'(?:\A|\W)__(\w+)__(?:\W|\Z)' 87 | wild_card_regex = r'(\A|\W)__([\w-]+)__(\W|\Z)' 88 | 89 | 90 | wave_func_map = { 91 | "zero": lambda p : 0, 92 | "one": lambda p : 1, 93 | "wave": lambda p : 1 - abs(math.cos(math.radians( (p + 0.5) * 180 ))), 94 | "wave2": lambda p : 1 - abs(math.cos(math.radians( p * 180 ))), 95 | "wave3": lambda p : p*p, 96 | "wave4": lambda p : (1-p)*(1-p), 97 | } 98 | 99 | wave_prompt_change_timing_map = { 100 | "zero": 0, 101 | "one": 0, 102 | "wave": 0, 103 | "wave2": 0.5, 104 | "wave3": 0, 105 | "wave4": 0, 106 | } 107 | 108 | def get_wave_type_list(): 109 | return list(wave_func_map.keys()) 110 | 111 | 112 | def resize_img_array(img_array, w, h): 113 | if img_array.shape[0] + img_array.shape[1] < h + w: 114 | interpolation = cv2.INTER_CUBIC 115 | else: 116 | interpolation = cv2.INTER_AREA 117 | return cv2.resize(img_array, (w, h), interpolation=interpolation) 118 | 119 | def resize_img(image:Image, w, h): 120 | _w,_h = image.size 121 | if _w == w and _h == h: 122 | return image 123 | 124 | im = resize_img_array(np.array(image), w, h) 125 | return Image.fromarray(im) 126 | 127 | def image_open_and_resize(path, w, h): 128 | image = Image.open(path) 129 | return resize_img(image, w, h) 130 | 131 | 132 | def get_wild_card_dir(): 133 | path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") 134 | path = os.path.join(path, "wildcards") 135 | return os.path.normpath(path) 136 | 
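# Note (illustrative, paths assumed): the frame-path helpers below expect a
# project layout of
#   <project_dir>/video_frame/<fps * interpolation_multi>/00000.png, 00001.png, ...
#   <project_dir>/overwrite_frame/<fps * interpolation_multi>/...   (optional manual overrides)
# For example (separator depends on the OS):
#   get_video_frame_path("proj", 3, 8, 2)   # -> "proj/video_frame/16/00006.png"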
137 | def get_video_frame_path(project_dir, i, fps, interpolation_multi): 138 | if not project_dir: 139 | return "" 140 | path = os.path.join(os.path.join(project_dir, "video_frame"), f"{fps * interpolation_multi}") 141 | path = os.path.join(path, f"{str(i * interpolation_multi).zfill(5)}.png") 142 | return path 143 | 144 | def get_overwrite_frame_path(project_dir, i, fps, interpolation_multi): 145 | if not project_dir: 146 | return "" 147 | path = os.path.join(os.path.join(project_dir, "overwrite_frame"), f"{fps * interpolation_multi}") 148 | if i == 0: 149 | path_list = [ os.path.join(path, f"{str(0).zfill(5)}.png") ] 150 | else: 151 | path_list = [ os.path.join(path, f"{str(n).zfill(5)}.png") for n in range((i-1) * interpolation_multi+1, (i) * interpolation_multi+1)] 152 | 153 | for p in path_list: 154 | if os.path.isfile(p): 155 | return p 156 | return "" 157 | 158 | def run_cmd(cmd, silent = False): 159 | cmd = list(map(lambda arg: str(arg), cmd)) 160 | if not silent: 161 | print("Executing %s" % " ".join(cmd)) 162 | popen_params = {"stdout": sp.DEVNULL, "stderr": sp.PIPE, "stdin": sp.DEVNULL} 163 | 164 | if os.name == "nt": 165 | popen_params["creationflags"] = 0x08000000 166 | 167 | proc = sp.Popen(cmd, **popen_params) 168 | out, err = proc.communicate() # proc.wait() 169 | proc.stderr.close() 170 | 171 | if proc.returncode: 172 | raise IOError(err.decode("utf8")) 173 | 174 | del proc 175 | 176 | def encode_video(input_pattern, starting_number, output_dir, fps, quality, encoding, create_segments, segment_duration, ffmpeg_path, sound_file_path): 177 | two_pass = (encoding == "VP9 (webm)") 178 | alpha_channel = ("webm" in encoding) 179 | suffix = "webm" if "webm" in encoding else "mp4" 180 | output_location = output_dir + f".{suffix}" 181 | 182 | encoding_lib = { 183 | "VP9 (webm)": "libvpx-vp9", 184 | "VP8 (webm)": "libvpx", 185 | "H.264 (mp4)": "libx264", 186 | "H.265 (mp4)": "libx265", 187 | }[encoding] 188 | 189 | args = [ 190 | "-framerate", fps, 191 | "-start_number", int(starting_number), 192 | "-i", input_pattern 193 | ] 194 | 195 | if sound_file_path: 196 | args += ["-i", sound_file_path] 197 | 198 | args+=[ 199 | "-c:v", encoding_lib, 200 | "-b:v","0", 201 | "-crf", quality, 202 | ] 203 | 204 | if encoding_lib == "libvpx-vp9": 205 | args += ["-pix_fmt", "yuva420p"] 206 | 207 | if(ffmpeg_path == ""): 208 | ffmpeg_path = "ffmpeg" 209 | if(platform.system() == "Windows"): 210 | ffmpeg_path += ".exe" 211 | 212 | print("\n\n") 213 | if two_pass: 214 | first_pass_args = args + [ 215 | "-pass", "1", 216 | "-an", 217 | "-f", "null", 218 | os.devnull 219 | ] 220 | 221 | second_pass_args = args + [ 222 | "-pass", "2", 223 | output_location 224 | ] 225 | 226 | print("Running first pass ffmpeg encoding") 227 | 228 | run_cmd([ffmpeg_path] + first_pass_args) 229 | print("Running second pass ffmpeg encoding. This could take a while...") 230 | run_cmd([ffmpeg_path] + second_pass_args) 231 | else: 232 | print("Running ffmpeg encoding. This could take a while...") 233 | run_cmd([ffmpeg_path] + args + [output_location]) 234 | 235 | if(create_segments): 236 | print("Segmenting video") 237 | run_cmd([ffmpeg_path] + [ 238 | "-i", output_location, 239 | "-f", "segment", 240 | "-segment_time", segment_duration, 241 | "-vcodec", "copy", 242 | "-acodec", "copy", 243 | f"{output_dir}.%d.{suffix}" 244 | ]) 245 | 
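# Note (illustrative, paths and values assumed): for a single-pass H.264
# render with audio, the args assembled in encode_video() boil down to a
# command like the following:
#   ffmpeg -framerate 8 -start_number 0 -i frames/%05d.png -i song.wav \
#          -c:v libx264 -b:v 0 -crf 20 out.mp4
# The VP9 path additionally runs a first pass with "-pass 1 -an -f null"
# before the real encode.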
246 | def extract_sound(sound_file_path, output_dir, ffmpeg_path): 247 | ext = os.path.splitext(os.path.basename(sound_file_path))[1] 248 | 249 | if ext in (".mp4",".MP4"): 250 | 251 | if(ffmpeg_path == ""): 252 | ffmpeg_path = "ffmpeg" 253 | if(platform.system() == "Windows"): 254 | ffmpeg_path += ".exe" 255 | 256 | tmp_path = os.path.join( output_dir, "sound.mp4" ) 257 | run_cmd([ffmpeg_path] + [ 258 | "-i", sound_file_path, 259 | "-vn", 260 | "-acodec", "copy", 261 | tmp_path 262 | ]) 263 | print("tmp_path : ",tmp_path) 264 | if os.path.isfile(tmp_path): 265 | sound_file_path = tmp_path 266 | 267 | return sound_file_path 268 | 269 | def remove_pngs_in_dir(path): 270 | if not os.path.isdir(path): 271 | return 272 | pngs = glob.glob( os.path.join(path, "*.png") ) 273 | for png in pngs: 274 | os.remove(png) 275 | 
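# Note (illustrative, values assumed): extract_video_frame() below shells out
# to ffmpeg roughly as follows (fps=8 with flow_interpolation_multi=2
# extracts at 16 fps):
#   ffmpeg -i movie.mp4 -start_number 0 -vf fps=16 <project_dir>/video_frame/16/%05d.png
# Frames already on disk are reused; delete the directory to force re-extraction.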
276 | def extract_video_frame(fe_project_dir, fe_movie_path, fps, flow_interpolation_multi,fe_ffmpeg_path): 277 | if (not fe_project_dir) or (not os.path.isdir(fe_project_dir)): 278 | print("Directory not found : ", fe_project_dir) 279 | return 280 | 281 | if (not fe_movie_path) or (not os.path.isfile(fe_movie_path)): 282 | print("Movie File not found : ", fe_movie_path) 283 | return 284 | 285 | extract_dir = os.path.join(os.path.join(fe_project_dir, "video_frame"), f"{fps * flow_interpolation_multi}") 286 | os.makedirs(extract_dir, exist_ok=True) 287 | 288 | pngs = glob.glob( os.path.join(extract_dir ,"[0-9]*.png"), recursive=False) 289 | if pngs: 290 | print("video frame found. skip extract_video_frame") 291 | return 292 | # remove_pngs_in_dir(extract_dir) 293 | 294 | args = [ 295 | "-i", fe_movie_path, 296 | "-start_number", 0, 297 | "-vf", 298 | f"fps={fps * flow_interpolation_multi}", 299 | os.path.join(extract_dir, "%05d.png") 300 | ] 301 | 302 | if(fe_ffmpeg_path == ""): 303 | fe_ffmpeg_path = "ffmpeg" 304 | if(platform.system() == "Windows"): 305 | fe_ffmpeg_path += ".exe" 306 | 307 | run_cmd([fe_ffmpeg_path] + args) 308 | 309 | return 310 | 311 | 312 | def set_weights(match_obj, wave_progress): 313 | weight_0 = 0 314 | weight_1 = 0 315 | if match_obj.group(1) is not None: 316 | weight_0 = float(match_obj.group(1)) 317 | if match_obj.group(2) is not None: 318 | weight_1 = float(match_obj.group(2)) 319 | 320 | max_weight = max(weight_0, weight_1) 321 | min_weight = min(weight_0, weight_1) 322 | 323 | weight_range = max_weight - min_weight 324 | weight = min_weight + weight_range * wave_progress 325 | return f"{weight:.3f}" 326 | 327 | def set_weights2(match_obj, wave_progress): 328 | weight_0 = 0 329 | weight_1 = 0 330 | if match_obj.group(1) is not None: 331 | weight_0 = float(match_obj.group(1)) 332 | if match_obj.group(2) is not None: 333 | weight_1 = float(match_obj.group(2)) 334 | 335 | min_weight = weight_0 336 | max_weight = weight_1 337 | 338 | weight_range = max_weight - min_weight 339 | weight = min_weight + weight_range * wave_progress 340 | return f"{weight:.3f}" 341 | 342 | #def get_weights(match_obj, out_list): 343 | # out_list.append( float(match_obj.group(1)) ) 344 | # return "" 345 | 346 | #def get_weights2(match_obj, out_list1, out_list2): 347 | # out_list1.append( float(match_obj.group(1)) ) 348 | # out_list2.append( float(match_obj.group(2)) ) 349 | # return "" 350 | 351 | def get_weights(match_obj, *list_of_out_list): 352 | vals = [ float(x) for x in match_obj.groups() if x is not None ] 353 | for i, v in enumerate( vals ): 354 | list_of_out_list[i].append( v ) 355 | return "" 356 | 357 | def get_weights_text(match_obj, out_list1, out_list2): 358 | out_list1.append( str(match_obj.group(1)) ) 359 | if match_obj.group(2) is not None: 360 | out_list2.append( str(match_obj.group(2)) ) 361 | return "" 362 | 363 | def get_random_value(match_obj): 364 | m1 = float(match_obj.group(1)) 365 | m2 = 0 366 | if match_obj.group(2) is not None: 367 | m2 = float(match_obj.group(2)) 368 | 369 | v = random.uniform(m1,m2) 370 | return "{:.3f}".format(v) 371 | 372 | def replace_wild_card_token(match_obj, wild_card_map): 373 | m1 = match_obj.group(1) 374 | m3 = match_obj.group(3) 375 | 376 | dict_name = match_obj.group(2) 377 | 378 | if dict_name in wild_card_map: 379 | token_list = wild_card_map[dict_name] 380 | token = token_list[random.randint(0,len(token_list)-1)] 381 | return m1+token+m3 382 | else: 383 | return match_obj.group(0) 384 | 385 | 386 | def get_positive_prompt_from_image(img): 387 | from modules.generation_parameters_copypaste import parse_generation_parameters 388 | geninfo, _ = images.read_info_from_image( img ) 389 | res = parse_generation_parameters(geninfo) 390 | return res["Prompt"] 391 | 392 | 393 | def str_to_wave_list(raw_wave_list): 394 | if not raw_wave_list: 395 | raise IOError(f"Invalid input in wave list: {raw_wave_list}") 396 | wave_list=[] 397 | lines = raw_wave_list.split("\n") 398 | 399 | #start_msec,type,(strength) 400 | 401 | for wave_line in lines: 402 | params = wave_line.split(",") 403 | params = [x.strip() for x in params] 404 | if len(params) == 2: 405 | wave_list.append( {"start_msec": int(params[0]), "type": 
params[1], "strength": 1.0 }) 406 | elif len(params) == 3: 407 | wave_list.append( {"start_msec": int(params[0]), "type": params[1], "strength": float(params[2]) }) 408 | else: 409 | raise IOError(f"Invalid input in wave list line: {wave_line}") 410 | wave_list = sorted(wave_list, key=lambda x: x['start_msec']) 411 | 412 | start_times = [x["start_msec"] for x in wave_list] 413 | start_times.pop(0) 414 | start_times.append(start_times[-1] + 1) 415 | 416 | for i in range( len(wave_list) ): 417 | wave_list[i]["end_msec"] = start_times[i]-1 418 | 419 | print(wave_list) 420 | 421 | if wave_list[-1]["type"] != "end": 422 | print("!!!!!!!!!!! Warning. last element in wave list is not [end]") 423 | wave_list[-1]["type"] = "end" 424 | 425 | return wave_list 426 | 427 | def create_simple_wave_list(sound_file_path, video_file_path): 428 | sound_len_msec = -1 429 | video_len_msec = -1 430 | 431 | if sound_file_path and os.path.isfile(sound_file_path): 432 | import librosa 433 | wave, sr = librosa.load(sound_file_path) 434 | sound_len_msec = 1000 * librosa.get_duration(y=wave, sr=sr) 435 | sound_len_msec = int(sound_len_msec) 436 | 437 | if video_file_path and os.path.isfile(video_file_path): 438 | cap = cv2.VideoCapture(video_file_path) 439 | video_frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) 440 | video_fps = cap.get(cv2.CAP_PROP_FPS) 441 | video_len_msec = 1000 * video_frame_count / video_fps 442 | video_len_msec = int(video_len_msec) 443 | 444 | if sound_len_msec == -1: 445 | len_msec = video_len_msec 446 | elif video_len_msec == -1: 447 | len_msec = sound_len_msec 448 | else: 449 | len_msec = min(sound_len_msec, video_len_msec) 450 | 451 | wave_list = [ 452 | {"start_msec": 0, "type": "wave", "strength": 1.0, "end_msec":len_msec-1 }, 453 | {"start_msec": len_msec, "type": "end", "strength": 1.0, "end_msec":len_msec }, 454 | ] 455 | 456 | return wave_list 457 | 458 | 459 | def wave_list_to_str(wave_list): 460 | wave_str_list = [] 461 | for w in wave_list: 462 | if w["type"] in ("zero", "end") or w["strength"] == 1.0: 463 | wave_str_list.append( f'{w["start_msec"]},{w["type"]}' ) 464 | else: 465 | wave_str_list.append( f'{w["start_msec"] },{w["type"]},{w["strength"]}' ) 466 | 467 | return "\n".join(wave_str_list) 468 | 469 | def merge_wave_list(org_list,add_list,start_msec,end_msec): 470 | length = org_list[-1]["start_msec"] 471 | end_msec = min(length-1, end_msec) 472 | 473 | org_list = [x for x in org_list if not (start_msec <= x["start_msec"] <= end_msec)] 474 | add_list = [x for x in add_list if start_msec <= x["start_msec"] <= end_msec] 475 | 476 | wave_list = org_list + add_list 477 | 478 | wave_list = sorted(wave_list, key=lambda x: x['start_msec']) 479 | 480 | start_times = [x["start_msec"] for x in wave_list] 481 | start_times.pop(0) 482 | start_times.append(start_times[-1] + 1) 483 | 484 | for i in range( len(wave_list) ): 485 | wave_list[i]["end_msec"] = start_times[i]-1 486 | 487 | return wave_list 488 | 489 | def process_image(p, loopback_count, str_for_loopback, is_controlnet, img_for_controlnet): 490 | if skip_process_for_debug: 491 | print("skip process for debug") 492 | return processing.Processed(p,[p.init_images[0]],p.seed) 493 | 494 | if is_controlnet: 495 | scripts.util_sd_loopback_music_sync_wave.controlnet.enable_controlnet(p, img_for_controlnet) 496 | else: 497 | scripts.util_sd_loopback_music_sync_wave.controlnet.disable_controlnet(p) 498 | 499 | while True: 500 | copy_p = copy.copy(p) 501 | processed = processing.process_images(copy_p) 502 | loopback_count -= 1 503 | 504 
| if loopback_count <= 0: 505 | break 506 | 507 | p.init_images = [processed.images[0]] 508 | p.seed = processed.seed + 1 509 | p.denoising_strength = str_for_loopback 510 | return processed 511 | 512 | def outpainting(p, img, org_mask_array, op_mask_blur, inpaint_full_res, op_inpainting_fill, op_str, op_seed, is_controlnet, img_for_controlnet): 513 | p.init_images = [ img ] 514 | p.mask_blur = op_mask_blur * 2 515 | p.inpainting_fill = op_inpainting_fill 516 | p.inpaint_full_res = inpaint_full_res 517 | p.denoising_strength = op_str 518 | p.seed = op_seed 519 | 520 | #image_mask 521 | k_size = int(op_mask_blur*2) // 2 * 2 + 1 522 | if k_size > 2: 523 | kernel = np.ones((k_size,k_size),np.uint8) 524 | mask_array = cv2.dilate(org_mask_array, kernel, iterations=1 ) 525 | else: 526 | mask_array = org_mask_array 527 | # debug_save_img_array(mask_array,"out_image_mask") 528 | p.image_mask = Image.fromarray(mask_array, mode="L") 529 | 530 | #latent_mask 531 | k_size = int(op_mask_blur / 2) // 2 * 2 + 1 532 | if k_size > 2: 533 | kernel = np.ones((k_size,k_size),np.uint8) 534 | mask_array = cv2.dilate(org_mask_array, kernel, iterations=1 ) 535 | else: 536 | mask_array = org_mask_array 537 | # debug_save_img_array(mask_array,"out_latent_mask") 538 | p.latent_mask = Image.fromarray(mask_array, mode="L") 539 | 540 | if is_controlnet: 541 | scripts.util_sd_loopback_music_sync_wave.controlnet.enable_controlnet(p, img_for_controlnet) 542 | else: 543 | scripts.util_sd_loopback_music_sync_wave.controlnet.disable_controlnet(p) 544 | 545 | state.job_count += 1 546 | 547 | # debug_save_img(img,"pre_out") 548 | processed = processing.process_images(p) 549 | # debug_save_img(processed.images[0],"post_out") 550 | 551 | return processed.images[0] 552 | 553 | 554 | def apply_optical_flow(_p, i, fps, interpolation_multi, flow_inpaint_method, flow_occ_area_th, project_dir, op_mask_blur, op_inpainting_fill, op_str, op_seed, is_controlnet, img_for_controlnet): 555 | print("apply_optical_flow") 556 | p = copy.copy(_p) 557 | 558 | img = p.init_images[0] 559 | 560 | if i == 0: 561 | return img 562 | 563 | def get_optical_flow_path(project_dir, i, interpolation_multi): 564 | base_path = os.path.join(os.path.join(project_dir, "optical_flow"),f"{fps * interpolation_multi}") 565 | base_path2 = os.path.join(os.path.join(project_dir, "occ_mask"), f"{fps * interpolation_multi}") 566 | nums = range((i-1)*interpolation_multi+1 , (i)*interpolation_multi+1) 567 | path = [os.path.join(base_path, f"{str(i).zfill(5)}.npy") for i in nums] 568 | path2 = [os.path.join(base_path2, f"{str(i).zfill(5)}.png") for i in nums] 569 | return path,path2 570 | 571 | o_path,m_path = get_optical_flow_path(project_dir, i, interpolation_multi) 572 | 573 | img, mask_array = scripts.util_sd_loopback_music_sync_wave.raft.apply_flow(img, o_path, m_path) 574 | 575 | if mask_array is None: 576 | return img 577 | 578 | img = img.convert("RGB") 579 | mask_array = mask_array.clip(0, 255).astype(np.uint8) 580 | 581 | if flow_inpaint_method == 3: 582 | return img 583 | 584 | if flow_inpaint_method == 0: 585 | img = Image.fromarray(cv2.inpaint( np.array(img), mask_array,3,cv2.INPAINT_TELEA)) 586 | return img 587 | 588 | if flow_inpaint_method == 2: 589 | bad_pixels = np.count_nonzero(mask_array > 0) 590 | bad_rate = bad_pixels / (mask_array.shape[0] * mask_array.shape[1]) 591 | print("bad_pixels = ",bad_pixels) 592 | print("total = ",mask_array.shape[0] * mask_array.shape[1]) 593 | print("rate = ",100 * bad_rate) 594 | if bad_rate < flow_occ_area_th: 595 | img = 
Image.fromarray(cv2.inpaint( np.array(img), mask_array,3,cv2.INPAINT_TELEA)) 596 | return img 597 | 598 | 599 | org_mask_array = mask_array 600 | 601 | inpaint_full_res = False 602 | 603 | return outpainting(p, img, org_mask_array, op_mask_blur, inpaint_full_res, op_inpainting_fill, op_str, op_seed, is_controlnet, img_for_controlnet) 604 | 605 | 606 | def affine_image(_p, op_mask_blur, op_inpainting_fill, op_str, op_seed, affine_input, is_controlnet, img_for_controlnet): 607 | print("affine_image") 608 | p = copy.copy(_p) 609 | 610 | img = p.init_images[0].convert('RGBA') 611 | img.putalpha(255) 612 | img = scripts.util_sd_loopback_music_sync_wave.affine.AffineImage(img, *affine_input) 613 | org_mask_array = np.array(img)[:, :, 3] 614 | 615 | if org_mask_array.min() == 255: 616 | print("skip outpainting") 617 | return img.convert("RGB") 618 | 619 | org_mask_array = 255 - org_mask_array 620 | img = img.convert("RGB") 621 | 622 | inpaint_full_res = False 623 | 624 | return outpainting(p, img, org_mask_array, op_mask_blur, inpaint_full_res, op_inpainting_fill, op_str, op_seed, is_controlnet, img_for_controlnet) 625 | 626 | def apply_slide(_p, op_mask_blur, op_inpainting_fill, op_str, op_seed, slide_inputs, is_controlnet, img_for_controlnet): 627 | print("apply_slide") 628 | p = copy.copy(_p) 629 | 630 | img = p.init_images[0].convert('RGBA') 631 | img.putalpha(255) 632 | 633 | img = scripts.util_sd_loopback_music_sync_wave.slide.SlideImage(img, *slide_inputs) 634 | 635 | org_mask_array = np.array(img)[:, :, 3] 636 | 637 | if org_mask_array.min() == 255: 638 | print("skip outpainting") 639 | return img.convert("RGB") 640 | 641 | org_mask_array = 255 - org_mask_array 642 | img = img.convert("RGB") 643 | 644 | inpaint_full_res = False 645 | 646 | return outpainting(p, img, org_mask_array, op_mask_blur, inpaint_full_res, op_inpainting_fill, op_str, op_seed, is_controlnet, img_for_controlnet) 647 | 648 | 649 | def apply_inpaint(_p, mask_prompt, inpaint_prompt, op_mask_blur, op_inpainting_fill, op_str, op_seed, is_controlnet, img_for_controlnet): 650 | print("apply_inpaint") 651 | p = copy.copy(_p) 652 | 653 | img = p.init_images[0] 654 | 655 | masks = scripts.util_sd_loopback_music_sync_wave.sam.get_mask_from_sam( img, mask_prompt, 0.3, 0 ) 656 | if not masks: 657 | print("get_mask_from_sam failed.") 658 | return img 659 | 660 | p.prompt = inpaint_prompt 661 | 662 | org_mask_array = np.asarray( masks[0] ) 663 | 664 | inpaint_full_res = False 665 | 666 | return outpainting(p, img, org_mask_array, op_mask_blur, inpaint_full_res, op_inpainting_fill, op_str, op_seed, is_controlnet, img_for_controlnet) 667 | 668 | # https://qiita.com/s-kajioka/items/9c9fc6c0e9e8a9d05800 669 | def adjust_brightness(img, type): 670 | img = np.array(img) 671 | hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV) 672 | h,s,v = cv2.split(hsv) 673 | 674 | def type0(input): 675 | S = 32 676 | M = 128 677 | r = (input - np.mean(input)) / np.std(input) * S + M 678 | return r.clip(min=0,max=255).astype(np.uint8) 679 | 680 | def type1(input): 681 | clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(3, 3)) 682 | return clahe.apply(input) 683 | 684 | result = type0(v) if type==0 else type1(v) 685 | 686 | hsv = cv2.merge((h,s,result)) 687 | rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) 688 | return Image.fromarray(rgb) 689 | 690 | def debug_info_suffix(mode_setting, base_denoising_strength, additional_denoising_strength, inner_lb_count, inner_lb_str): 691 | s=[ 692 | "lb" if mode_setting == "loopback" else "i2i", 693 | f"bstr{ 
str(int(base_denoising_strength*100)).zfill(3) }", 694 | f"astr{ str(int(additional_denoising_strength*100)).zfill(3) }", 695 | ] 696 | 697 | if inner_lb_count > 1: 698 | s += [ 699 | f"i{ inner_lb_count }", 700 | f"istr{ str(int(inner_lb_str*100)).zfill(3) }", 701 | ] 702 | return "_".join(s) 703 | 704 | def create_output_dir(base_name, suffix, sample_path, project_dir): 705 | if(base_name==""): 706 | base_name = time.strftime("%Y%m%d-%H%M%S") 707 | else: 708 | base_name = base_name + "-" + time.strftime("%Y%m%d-%H%M%S") 709 | 710 | base_name += "_" + suffix 711 | 712 | loopback_wave_path = os.path.join(sample_path, "loopback-music-sync-wave") 713 | if os.path.isdir( project_dir ): 714 | loopback_wave_path = project_dir 715 | loopback_wave_images_path = os.path.join(loopback_wave_path, base_name) 716 | 717 | os.makedirs(loopback_wave_images_path, exist_ok=True) 718 | 719 | return loopback_wave_path, loopback_wave_images_path 720 | 721 | def main_wave_loop(p, wave_list, current_time, total_progress, mode_setting, initial_denoising_strength, denoising_strength_change_amplitude, fps, wave_status, common_prompt_map, extend_prompt_map, init_image_per_wave_map, wild_card_map, effects, bpm_event): 722 | 723 | wave_index = wave_status["wave_index"] 724 | prompt_changed = wave_status["prompt_changed"] 725 | current_common_prompt = wave_status["current_common_prompt"] 726 | current_extend_prompt = wave_status["current_extend_prompt"] 727 | 728 | init_image = None 729 | 730 | 731 | while True: 732 | wave = wave_list[wave_index] 733 | wave_start_time = wave["start_msec"] 734 | wave_end_time = wave["end_msec"] 735 | wave_strength = wave["strength"] 736 | wave_type = wave["type"] 737 | 738 | if wave_type == "end": 739 | return False 740 | 741 | wave_prompt_change_timing = wave_prompt_change_timing_map[ wave_type ] 742 | 743 | while True: 744 | 745 | wave_progress = (current_time - wave_start_time)/(wave_end_time - wave_start_time) 746 | print("wave_progress = ", wave_progress) 747 | 748 | if prompt_changed == False: 749 | if wave_progress >= wave_prompt_change_timing: 750 | print("prompt_change_timing") 751 | prompt_changed = True 752 | 753 | # prompt change 754 | if common_prompt_map: 755 | if wave_index in common_prompt_map: 756 | current_common_prompt = common_prompt_map[ wave_index ]["prompt"] 757 | 758 | if extend_prompt_map: 759 | if wave_index in extend_prompt_map: 760 | current_extend_prompt = extend_prompt_map[ wave_index ] 761 | else: 762 | current_extend_prompt = "" 763 | 764 | if mode_setting == "loopback": 765 | # force init_image change 766 | if init_image_per_wave_map: 767 | if wave_index in init_image_per_wave_map: 768 | new_init_image_path = init_image_per_wave_map[ wave_index ] 769 | init_image = Image.open(new_init_image_path) 770 | 771 | try: 772 | prompt = get_positive_prompt_from_image(init_image) 773 | if prompt: 774 | current_common_prompt = prompt 775 | except Exception as e: 776 | print("get_positive_prompt_from_image failed. 
",new_init_image_path) 777 | 778 | init_image = resize_img(init_image, p.width, p.height) 779 | 780 | # register bpm event 781 | current_common_prompt = bpm_event.parse_prompt(current_common_prompt, current_time) 782 | current_extend_prompt = bpm_event.parse_prompt(current_extend_prompt, current_time) 783 | 784 | # wild card 785 | if wild_card_map: 786 | current_common_prompt = re.sub(wild_card_regex, lambda x: replace_wild_card_token(x, wild_card_map ), current_common_prompt) 787 | current_extend_prompt = re.sub(wild_card_regex, lambda x: replace_wild_card_token(x, wild_card_map ), current_extend_prompt) 788 | 789 | # random 790 | current_common_prompt = re.sub(random_regex, lambda x: get_random_value(x ), current_common_prompt) 791 | current_extend_prompt = re.sub(random_regex, lambda x: get_random_value(x ), current_extend_prompt) 792 | 793 | # effect 794 | current_common_prompt = effects.parse_prompt(current_common_prompt) 795 | current_extend_prompt = effects.parse_prompt(current_extend_prompt) 796 | 797 | print("current_common_prompt: ", current_common_prompt) 798 | print("current_extend_prompt: ", current_extend_prompt) 799 | 800 | if wave_end_time < current_time: 801 | break 802 | 803 | wave_amp = wave_func_map[ wave_type ](wave_progress) 804 | 805 | wave_amp_str = wave_amp * wave_strength 806 | 807 | denoising_strength = initial_denoising_strength + denoising_strength_change_amplitude * wave_amp_str 808 | 809 | print("wave_amp = ", wave_amp) 810 | 811 | raw_prompt = current_common_prompt + "," + current_extend_prompt 812 | 813 | # @func 814 | new_prompt = re.sub(wave_completed_regex, lambda x: set_weights(x, wave_progress), raw_prompt) 815 | new_prompt = re.sub(wave_remaining_regex, lambda x: set_weights(x, 1 - wave_progress), new_prompt) 816 | new_prompt = re.sub(wave_amplitude_regex, lambda x: set_weights2(x, wave_amp_str), new_prompt) 817 | new_prompt = re.sub(wave_shape_regex, lambda x: set_weights2(x, wave_amp), new_prompt) 818 | new_prompt = re.sub(wave_progress_regex, lambda x: set_weights2(x, wave_progress), new_prompt) 819 | new_prompt = re.sub(total_progress_regex, lambda x: set_weights2(x, total_progress), new_prompt) 820 | 821 | # exit 822 | wave_status["wave_index"] = wave_index 823 | wave_status["prompt_changed"] = prompt_changed 824 | wave_status["current_common_prompt"] = current_common_prompt 825 | wave_status["current_extend_prompt"] = current_extend_prompt 826 | 827 | wave_status["init_image"] = init_image 828 | wave_status["denoising_strength"] = denoising_strength 829 | 830 | wave_status["new_prompt"] = new_prompt 831 | 832 | return True 833 | 834 | print("main end wave ", wave_index) 835 | wave_index += 1 836 | prompt_changed = False 837 | print("main start wave ", wave_index) 838 | 839 | def sub_wave_loop(p, wave_list, current_time, total_progress, wave_status, extend_prompt_map, wild_card_map, effects, bpm_event): 840 | 841 | wave_index = wave_status["wave_index"] 842 | prompt_changed = wave_status["prompt_changed"] 843 | current_extend_prompt = wave_status["current_extend_prompt"] 844 | 845 | while True: 846 | wave = wave_list[wave_index] 847 | wave_start_time = wave["start_msec"] 848 | wave_end_time = wave["end_msec"] 849 | wave_strength = wave["strength"] 850 | wave_type = wave["type"] 851 | 852 | if wave_type == "end": 853 | return False 854 | 855 | wave_prompt_change_timing = wave_prompt_change_timing_map[ wave_type ] 856 | 857 | while True: 858 | 859 | wave_progress = (current_time - wave_start_time)/(wave_end_time - wave_start_time) 860 | print("sub 
wave_progress = ", wave_progress) 861 | 862 | if prompt_changed == False: 863 | if wave_progress >= wave_prompt_change_timing: 864 | print("sub prompt_change_timing") 865 | prompt_changed = True 866 | 867 | # prompt change 868 | if wave_index in extend_prompt_map: 869 | current_extend_prompt = extend_prompt_map[ wave_index ] 870 | else: 871 | current_extend_prompt = "" 872 | 873 | # register bpm event 874 | current_extend_prompt = bpm_event.parse_prompt(current_extend_prompt, current_time) 875 | 876 | # wild card 877 | current_extend_prompt = re.sub(wild_card_regex, lambda x: replace_wild_card_token(x, wild_card_map ), current_extend_prompt) 878 | 879 | # random 880 | current_extend_prompt = re.sub(random_regex, lambda x: get_random_value(x ), current_extend_prompt) 881 | 882 | # effect 883 | current_extend_prompt = effects.parse_prompt(current_extend_prompt) 884 | 885 | print("sub current_extend_prompt: ", current_extend_prompt) 886 | 887 | if wave_end_time < current_time: 888 | break 889 | 890 | wave_amp = wave_func_map[ wave_type ](wave_progress) 891 | 892 | wave_amp_str = wave_amp * wave_strength 893 | 894 | print("sub wave_amp = ", wave_amp) 895 | 896 | raw_prompt = current_extend_prompt 897 | 898 | # @func 899 | new_prompt = re.sub(wave_completed_regex, lambda x: set_weights(x, wave_progress), raw_prompt) 900 | new_prompt = re.sub(wave_remaining_regex, lambda x: set_weights(x, 1 - wave_progress), new_prompt) 901 | new_prompt = re.sub(wave_amplitude_regex, lambda x: set_weights2(x, wave_amp_str), new_prompt) 902 | new_prompt = re.sub(wave_shape_regex, lambda x: set_weights2(x, wave_amp), new_prompt) 903 | new_prompt = re.sub(wave_progress_regex, lambda x: set_weights2(x, wave_progress), new_prompt) 904 | new_prompt = re.sub(total_progress_regex, lambda x: set_weights2(x, total_progress), new_prompt) 905 | 906 | # exit 907 | wave_status["wave_index"] = wave_index 908 | wave_status["prompt_changed"] = prompt_changed 909 | wave_status["current_extend_prompt"] = current_extend_prompt 910 | 911 | wave_status["new_prompt"] = new_prompt 912 | 913 | return True 914 | 915 | print("sub end wave ", wave_index) 916 | wave_index += 1 917 | prompt_changed = False 918 | print("sub start wave ", wave_index) 919 | 920 | def create_extend_prompt_map(prompts, w_list): 921 | result_map = {} 922 | 923 | if not prompts or not w_list: 924 | return result_map 925 | 926 | lines = prompts.split("\n") 927 | for prompt_line in lines: 928 | # wave_range::ex_prompt 929 | # -1 930 | # 5 931 | # 1,3,8 932 | # 5-8 933 | 934 | params = prompt_line.split("::") 935 | if len(params) == 2: 936 | raw_range = params[0].replace(" ", "") 937 | m = re.match( extend_prompt_range_regex, raw_range ) 938 | range_list = [] 939 | if m: 940 | range_list = list(range(int(m.group(1)) , int(m.group(2)) + 1)) 941 | elif raw_range == "-1": 942 | range_list = list(range( len(w_list) )) 943 | else: 944 | range_list = [ int(x) for x in raw_range.split(",")] 945 | 946 | print(raw_range, range_list) 947 | 948 | for n in range_list: 949 | ext_prompt = "" 950 | if n in result_map: 951 | ext_prompt = result_map[n] + "," 952 | 953 | ext_prompt += params[1] 954 | result_map[n] = ext_prompt 955 | 956 | else: 957 | raise IOError(f"Invalid input in extend prompt line: {prompt_line}") 958 | 959 | for n in result_map: 960 | result_map[n] = result_map[n].replace(',', ' , ') 961 | 962 | return result_map 963 | 964 | def create_wild_card_map(wild_card_dir): 965 | result = {} 966 | if os.path.isdir(wild_card_dir): 967 | txt_list = glob.glob( 
os.path.join(wild_card_dir ,"**/*.txt"), recursive=True) 968 | #print("wild card txt_list : ", txt_list) 969 | for txt in txt_list: 970 | basename_without_ext = os.path.splitext(os.path.basename(txt))[0] 971 | with open(txt, encoding='utf-8') as f: 972 | try: 973 | result[basename_without_ext] = [s.rstrip() for s in f.readlines()] 974 | except Exception as e: 975 | print(e) 976 | print("can not read ", txt) 977 | return result 978 | 979 | def parse_sharp_func(prompt, fps): 980 | velx=[] 981 | vely=[] 982 | rot=[] 983 | zoom=[] 984 | cx=[] 985 | cy=[] 986 | rot_x=[] 987 | rot_y=[] 988 | 989 | prompt = re.sub(vel_x_regex, lambda x: get_weights(x, velx), prompt) 990 | prompt = re.sub(vel_y_regex, lambda x: get_weights(x, vely), prompt) 991 | prompt = re.sub(rot_regex, lambda x: get_weights(x, rot), prompt) 992 | prompt = re.sub(zoom_regex, lambda x: get_weights(x, zoom), prompt) 993 | prompt = re.sub(center_regex, lambda x: get_weights(x, cx, cy), prompt) 994 | prompt = re.sub(rot_x_regex, lambda x: get_weights(x, rot_x), prompt) 995 | prompt = re.sub(rot_y_regex, lambda x: get_weights(x, rot_y), prompt) 996 | 997 | _velx = 0 if not velx else (sum(velx) / fps) 998 | _vely = 0 if not vely else (sum(vely) / fps) 999 | _rot = 0 if not rot else (sum(rot) / fps) 1000 | _zoom = 1 if not zoom else (1 + ((sum(zoom) - len(zoom))/ fps)) 1001 | _cx = 0.5 if not cx else (sum(cx)/len(cx)) 1002 | _cy = 0.5 if not cy else (sum(cy)/len(cy)) 1003 | _rot_x = 0 if not rot_x else (sum(rot_x) / fps) 1004 | _rot_y = 0 if not rot_y else (sum(rot_y) / fps) 1005 | 1006 | affine_input = [_velx,_vely,_rot,_zoom,_cx,_cy, _rot_x, _rot_y] 1007 | 1008 | is_affine_need = (_velx != 0) or (_vely != 0) or (_rot != 0) or (_zoom != 1) or (_rot_x != 0) or (_rot_y != 0) 1009 | 1010 | return is_affine_need, affine_input, prompt 1011 | 1012 | def parse_slide_func(prompt, fps): 1013 | slide_x_type = [-1] 1014 | slide_x_speed = [-1] 1015 | slide_x_border = [0.5] 1016 | prompt = re.sub(slide_x_regex, lambda x: get_weights(x, slide_x_type, slide_x_speed, slide_x_border), prompt) 1017 | 1018 | slide_y_type = [-1] 1019 | slide_y_speed = [-1] 1020 | slide_y_border = [0.5] 1021 | prompt = re.sub(slide_y_regex, lambda x: get_weights(x, slide_y_type, slide_y_speed, slide_y_border), prompt) 1022 | slide_inputs = [(int(slide_x_type[-1]), slide_x_speed[-1]/ fps, slide_x_border[-1]), (int(slide_y_type[-1]), slide_y_speed[-1]/ fps, slide_y_border[-1]) ] 1023 | is_slide_need = (slide_x_type[-1] != -1) or (slide_y_type[-1] != -1) 1024 | 1025 | return is_slide_need, slide_inputs, prompt 1026 | 1027 | def save_param_file(file_path, params): 1028 | print("save param : ", params) 1029 | with open(file_path, 'w') as f: 1030 | json.dump(params, f, indent=4) 1031 | 1032 | def load_param_file(file_path): 1033 | params = {} 1034 | with open(file_path, "r") as f: 1035 | params = json.load(f) 1036 | print("load param : ", params) 1037 | return params 1038 | 1039 | class Script(modules.scripts.Script): 1040 | def title(self): 1041 | return "Loopback Music Sync Wave" 1042 | 1043 | def show(self, is_img2img): 1044 | return is_img2img 1045 | 1046 | def ui(self, is_img2img): 1047 | 1048 | param_file_path = gr.Textbox(label="Load inputs txt Path( Use parameters stored in *-inputs.txt )", lines=1, value="") 1049 | 1050 | cn_load_path = gr.Textbox(label="Load controlnet txt Path( Use parameters stored in *-controlnet.txt )", lines=1, value="") 1051 | 1052 | 1053 | fps = gr.Slider(minimum=1, maximum=120, step=1, label='Frames per second', value=8) 1054 | 1055 | 
project_dir = gr.Textbox(label="Project Directory (optional)", lines=1, value="") 1056 | sound_file_path = gr.Textbox(label="Sound File Path (optional)", lines=1, value="") 1057 | video_file_path = gr.Textbox(label="Video File Path (optional)", lines=1, value="") 1058 | 1059 | denoising_strength_change_amplitude = gr.Slider(minimum=0, maximum=1, step=0.01, label='Max additional denoise', value=0.6) 1060 | denoising_strength_add_freq = gr.Slider(minimum=1, maximum=10, step=1, label='Denoising Strength Add frequency', value=1) 1061 | 1062 | with gr.Accordion(label="Cheat Sheet", open=False): 1063 | gr.Textbox(label="ver 0.012", lines=5, interactive=False, 1064 | value= 1065 | "-------------------------------\n" 1066 | "------ Wave List format -------\n" 1067 | "-------------------------------\n" 1068 | "time,wave type,wave strength\n" 1069 | "\n" 1070 | "time ... Milliseconds from start\n" 1071 | "wave type ... Select from zero, one, wave, wave2, wave3, wave4, end\n" 1072 | "wave strength ... Optional, defaults to 1.0\n" 1073 | "\n" 1074 | "Can be generated automatically in the [Loopback Music Sync Wave] tab\n" 1075 | "\n" 1076 | "-------------------------------\n" 1077 | "----- Prompt Changes format ---\n" 1078 | "-------------------------------\n" 1079 | "index of wave list::prompt\n" 1080 | "\n" 1081 | "index of wave list ... Index of wave list starting from 0\n" 1082 | " 0:: refers to the first wave \n" 1083 | "prompt ... In this script, prompts are managed separately as common and additional parts;\n" 1084 | " a Prompt Changes line overwrites the common part\n" 1085 | "\n" 1086 | "-------------------------------\n" 1087 | "----- Extend Prompt format ----\n" 1088 | "-------------------------------\n" 1089 | "index of wave list::prompt\n" 1090 | "\n" 1091 | "index of wave list ... Index of wave list starting from 0\n" 1092 | " 0:: refers to the first wave \n" 1093 | " 1,3,6:: refers to waves of indexes 1, 3, and 6 \n" 1094 | " 5-8:: refers to waves of indexes 5, 6, 7, and 8 \n" 1095 | " -1:: refers to every wave \n" 1096 | "prompt ... This will be added to the additional part of the prompt.\n" 1097 | "\n" 1098 | "As an example, if you do not want to change the basic picture, \n" 1099 | "but want to change the background for each wave, \n" 1100 | "you can simply write a line like this:\n" 1101 | "-1::__background__\n" 1102 | "(However, this requires a background.txt file)\n" 1103 | "\n" 1104 | "-------------------------------\n" 1105 | "---------- Wild Card ----------\n" 1106 | "-------------------------------\n" 1107 | "In this script, wildcards can be used with the following statement\n" 1108 | "(No other extension needs to be installed; wildcards are implemented within this script)\n" 1109 | "__test__ ... 
which will be replaced by a random line in test.txt\n" 1110 | "\n" 1111 | "Wildcard files are searched from the following locations\n" 1112 | "extensions\\sd_loopback_music_sync_wave\\wildcards\n" 1113 | "\n" 1114 | "-------------------------------\n" 1115 | "---------- @function ----------\n" 1116 | "-------------------------------\n" 1117 | "@wave_completed(min,max)\n" 1118 | "@wave_remaining(min,max)\n" 1119 | "@wave_amplitude(start_val,end_val)\n" 1120 | "@wave_shape(start_val,end_val)\n" 1121 | "@wave_progress(start_val,end_val)\n" 1122 | "@total_progress(start_val,end_val)\n" 1123 | "@random(min,max)\n" 1124 | "\n" 1125 | "-------------------------------\n" 1126 | "---------- #function ----------\n" 1127 | "-------------------------------\n" 1128 | "#vel_x(x)\n" 1129 | "#vel_y(y)\n" 1130 | "#rot(deg)\n" 1131 | "#zoom(z)\n" 1132 | "#center(cx,cy)\n" 1133 | "#rot_x(deg)\n" 1134 | "#rot_y(deg)\n" 1135 | "\n" 1136 | "#slide_x(type,slide_val,border_pos)\n" 1137 | "#slide_y(type,slide_val,border_pos)\n" 1138 | "\n" 1139 | "#blur(blur_str)\n" 1140 | "#hue(type, hue)\n" 1141 | "#post_process(flag)\n" 1142 | "\n" 1143 | "-------------------------------\n" 1144 | "---------- $function ----------\n" 1145 | "-------------------------------\n" 1146 | "$shake_x(duration, amp)\n" 1147 | "$shake_y(duration, amp)\n" 1148 | "$shake_rot(duration, amp)\n" 1149 | "$shake_rot_x(duration, amp)\n" 1150 | "$shake_rot_y(duration, amp)\n" 1151 | "$shake_zoom(duration, amp)\n" 1152 | "$vibration(duration, amp)\n" 1153 | "\n" 1154 | "$random_xy(duration, x_amp, y_amp, resolution_msec=1000)\n" 1155 | "$random_zoom(duration, z_amp, resolution_msec=1000)\n" 1156 | "$random_rot(duration, r_amp, resolution_msec=1000)\n" 1157 | "$random_rot_x(duration, r_amp, resolution_msec=1000)\n" 1158 | "$random_rot_y(duration, r_amp, resolution_msec=1000)\n" 1159 | "$random_center(duration, amp_x, amp_y, cx=0.5, cy=0.5, resolution_msec=1000 )\n" 1160 | "\n" 1161 | "$pendulum_xy(duration, x1, x2, y1, y2 )\n" 1162 | "$pendulum_rot(duration, angle1, angle2 )\n" 1163 | "$pendulum_rot_x(duration, angle1, angle2 )\n" 1164 | "$pendulum_rot_y(duration, angle1, angle2 )\n" 1165 | "$pendulum_zoom(duration, z1, z2 )\n" 1166 | "$pendulum_center(duration, cx1, cx2, cy1, cy2 )\n" 1167 | "\n" 1168 | "$beat_blur(duration, amp)\n" 1169 | "$random_blur(duration, amp, resolution_msec=1000)\n" 1170 | "$pendulum_hue(duration, type, angle1, angle2)\n" 1171 | "$random_hue(duration, type, start_angle, amp_angle, resolution_msec=1000)\n" 1172 | "\n" 1173 | "$beat_slide_x(duration, type, amp_slide_val, border_pos=0.5, amp_border=0)\n" 1174 | "$beat_slide_y(duration, type, amp_slide_val, border_pos=0.5, amp_border=0)\n" 1175 | "$random_slide_x(duration, type, amp_slide_val, border_pos=0.5, amp_border=0, resolution_msec=1000)\n" 1176 | "$random_slide_y(duration, type, amp_slide_val, border_pos=0.5, amp_border=0, resolution_msec=1000)\n" 1177 | "\n" 1178 | "$inpaint(mask_prompt, inpaint_prompt)\n" 1179 | "\n" 1180 | "-------------------------------\n" 1181 | "-------- function usage -------\n" 1182 | "-------------------------------\n" 1183 | "Usage examples are provided as the sample wildcard files (wildcards/lb_sample_wildcard), so take a look there.\n" 1184 | "\n" 1185 | "------------------------------------------------------------\n" 1186 | "---------- How to write prompts that ignore waves ----------\n" 1187 | "------------------------------------------------------------\n" 1188 | "You can specify a prompt that ignores the wave list with the following statement\n" 1189 | 
"@@bpmBPM@DURATION[prompt]\n" 1190 | "\n" 1191 | "For example, the following in Extend Prompt would mean to add a prompt at 55.3 bpm for 8 seconds after the first wave\n" 1192 | "0::@@bpm55.3@8000[$beat_slide_x(500, 1, 0.05,0.65)]\n" 1193 | "\n" 1194 | ) 1195 | 1196 | with gr.Accordion(label="Main Wave", open=True): 1197 | wave_list = gr.Textbox(label="Wave List (Main)", lines=5, value="") 1198 | common_prompts = gr.Textbox(label="Prompt Changes (Main)", lines=5, value="") 1199 | extend_prompts = gr.Textbox(label="Extend Prompt (Main)", lines=5, value="") 1200 | 1201 | with gr.Accordion(label="Sub Wave", open=True): 1202 | sub_wave_list = gr.Textbox(label="Wave List (Sub)", lines=5, value="") 1203 | sub_extend_prompts = gr.Textbox(label="Extend Prompt (Sub)", lines=5, value="") 1204 | 1205 | save_video = gr.Checkbox(label='Save results as video', value=True) 1206 | output_name = gr.Textbox(label="Video Name", lines=1, value="") 1207 | video_quality = gr.Slider(minimum=0, maximum=60, step=1, label='Video Quality (crf) ', value=22) 1208 | video_encoding = gr.Dropdown(label='Video encoding ', value="H.264 (mp4)", choices=["VP9 (webm)", "VP8 (webm)", "H.265 (mp4)", "H.264 (mp4)"]) 1209 | 1210 | with gr.Accordion(label="Mode Settings", open=True): 1211 | use_video_frame_for_controlnet_in_loopback_mode = gr.Checkbox(label='Use video_frame for controlnet in loopback mode', value=False) 1212 | mode_setting = gr.Radio(label='Mode', choices=["loopback","img2img"], value="loopback", type="value") 1213 | use_controlnet_for_lb = gr.Checkbox(label='Use Controlnet for loopback', value=False) 1214 | use_controlnet_for_img2img = gr.Checkbox(label='Use Controlnet for img2img', value=True) 1215 | use_controlnet_for_inpaint = gr.Checkbox(label='Use Controlnet for inpaint', value=True) 1216 | use_controlnet_for_occ_inpaint = gr.Checkbox(label='Use Controlnet for Occlusion inpaint', value=True) 1217 | use_controlnet_for_outpaint = gr.Checkbox(label='Use Controlnet for outpaint', value=False) 1218 | cn_ref_input_type = gr.Radio(label='Controlnet reference only Input Type', choices=["1st input img","prev frame"], value="1st input img", type="value") 1219 | gr.HTML(value="
<br>\ 1220 | Regardless of the above selection, the image specified on the UI will be used first \ 1221 | <br>
") 1222 | 1223 | with gr.Accordion(label="Optical Flow Settings", open=True): 1224 | use_optical_flow = gr.Checkbox(label='Use Optical Flow', value=False) 1225 | use_optical_flow_cache = gr.Checkbox(label='Use Optical Flow Cache', value=True) 1226 | flow_interpolation_multi = gr.Slider(minimum=1, maximum=5, step=1, label='Interpolation Multiplier', value=1) 1227 | flow_inpaint_method = gr.Radio(label='Optical Flow Inpaint Method ', choices=["cv2","sd","cv2 + sd","none"], value="cv2 + sd", type="index") 1228 | flow_occ_area_th = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Occlusion area threshold for(cv2 + sd)', value=0.05) 1229 | flow_occ_detect_th = gr.Slider(minimum=0.1, maximum=5.0, step=0.01, label='Occlusion area detection threshold.', value=1.0) 1230 | 1231 | with gr.Accordion(label="Scene Detection Settings", open=False): 1232 | use_scene_detection = gr.Checkbox(label='Use Scene Detection ', value=True) 1233 | sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Scene Detection threshold', value=0.85) 1234 | sd_denoising_strength = gr.Slider(minimum=0, maximum=1, step=0.01, label='Denoise for New Scene', value=0.8) 1235 | 1236 | with gr.Accordion(label="OutPainting Setting", open=True): 1237 | op_mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id=self.elem_id("mask_blur")) 1238 | op_inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='fill', type="index", elem_id=self.elem_id("inpainting_fill")) 1239 | op_str = gr.Slider(minimum=0, maximum=1, step=0.01, label='Denoising Strength for OutPainting', value=0.8) 1240 | 1241 | with gr.Accordion(label="Upscale Setting", open=False): 1242 | us_width = gr.Number(value=-1, label="Width", precision=0, interactive=True) 1243 | us_height = gr.Number(value=-1, label="Height", precision=0, interactive=True) 1244 | us_method = gr.Radio(label='Method', choices=['latent',*[x.name for x in modules.shared.sd_upscalers]], value=modules.shared.sd_upscalers[0].name, type="value") 1245 | us_denoising_strength = gr.Slider(minimum=0, maximum=1, step=0.01, label='Denoising Strength for latent method', value=0.35) 1246 | 1247 | with gr.Accordion(label="Inner Loopback", open=False): 1248 | inner_lb_count = gr.Slider(minimum=1, maximum=10, step=1, label='Inner Loopback Count', value=1) 1249 | inner_lb_str = gr.Slider(minimum=0, maximum=1, step=0.01, label='Denoising Strength for Inner Loopback', value=0.25) 1250 | 1251 | with gr.Accordion(label="Advanced Settings", open=False): 1252 | auto_brightness = gr.Checkbox(label='Auto Brightness Adjustment ', value=False) 1253 | save_prompts = gr.Checkbox(label='Save prompts as text file', value=True) 1254 | initial_image_number = gr.Number(minimum=0, label='Initial generated image number', value=0) 1255 | ffmpeg_path = gr.Textbox(label="ffmpeg binary. 
Only set this if it fails otherwise.", lines=1, value="") 1256 | segment_video = gr.Checkbox(label='Cut video in to segments ', value=False) 1257 | video_segment_duration = gr.Slider(minimum=10, maximum=60, step=1, label='Video Segment Duration (seconds)', value=20) 1258 | 1259 | return [param_file_path, cn_load_path, wave_list, sub_wave_list, project_dir, sound_file_path, video_file_path, mode_setting, use_optical_flow, use_optical_flow_cache, flow_interpolation_multi, flow_inpaint_method, flow_occ_area_th, flow_occ_detect_th, use_scene_detection, sd_threshold, sd_denoising_strength, use_video_frame_for_controlnet_in_loopback_mode, op_mask_blur, op_inpainting_fill, op_str, inner_lb_count, inner_lb_str, denoising_strength_change_amplitude, denoising_strength_add_freq, initial_image_number, common_prompts,extend_prompts, sub_extend_prompts, save_prompts, save_video, output_name, fps, video_quality, video_encoding, ffmpeg_path, segment_video, video_segment_duration, use_controlnet_for_lb,use_controlnet_for_img2img,use_controlnet_for_inpaint,use_controlnet_for_occ_inpaint,use_controlnet_for_outpaint,cn_ref_input_type, us_width,us_height,us_method,us_denoising_strength,auto_brightness] 1260 | 1261 | 1262 | def run(self, p, param_file_path, cn_load_path, raw_wave_list, raw_sub_wave_list, project_dir, sound_file_path, video_file_path, mode_setting, use_optical_flow, use_optical_flow_cache, flow_interpolation_multi, flow_inpaint_method, flow_occ_area_th, flow_occ_detect_th, use_scene_detection, sd_threshold, sd_denoising_strength, use_video_frame_for_controlnet_in_loopback_mode, op_mask_blur, op_inpainting_fill, op_str, inner_lb_count, inner_lb_str, denoising_strength_change_amplitude, denoising_strength_add_freq, initial_image_number, common_prompts, extend_prompts, sub_extend_prompts, save_prompts, save_video, output_name, fps, video_quality, video_encoding, ffmpeg_path, segment_video, video_segment_duration,use_controlnet_for_lb,use_controlnet_for_img2img,use_controlnet_for_inpaint,use_controlnet_for_occ_inpaint,use_controlnet_for_outpaint,cn_ref_input_type, us_width,us_height,us_method,us_denoising_strength,auto_brightness): 1263 | calc_time_start = time.perf_counter() 1264 | 1265 | processing.fix_seed(p) 1266 | 1267 | scripts.util_sd_loopback_music_sync_wave.other_effect.initialize_cache() 1268 | 1269 | if param_file_path: 1270 | if not os.path.isfile(param_file_path): 1271 | raise IOError(f"Invalid input in param_file_path: {param_file_path}") 1272 | else: 1273 | params = load_param_file(param_file_path) 1274 | raw_wave_list = params["raw_wave_list"] 1275 | raw_sub_wave_list = params["raw_sub_wave_list"] 1276 | project_dir = params["project_dir"] 1277 | sound_file_path = params["sound_file_path"] 1278 | video_file_path = params["video_file_path"] 1279 | mode_setting = params["mode_setting"] 1280 | use_optical_flow = params["use_optical_flow"] 1281 | use_optical_flow_cache = params["use_optical_flow_cache"] 1282 | flow_interpolation_multi = params["flow_interpolation_multi"] 1283 | flow_inpaint_method = params["flow_inpaint_method"] 1284 | flow_occ_area_th = params["flow_occ_area_th"] 1285 | flow_occ_detect_th = params["flow_occ_detect_th"] 1286 | 1287 | use_scene_detection = params["use_scene_detection"] 1288 | sd_threshold = params["sd_threshold"] 1289 | sd_denoising_strength = params["sd_denoising_strength"] 1290 | 1291 | use_video_frame_for_controlnet_in_loopback_mode = params["use_video_frame_for_controlnet_in_loopback_mode"] 1292 | op_mask_blur = params["op_mask_blur"] 1293 | 
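# For orientation: the *-inputs.txt parsed by load_param_file() above is plain JSON,
# written by save_param_file() further down with the same keys read here. An
# abridged, hypothetical example of its contents:
# {
#     "raw_wave_list": "0,zero\n1000,wave\n2000,end",
#     "mode_setting": "loopback",
#     "fps": 8,
#     "p_denoising_strength": 0.4,
#     "p_seed": 12345
# }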
op_inpainting_fill = params["op_inpainting_fill"] 1294 | op_str = params["op_str"] 1295 | inner_lb_count = params["inner_lb_count"] 1296 | inner_lb_str = params["inner_lb_str"] 1297 | denoising_strength_change_amplitude = params["denoising_strength_change_amplitude"] 1298 | denoising_strength_add_freq = params["denoising_strength_add_freq"] 1299 | initial_image_number = params["initial_image_number"] 1300 | common_prompts = params["common_prompts"] 1301 | extend_prompts = params["extend_prompts"] 1302 | sub_extend_prompts = params["sub_extend_prompts"] 1303 | save_prompts = params["save_prompts"] 1304 | save_video = params["save_video"] 1305 | output_name = params["output_name"] 1306 | fps = params["fps"] 1307 | video_quality = params["video_quality"] 1308 | video_encoding = params["video_encoding"] 1309 | ffmpeg_path = params["ffmpeg_path"] 1310 | segment_video = params["segment_video"] 1311 | video_segment_duration = params["video_segment_duration"] 1312 | use_controlnet_for_lb = params["use_controlnet_for_lb"] 1313 | use_controlnet_for_img2img = params["use_controlnet_for_img2img"] 1314 | use_controlnet_for_inpaint = params["use_controlnet_for_inpaint"] 1315 | use_controlnet_for_occ_inpaint = params["use_controlnet_for_occ_inpaint"] 1316 | use_controlnet_for_outpaint = params["use_controlnet_for_outpaint"] 1317 | cn_ref_input_type = params["cn_ref_input_type"] 1318 | 1319 | us_width = params["us_width"] 1320 | us_height = params["us_height"] 1321 | us_method = params["us_method"] 1322 | us_denoising_strength = params["us_denoising_strength"] 1323 | 1324 | auto_brightness = params["auto_brightness"] 1325 | 1326 | p.denoising_strength = params["p_denoising_strength"] 1327 | p.prompt = params["p_prompt"] 1328 | p.negative_prompt = params["p_negative_prompt"] 1329 | p.seed = params["p_seed"] 1330 | p.sampler_name = params["p_sampler_name"] 1331 | p.cfg_scale = params["p_cfg_scale"] 1332 | p.width = params["p_width"] 1333 | p.height = params["p_height"] 1334 | 1335 | if cn_load_path: 1336 | if not os.path.isfile(cn_load_path): 1337 | raise IOError(f"Invalid input in cn_load_path: {cn_load_path}") 1338 | 1339 | p.extra_generation_params = { 1340 | "Max Additional Denoise": denoising_strength_change_amplitude, 1341 | } 1342 | 1343 | #input validation 1344 | raw_wave_list = raw_wave_list.strip() 1345 | if raw_wave_list: 1346 | wave_list = str_to_wave_list(raw_wave_list) 1347 | else: 1348 | if (sound_file_path and os.path.isfile(sound_file_path)) or (video_file_path and os.path.isfile(video_file_path)): 1349 | wave_list = create_simple_wave_list(sound_file_path,video_file_path) 1350 | else: 1351 | raise IOError(f"Invalid input in wave list: {raw_wave_list}") 1352 | 1353 | sub_wave_list = [] 1354 | raw_sub_wave_list = raw_sub_wave_list.strip() 1355 | if raw_sub_wave_list: 1356 | sub_wave_list = str_to_wave_list(raw_sub_wave_list) 1357 | 1358 | if sound_file_path: 1359 | if not os.path.isfile(sound_file_path): 1360 | raise IOError(f"Invalid input in sound_file_path: {sound_file_path}") 1361 | 1362 | if video_file_path: 1363 | if not os.path.isfile(video_file_path): 1364 | raise IOError(f"Invalid input in video_file_path: {video_file_path}") 1365 | else: 1366 | if use_optical_flow: 1367 | raise IOError(f"optical flow requires video file") 1368 | 1369 | #calc frames 1370 | total_length = wave_list[-1]["end_msec"] 1371 | frames = total_length * fps / 1000 1372 | print( "end_time = ",total_length ) 1373 | print( "fps = ",fps ) 1374 | print( "frames = ",frames ) 1375 | frames = int(frames) 1376 | 1377 
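# Worked example of the frame math above: a wave list whose last entry ends at
# 32000 msec, rendered at fps=8, yields frames = 32000 * 8 / 1000 = 256 images;
# in the loop below each index i maps back to current_time = 1000 * i / fps msec.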
| 1378 | # We save them ourselves for the sake of ffmpeg 1379 | p.do_not_save_samples = True 1380 | 1381 | p.batch_size = 1 1382 | p.n_iter = 1 1383 | 1384 | initial_seed = None 1385 | initial_info = None 1386 | 1387 | grids = [] 1388 | all_images = [] 1389 | original_init_image = p.init_images 1390 | state.job_count = frames * inner_lb_count 1391 | 1392 | initial_color_corrections = [processing.setup_color_correction(p.init_images[0])] 1393 | initial_denoising_strength = p.denoising_strength 1394 | 1395 | suffix = debug_info_suffix(mode_setting, initial_denoising_strength, denoising_strength_change_amplitude, inner_lb_count, inner_lb_str) 1396 | loopback_wave_path,loopback_wave_images_path = create_output_dir(output_name, suffix, p.outpath_samples, project_dir) 1397 | 1398 | p.outpath_samples = loopback_wave_images_path 1399 | 1400 | org_sound_file_path = sound_file_path 1401 | sound_file_path = extract_sound(sound_file_path, loopback_wave_images_path, ffmpeg_path) 1402 | 1403 | 1404 | cn_cache_dir = os.path.join(loopback_wave_path, "cn_detect_map") 1405 | cn_cache_dir = os.path.join(cn_cache_dir, f"{fps*flow_interpolation_multi}") 1406 | 1407 | scripts.util_sd_loopback_music_sync_wave.controlnet.initialize(p, cn_cache_dir, cn_load_path) 1408 | scripts.util_sd_loopback_music_sync_wave.controlnet.dump( loopback_wave_images_path + "-controlnet.txt" ) 1409 | 1410 | common_prompts = common_prompts.strip() 1411 | extend_prompts = extend_prompts.strip() 1412 | sub_extend_prompts = sub_extend_prompts.strip() 1413 | 1414 | # output prompts txt 1415 | if save_prompts: 1416 | params = {} 1417 | params["raw_wave_list"] = raw_wave_list 1418 | params["raw_sub_wave_list"] = raw_sub_wave_list 1419 | params["project_dir"] = project_dir 1420 | params["sound_file_path"] = org_sound_file_path 1421 | params["video_file_path"] = video_file_path 1422 | params["mode_setting"] = mode_setting 1423 | params["use_optical_flow"] = use_optical_flow 1424 | params["use_optical_flow_cache"] = use_optical_flow_cache 1425 | params["flow_interpolation_multi"] = flow_interpolation_multi 1426 | params["flow_inpaint_method"] = flow_inpaint_method 1427 | params["flow_occ_area_th"] = flow_occ_area_th 1428 | params["flow_occ_detect_th"] = flow_occ_detect_th 1429 | 1430 | params["use_scene_detection"] = use_scene_detection 1431 | params["sd_threshold"] = sd_threshold 1432 | params["sd_denoising_strength"] = sd_denoising_strength 1433 | 1434 | params["use_video_frame_for_controlnet_in_loopback_mode"] = use_video_frame_for_controlnet_in_loopback_mode 1435 | params["op_mask_blur"] = op_mask_blur 1436 | params["op_inpainting_fill"] = op_inpainting_fill 1437 | params["op_str"] = op_str 1438 | params["inner_lb_count"] = inner_lb_count 1439 | params["inner_lb_str"] = inner_lb_str 1440 | params["denoising_strength_change_amplitude"] = denoising_strength_change_amplitude 1441 | params["denoising_strength_add_freq"] = denoising_strength_add_freq 1442 | params["initial_image_number"] = initial_image_number 1443 | params["common_prompts"] = common_prompts 1444 | params["extend_prompts"] = extend_prompts 1445 | params["sub_extend_prompts"] = sub_extend_prompts 1446 | params["save_prompts"] = save_prompts 1447 | params["save_video"] = save_video 1448 | params["output_name"] = output_name 1449 | params["fps"] = fps 1450 | params["video_quality"] = video_quality 1451 | params["video_encoding"] = video_encoding 1452 | params["ffmpeg_path"] = ffmpeg_path 1453 | params["segment_video"] = segment_video 1454 | params["video_segment_duration"] = 
video_segment_duration 1455 | params["use_controlnet_for_lb"] = use_controlnet_for_lb 1456 | params["use_controlnet_for_img2img"] = use_controlnet_for_img2img 1457 | params["use_controlnet_for_inpaint"] = use_controlnet_for_inpaint 1458 | params["use_controlnet_for_occ_inpaint"] = use_controlnet_for_occ_inpaint 1459 | params["use_controlnet_for_outpaint"] = use_controlnet_for_outpaint 1460 | params["cn_ref_input_type"] = cn_ref_input_type 1461 | 1462 | params["us_width"] = us_width 1463 | params["us_height"] = us_height 1464 | params["us_method"] = us_method 1465 | params["us_denoising_strength"] = us_denoising_strength 1466 | 1467 | params["auto_brightness"] = auto_brightness 1468 | 1469 | params["p_denoising_strength"] = p.denoising_strength 1470 | params["p_prompt"] = p.prompt 1471 | params["p_negative_prompt"] = p.negative_prompt 1472 | params["p_seed"] = p.seed 1473 | params["p_sampler_name"] = p.sampler_name 1474 | params["p_cfg_scale"] = p.cfg_scale 1475 | params["p_width"] = p.width 1476 | params["p_height"] = p.height 1477 | 1478 | save_param_file(loopback_wave_images_path + "-inputs.txt", params) 1479 | 1480 | 1481 | with open(loopback_wave_images_path + "-prompts.txt", "w") as f: 1482 | generation_settings = [ 1483 | "Generation Settings", 1484 | "Wave List: ", 1485 | f"{raw_wave_list}", 1486 | "", 1487 | "Sub Wave List: ", 1488 | f"{raw_sub_wave_list}", 1489 | "", 1490 | f"FPS: {fps}", 1491 | f"Base Denoising Strength: {initial_denoising_strength}", 1492 | f"Max Additional Denoise: {denoising_strength_change_amplitude}", 1493 | f"Denoising Strength Add frequency: {denoising_strength_add_freq}", 1494 | f"Project Directory: {project_dir}", 1495 | f"Sound File: {org_sound_file_path}", 1496 | f"Video File: {video_file_path}", 1497 | f"Initial Image Number: {initial_image_number}", 1498 | "", 1499 | f"Mode: {mode_setting}", 1500 | f"Use Video Frame for Controlnet in Loopback mode: {use_video_frame_for_controlnet_in_loopback_mode}", 1501 | f"Use Controlnet for LoopBack: {use_controlnet_for_lb}", 1502 | f"Use Controlnet for img2img: {use_controlnet_for_img2img}", 1503 | f"Use Controlnet for inpaint: {use_controlnet_for_inpaint}", 1504 | f"Use Controlnet for occlusion inpaint: {use_controlnet_for_occ_inpaint}", 1505 | f"Use Controlnet for outpaint: {use_controlnet_for_outpaint}", 1506 | f"Controlnet reference only Input Type: {cn_ref_input_type}", 1507 | "", 1508 | "Optical Flow Settings", 1509 | f"Use Optical Flow: {use_optical_flow}", 1510 | f"Use Optical Flow Cache: {use_optical_flow_cache}", 1511 | f"Interpolation Multiplier: {flow_interpolation_multi}", 1512 | f"Inpaint Method: {flow_inpaint_method}", 1513 | f"Occlusion area threshold for (cv2 + sd): {flow_occ_area_th}", 1514 | f"Occlusion area detection threshold: {flow_occ_detect_th}", 1515 | "", 1516 | f"Use Scene Detection: {use_scene_detection}", 1517 | f"Scene Detection Threshold: {sd_threshold}", 1518 | f"Denoising Strength for New Scene: {sd_denoising_strength}", 1519 | "", 1520 | f"OutPainting Mask blur: {op_mask_blur}", 1521 | f"OutPainting Masked content: {op_inpainting_fill}", 1522 | f"OutPainting Denoising Strength: {op_str}", 1523 | "", 1524 | f"Inner Loopback Count: {inner_lb_count}", 1525 | f"Denoising Strength for Inner Loopback: {inner_lb_str}", 1526 | "", 1527 | "Video Encoding Settings", 1528 | f"Save Video: {save_video}", 1529 | "", 1530 | "Upscale Settings", 1531 | f"Width: {us_width}", 1532 | f"Height: {us_height}", 1533 | f"Upscale Method: {us_method}", 1534 | f"Denoising strength for 
latent: {us_denoising_strength}", 1535 | "", 1536 | f"Auto Brightness Adjustment: {auto_brightness}", 1537 | "", 1538 | ] 1539 | 1540 | if save_video: 1541 | generation_settings = generation_settings + [ 1542 | f"Framerate: {fps}", 1543 | f"Quality: {video_quality}", 1544 | f"Encoding: {video_encoding}", 1545 | f"Create Segmented Video: {segment_video}" 1546 | ] 1547 | 1548 | if segment_video: 1549 | generation_settings = generation_settings + [f"Segment Duration: {video_segment_duration}"] 1550 | 1551 | generation_settings = generation_settings + [ 1552 | "", 1553 | "Prompt Details", 1554 | "Initial Prompt:", 1555 | p.prompt, 1556 | "", 1557 | "Negative Prompt:", 1558 | p.negative_prompt, 1559 | "", 1560 | "Prompt Changes:", 1561 | common_prompts, 1562 | "", 1563 | "Extend Prompts:", 1564 | extend_prompts, 1565 | 1566 | "", 1567 | "Sub Extend Prompts:", 1568 | sub_extend_prompts 1569 | ] 1570 | 1571 | f.write('\n'.join(generation_settings)) 1572 | 1573 | # create maps 1574 | 1575 | def create_init_image_per_wave_map(root_path): 1576 | result = {} 1577 | init_image_per_wave_path = os.path.join(root_path, "video_frame_per_wave") 1578 | if os.path.isdir( init_image_per_wave_path ): 1579 | pngs = glob.glob( os.path.join(init_image_per_wave_path ,"[0-9]*.png"), recursive=False) 1580 | for png in pngs: 1581 | basename_without_ext = os.path.splitext(os.path.basename(png))[0] 1582 | result[int(basename_without_ext)] = png 1583 | return result 1584 | 1585 | init_image_per_wave_map = create_init_image_per_wave_map(loopback_wave_path) 1586 | print("init_image_per_wave_map", init_image_per_wave_map) 1587 | 1588 | def create_common_prompt_map(prompts): 1589 | result = {} 1590 | if prompts: 1591 | lines = prompts.split("\n") 1592 | for prompt_line in lines: 1593 | # wave_index::prompt 1594 | # wave_index::seed::prompt 1595 | params = prompt_line.split("::") 1596 | if len(params) == 2: 1597 | result[int(params[0])] = { "prompt": params[1] } 1598 | # elif len(params) == 3: 1599 | # result[int(params[0])] = { "seed": params[1] , "prompt": params[2] } 1600 | else: 1601 | raise IOError(f"Invalid input in common prompt line: {prompt_line}") 1602 | return result 1603 | 1604 | common_prompt_map = create_common_prompt_map(common_prompts) 1605 | print("common_prompt_map", common_prompt_map) 1606 | 1607 | 1608 | extend_prompt_map = create_extend_prompt_map(extend_prompts, wave_list) 1609 | print("extend_prompt_map", extend_prompt_map) 1610 | 1611 | sub_extend_prompt_map = create_extend_prompt_map(sub_extend_prompts, sub_wave_list) 1612 | print("sub_extend_prompt_map", sub_extend_prompt_map) 1613 | 1614 | wild_card_dir = get_wild_card_dir() 1615 | print("wild_card_dir : ", wild_card_dir) 1616 | 1617 | wild_card_map = create_wild_card_map(wild_card_dir) 1618 | # print("wild_card_map", wild_card_map) 1619 | 1620 | 1621 | history = [] 1622 | 1623 | # Reset to original init image at the start of each batch 1624 | p.init_images = original_init_image 1625 | 1626 | seed_state = "adding" 1627 | initial_seed = p.seed 1628 | 1629 | i = 0 1630 | 1631 | main_wave_status = { 1632 | "wave_index" : 0, 1633 | "prompt_changed" : False, 1634 | "current_common_prompt" : p.prompt, 1635 | "current_extend_prompt" : "", 1636 | # output 1637 | "init_image" : None, 1638 | "denoising_strength" : 0, 1639 | "new_prompt" : "", 1640 | } 1641 | 1642 | sub_wave_status = { 1643 | "wave_index" : 0, 1644 | "prompt_changed" : False, 1645 | "current_extend_prompt" : "", 1646 | # output 1647 | "new_prompt" : "", 1648 | } 1649 | 1650 | effects = 
scripts.util_sd_loopback_music_sync_wave.sync_effect.SyncEffect(fps) 1651 | bpm_event = scripts.util_sd_loopback_music_sync_wave.bpm.BpmEvent(fps, total_length) 1652 | 1653 | seed_for_img2img_outpainting = int(random.randrange(4294967294)) 1654 | 1655 | use_controlnet_for_main_generation = use_controlnet_for_img2img if mode_setting == "img2img" else use_controlnet_for_lb 1656 | 1657 | us_map = {} 1658 | 1659 | if not use_optical_flow: 1660 | flow_interpolation_multi = 1 1661 | 1662 | extract_video_frame(project_dir, video_file_path, fps, flow_interpolation_multi, ffmpeg_path) 1663 | 1664 | if use_optical_flow: 1665 | frame_path = get_video_frame_path(project_dir, 0, fps, flow_interpolation_multi) 1666 | if frame_path and os.path.isfile(frame_path): 1667 | v_path = os.path.join(os.path.join(project_dir, "video_frame"), f"{fps * flow_interpolation_multi}") 1668 | o_path = os.path.join(os.path.join(project_dir, "optical_flow"), f"{fps * flow_interpolation_multi}") 1669 | m_path = os.path.join(os.path.join(project_dir, "occ_mask"), f"{fps * flow_interpolation_multi}") 1670 | scripts.util_sd_loopback_music_sync_wave.raft.create_optical_flow(v_path, o_path, m_path, use_optical_flow_cache, None, flow_occ_detect_th) 1671 | else: 1672 | print("video frame not found -> use_optical_flow = False") 1673 | use_optical_flow = False 1674 | 1675 | scene_detection_list = [] 1676 | if use_scene_detection: 1677 | if use_optical_flow: 1678 | m_path = os.path.join(os.path.join(project_dir, "occ_mask"), f"{fps * flow_interpolation_multi}") 1679 | mask_path_list = sorted(glob.glob( os.path.join(m_path ,"[0-9]*.png"), recursive=False)) 1680 | scene_detection_list = scripts.util_sd_loopback_music_sync_wave.raft.get_scene_detection_list(sd_threshold, flow_interpolation_multi, mask_path_list) 1681 | else: 1682 | use_scene_detection = False 1683 | 1684 | initial_input_image = None 1685 | prev_frame_image = None 1686 | 1687 | scene_changed_list = [] 1688 | 1689 | denoising_strength_add_timing = 1 1690 | 1691 | # generation loop 1692 | while True: 1693 | 1694 | # cancelled. 
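# Loop overview (annotation): each pass renders one frame. Note that the extra
# amplitude from 'Max additional denoise' computed in main_wave_loop is only kept
# on every denoising_strength_add_freq-th frame; e.g. with freq=3, frames 0, 3,
# 6, ... use the boosted strength and the others fall back to the base value.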
1695 | if state.interrupted: 1696 | print("Generation cancelled.") 1697 | raise Exception("Generation cancelled.") 1698 | 1699 | state.job = "" 1700 | current_time = 1000 * i / fps 1701 | total_progress = i/frames 1702 | 1703 | if main_wave_loop(p, wave_list, current_time, total_progress, mode_setting, initial_denoising_strength, denoising_strength_change_amplitude, fps, 1704 | main_wave_status, common_prompt_map, extend_prompt_map, init_image_per_wave_map, wild_card_map, effects, bpm_event) == False: 1705 | break 1706 | 1707 | if main_wave_status["init_image"]: 1708 | p.init_images = [main_wave_status["init_image"]] 1709 | 1710 | p.denoising_strength = main_wave_status["denoising_strength"] 1711 | new_prompt = main_wave_status["new_prompt"] 1712 | 1713 | if sub_wave_list and sub_extend_prompt_map: 1714 | sub_wave_loop(p, sub_wave_list, current_time, total_progress, sub_wave_status, sub_extend_prompt_map, wild_card_map, effects, bpm_event) 1715 | 1716 | new_prompt += "," + sub_wave_status["new_prompt"] 1717 | 1718 | 1719 | # denoising_strength_add_freq 1720 | denoising_strength_add_timing -= 1 1721 | if denoising_strength_add_timing == 0: 1722 | denoising_strength_add_timing = denoising_strength_add_freq 1723 | else: 1724 | p.denoising_strength = initial_denoising_strength 1725 | 1726 | # override init_image for img2img 1727 | if mode_setting == "img2img": 1728 | frame_path = get_video_frame_path(project_dir, i, fps, flow_interpolation_multi) 1729 | if frame_path and os.path.isfile(frame_path): 1730 | org_frame = image_open_and_resize(frame_path, p.width, p.height) 1731 | p.init_images = [org_frame] 1732 | else: 1733 | print("Warning! File not found : ",frame_path) 1734 | 1735 | p.n_iter = 1 1736 | p.batch_size = 1 1737 | p.do_not_save_grid = True 1738 | 1739 | if opts.img2img_color_correction: 1740 | p.color_corrections = initial_color_corrections 1741 | 1742 | # bpm_event 1743 | bpm_prompt = bpm_event.get_current_prompt(current_time) 1744 | if bpm_prompt: 1745 | # wild card 1746 | bpm_prompt = re.sub(wild_card_regex, lambda x: replace_wild_card_token(x, wild_card_map ), bpm_prompt) 1747 | 1748 | # random 1749 | bpm_prompt = re.sub(random_regex, lambda x: get_random_value(x ), bpm_prompt) 1750 | 1751 | # effect 1752 | bpm_prompt = effects.parse_prompt(bpm_prompt) 1753 | 1754 | new_prompt += "," + bpm_prompt 1755 | 1756 | 1757 | ef_prompt = effects.get_current_prompt() 1758 | if ef_prompt: 1759 | new_prompt += "," + ef_prompt 1760 | 1761 | # parse #func 1762 | # affine 1763 | is_affine_need, affine_input, new_prompt = parse_sharp_func(new_prompt,fps) 1764 | print("affine_input : ", affine_input) 1765 | 1766 | # inpaint 1767 | inpaint_mask_prompt = [] 1768 | inpaint_inpaint_prompt = [] 1769 | new_prompt = re.sub(inpaint_regex, lambda x: get_weights_text(x, inpaint_mask_prompt, inpaint_inpaint_prompt), new_prompt) 1770 | print("inpaint_input : ", ( inpaint_mask_prompt, inpaint_inpaint_prompt)) 1771 | 1772 | 1773 | # slide/blind 1774 | is_slide_need, slide_inputs, new_prompt = parse_slide_func(new_prompt, fps) 1775 | print("slide_inputs : ", slide_inputs) 1776 | 1777 | 1778 | # other 1779 | blur_str=[] 1780 | hue_type=[] 1781 | hue_angle=[] 1782 | post_process=[] 1783 | 1784 | new_prompt = re.sub(blur_regex, lambda x: get_weights(x, blur_str), new_prompt) 1785 | new_prompt = re.sub(hue_regex, lambda x: get_weights(x, hue_type, hue_angle), new_prompt) 1786 | 1787 | new_prompt = re.sub(postprocess_regex, lambda x: get_weights(x, post_process), new_prompt) 1788 | 1789 | _blur_str = 0 if not 
blur_str else blur_str[0] 1790 | _hue_type = -1 if not hue_type else hue_type[0] 1791 | _hue_angle = 0 if not hue_angle else hue_angle[0] 1792 | _post_process = 0 if not post_process else post_process[0] 1793 | 1794 | other_effect_input = [_blur_str,_hue_type,_hue_angle] 1795 | print("other_effect_input : ", other_effect_input) 1796 | 1797 | 1798 | p.prompt = new_prompt 1799 | print(new_prompt) 1800 | 1801 | state.job += f"Iteration {i + 1}/{frames}. Denoising Strength: {p.denoising_strength}" 1802 | 1803 | if initial_input_image is None: 1804 | prev_frame_image = initial_input_image = p.init_images[0] 1805 | 1806 | input_for_cn_ref_only = initial_input_image if cn_ref_input_type == "1st input img" else prev_frame_image 1807 | 1808 | control_net_input_image = (None, input_for_cn_ref_only) 1809 | 1810 | if ((mode_setting == "loopback") and use_video_frame_for_controlnet_in_loopback_mode) or (mode_setting == "img2img"): 1811 | frame_path = get_video_frame_path(project_dir, i, fps, flow_interpolation_multi) 1812 | if frame_path and os.path.isfile(frame_path): 1813 | #org_frame = image_open_and_resize(frame_path, p.width, p.height) 1814 | control_net_input_image = (frame_path, input_for_cn_ref_only) 1815 | else: 1816 | print("!!!!!!!!!!!!! Warning! File for control_net_input_image not found : ",frame_path) 1817 | print("ran out of frames -> generation end") 1818 | break 1819 | 1820 | op_seed = p.seed if mode_setting == "loopback" else seed_for_img2img_outpainting 1821 | 1822 | 1823 | # scene_detection 1824 | auto_scene_detect = False 1825 | if mode_setting == "loopback": 1826 | if use_scene_detection: 1827 | auto_scene_detect = scene_detection_list[i] 1828 | 1829 | # overwrite_frame 1830 | overwrite_frame = "" 1831 | if mode_setting == "loopback": 1832 | overwrite_frame = get_overwrite_frame_path(project_dir, i, fps, flow_interpolation_multi) 1833 | 1834 | 1835 | if (not auto_scene_detect) and (not overwrite_frame): 1836 | # optical flow 1837 | if use_optical_flow: 1838 | p.init_images = [ apply_optical_flow(p, i, fps, flow_interpolation_multi, flow_inpaint_method, flow_occ_area_th, project_dir, op_mask_blur, op_inpainting_fill, op_str, op_seed, use_controlnet_for_occ_inpaint, control_net_input_image)] 1839 | 1840 | # affine 1841 | if is_affine_need: 1842 | p.init_images = [ affine_image(p, op_mask_blur, op_inpainting_fill, op_str, op_seed, affine_input, use_controlnet_for_outpaint, control_net_input_image)] 1843 | 1844 | # inpaint 1845 | if inpaint_mask_prompt: 1846 | if not inpaint_inpaint_prompt: 1847 | inpaint_inpaint_prompt.append(p.prompt) 1848 | 1849 | p.init_images = [ apply_inpaint(p, inpaint_mask_prompt[0], inpaint_inpaint_prompt[0], op_mask_blur, op_inpainting_fill, op_str, op_seed, use_controlnet_for_inpaint, control_net_input_image ) ] 1850 | 1851 | # slide/blind 1852 | if is_slide_need: 1853 | p.init_images = [ apply_slide(p, op_mask_blur, op_inpainting_fill, op_str, op_seed, slide_inputs, use_controlnet_for_outpaint, control_net_input_image) ] 1854 | 1855 | 1856 | # other 1857 | if _post_process == 0: 1858 | if _blur_str != 0 or _hue_type != -1: 1859 | print("apply_other_effect") 1860 | p.init_images = [ scripts.util_sd_loopback_music_sync_wave.other_effect.apply_other_effect( p.init_images[0], *other_effect_input ) ] 1861 | 1862 | if mode_setting == "img2img": 1863 | p.seed = initial_seed 1864 | 1865 | if not overwrite_frame: 1866 | # scene_detection 1867 | if auto_scene_detect: 1868 | print(f"{i} : scene change") 1869 | 1870 | scene_changed_list.append(i) 1871 | 1872 | 
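# Annotation: on a detected scene change the loopback chain is restarted; the
# raw video frame below replaces the previous output as the init image, and a
# one-off stronger denoise (sd_denoising_strength, 0.8 by default) re-imagines
# the new scene instead of warping it out of the old one.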
frame_path = get_video_frame_path(project_dir, i, fps, flow_interpolation_multi) 1873 | p.init_images= [image_open_and_resize(frame_path, p.width, p.height)] 1874 | p.denoising_strength = sd_denoising_strength 1875 | 1876 | processed = process_image(p, inner_lb_count, inner_lb_str, use_controlnet_for_main_generation, control_net_input_image) 1877 | 1878 | if initial_info is None: 1879 | initial_info = processed.info 1880 | 1881 | else: 1882 | # overwrite_frame 1883 | print("overwrite frame : ",overwrite_frame) 1884 | 1885 | scene_changed_list.append(i) 1886 | 1887 | overwrite_img = Image.open(overwrite_frame) 1888 | 1889 | try: 1890 | overwrite_prompt = get_positive_prompt_from_image(overwrite_img) 1891 | if overwrite_prompt: 1892 | main_wave_status["current_common_prompt"] = overwrite_prompt 1893 | print("overwrite common prompt : ",overwrite_prompt) 1894 | except Exception as e: 1895 | print("get_positive_prompt_from_image failed. ",overwrite_frame) 1896 | 1897 | processed = processing.Processed(p=p,images_list=[ resize_img(overwrite_img, p.width, p.height) ],seed=p.seed) 1898 | 1899 | 1900 | processed_img = processed.images[0] 1901 | 1902 | if auto_brightness: 1903 | processed_img = adjust_brightness(processed_img, 1) 1904 | 1905 | # set init_image 1906 | if mode_setting == "loopback": 1907 | p.init_images = [processed_img] 1908 | else: 1909 | # Replace at the beginning of loop 1910 | pass 1911 | 1912 | prev_frame_image = processed_img 1913 | 1914 | # post process 1915 | if _post_process != 0: 1916 | if _blur_str != 0 or _hue_type != -1: 1917 | print("post apply_other_effect") 1918 | processed_img = scripts.util_sd_loopback_music_sync_wave.other_effect.apply_other_effect(processed_img, *other_effect_input ) 1919 | 1920 | image_number = int(initial_image_number + i) 1921 | 1922 | us_map[image_number] = { 1923 | "seed" : p.seed, 1924 | "prompt" : p.prompt, 1925 | "info" : processed.info, 1926 | } 1927 | 1928 | 1929 | if seed_state == "adding": 1930 | p.seed = processed.seed + 1 1931 | elif seed_state == "subtracting": 1932 | p.seed = processed.seed - 1 1933 | 1934 | images.save_image(processed_img, p.outpath_samples, "", processed.seed, processed.prompt, info=processed.info, save_to_dirs=False, forced_filename=str(image_number).zfill(5), p=p) 1935 | 1936 | history.append(processed_img) 1937 | 1938 | i+=1 1939 | 1940 | grid = images.image_grid(history, rows=1) 1941 | if opts.grid_save: 1942 | images.save_image(grid, p.outpath_grids, "grid", initial_seed, p.prompt, opts.grid_format, info=initial_info, save_to_dirs=False, short_filename=not opts.grid_extended_filename, grid=True, p=p) 1943 | grids.append(grid) 1944 | 1945 | all_images += history 1946 | 1947 | if opts.return_grid: 1948 | all_images = grids + all_images 1949 | 1950 | out_video_path = loopback_wave_images_path 1951 | 1952 | # upscale 1953 | if us_width != -1 or us_height != -1: 1954 | if us_method != 'None': 1955 | loopback_wave_images_path = os.path.join(loopback_wave_images_path, "upscale") 1956 | scripts.util_sd_loopback_music_sync_wave.upscale.upscale(p, us_map, loopback_wave_images_path, us_width, us_height, us_method, us_denoising_strength) 1957 | 1958 | 1959 | # interpolate 1960 | if use_optical_flow and flow_interpolation_multi > 1: 1961 | 1962 | sc_list = [False for n in range(i)] 1963 | for s in scene_changed_list: 1964 | sc_list[s] = True 1965 | 1966 | if save_video: 1967 | input_pattern = os.path.join(loopback_wave_images_path, "%05d.png") 1968 | encode_video(input_pattern, initial_image_number, 
out_video_path+"_base", fps, video_quality, video_encoding, segment_video, video_segment_duration, ffmpeg_path, sound_file_path) 1969 | 1970 | src_interpolate_path = loopback_wave_images_path 1971 | loopback_wave_images_path = os.path.join(loopback_wave_images_path, "interpolate") 1972 | flow_path = os.path.join(os.path.join(project_dir, "optical_flow"), f"{fps * flow_interpolation_multi}") 1973 | scripts.util_sd_loopback_music_sync_wave.raft.interpolate(src_interpolate_path, loopback_wave_images_path, flow_interpolation_multi, flow_path, sc_list ) 1974 | 1975 | if save_video: 1976 | input_pattern = os.path.join(loopback_wave_images_path, "%05d.png") 1977 | encode_video(input_pattern, initial_image_number, out_video_path, fps * flow_interpolation_multi, video_quality, video_encoding, segment_video, video_segment_duration, ffmpeg_path, sound_file_path) 1978 | 1979 | processed = Processed(p, all_images, initial_seed, initial_info) 1980 | 1981 | calc_time_end = time.perf_counter() 1982 | 1983 | print("elapsed_time (sec) : ", calc_time_end - calc_time_start) 1984 | 1985 | return processed 1986 | 1987 | 1988 | def fake_run(raw_wave_list, extend_prompts, fps, initial_denoising_strength, denoising_strength_change_amplitude): 1989 | 1990 | #input validation 1991 | raw_wave_list = raw_wave_list.strip() 1992 | wave_list = str_to_wave_list(raw_wave_list) 1993 | 1994 | #calc frames 1995 | total_length = wave_list[-1]["end_msec"] 1996 | frames = total_length * fps / 1000 1997 | print( "end_time = ",total_length ) 1998 | print( "fps = ",fps ) 1999 | print( "frames = ",frames ) 2000 | frames = int(frames) 2001 | 2002 | extend_prompts = extend_prompts.strip() 2003 | 2004 | # create maps 2005 | extend_prompt_map = create_extend_prompt_map(extend_prompts, wave_list) 2006 | print("extend_prompt_map", extend_prompt_map) 2007 | 2008 | wild_card_dir = get_wild_card_dir() 2009 | print("wild_card_dir : ", wild_card_dir) 2010 | 2011 | wild_card_map = create_wild_card_map(wild_card_dir) 2012 | print("wild_card_map", wild_card_map) 2013 | 2014 | i = 0 2015 | 2016 | main_wave_status = { 2017 | "wave_index" : 0, 2018 | "prompt_changed" : False, 2019 | "current_common_prompt" : "", 2020 | "current_extend_prompt" : "", 2021 | # output 2022 | "init_image" : None, 2023 | "denoising_strength" : 0, 2024 | "new_prompt" : "", 2025 | } 2026 | 2027 | effects = scripts.util_sd_loopback_music_sync_wave.sync_effect.SyncEffect(fps) 2028 | bpm_event = scripts.util_sd_loopback_music_sync_wave.bpm.BpmEvent(fps, total_length) 2029 | 2030 | stat_map = {} 2031 | 2032 | # generation loop 2033 | while True: 2034 | current_time = 1000 * i / fps 2035 | total_progress = i/frames 2036 | 2037 | if main_wave_loop(None, wave_list, current_time, total_progress, "loop_back", initial_denoising_strength, denoising_strength_change_amplitude, fps, 2038 | main_wave_status, None, extend_prompt_map, None, wild_card_map, effects, bpm_event) == False: 2039 | break 2040 | 2041 | denoising_strength = main_wave_status["denoising_strength"] 2042 | new_prompt = main_wave_status["new_prompt"] 2043 | 2044 | # bpm_event 2045 | bpm_prompt = bpm_event.get_current_prompt(current_time) 2046 | if bpm_prompt: 2047 | # wild card 2048 | bpm_prompt = re.sub(wild_card_regex, lambda x: replace_wild_card_token(x, wild_card_map ), bpm_prompt) 2049 | 2050 | # random 2051 | bpm_prompt = re.sub(random_regex, lambda x: get_random_value(x ), bpm_prompt) 2052 | 2053 | # effect 2054 | bpm_prompt = effects.parse_prompt(bpm_prompt) 2055 | 2056 | new_prompt += "," + bpm_prompt 2057 | 
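# Annotation: fake_run mirrors the prompt/effect pipeline of run() without ever
# calling the sampler; the stat_map it builds below records, per frame time, the
# resolved prompt, denoising strength and affine/slide/other-effect inputs,
# presumably so wave_list_test.py can preview a wave list before a full render.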
2058 | ef_prompt = effects.get_current_prompt() 2059 | if ef_prompt: 2060 | new_prompt += "," + ef_prompt 2061 | 2062 | 2063 | # parse #func 2064 | # affine 2065 | is_affine_need, affine_input, new_prompt = parse_sharp_func(new_prompt,fps) 2066 | print("affine_input : ", affine_input) 2067 | 2068 | # slide/blind 2069 | is_slide_need, slide_inputs, new_prompt = parse_slide_func(new_prompt, fps) 2070 | print("slide_inputs : ", slide_inputs) 2071 | 2072 | # other 2073 | blur_str=[] 2074 | hue_type=[] 2075 | hue_angle=[] 2076 | post_process=[] 2077 | 2078 | new_prompt = re.sub(blur_regex, lambda x: get_weights(x, blur_str), new_prompt) 2079 | new_prompt = re.sub(hue_regex, lambda x: get_weights(x, hue_type, hue_angle), new_prompt) 2080 | 2081 | new_prompt = re.sub(postprocess_regex, lambda x: get_weights(x, post_process), new_prompt) 2082 | 2083 | _blur_str = 0 if not blur_str else blur_str[0] 2084 | _hue_type = -1 if not hue_type else hue_type[0] 2085 | _hue_angle = 0 if not hue_angle else hue_angle[0] 2086 | _post_process = 0 if not post_process else post_process[0] 2087 | 2088 | other_effect_input = [_blur_str,_hue_type,_hue_angle] 2089 | print("other_effect_input : ", other_effect_input) 2090 | 2091 | 2092 | #new_prompt 2093 | #denoising_strength 2094 | print(new_prompt) 2095 | 2096 | stat_map[current_time/1000] = { 2097 | "prompt":new_prompt, 2098 | "denoising_strength":denoising_strength, 2099 | "affine_input":affine_input, 2100 | "slide_inputs":slide_inputs, 2101 | "other_effect_input":other_effect_input, 2102 | } 2103 | 2104 | i+=1 2105 | 2106 | 2107 | return stat_map 2108 | 2109 | --------------------------------------------------------------------------------