├── .gitattributes ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .style.yapf ├── LICENSE.ControlNet ├── README.md ├── app.py ├── model.py ├── patch ├── requirements.txt ├── share_btn.py └── style.css /.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 12 | *.model filter=lfs diff=lfs merge=lfs -text 13 | *.msgpack filter=lfs diff=lfs merge=lfs -text 14 | *.npy filter=lfs diff=lfs merge=lfs -text 15 | *.npz filter=lfs diff=lfs merge=lfs -text 16 | *.onnx filter=lfs diff=lfs merge=lfs -text 17 | *.ot filter=lfs diff=lfs merge=lfs -text 18 | *.parquet filter=lfs diff=lfs merge=lfs -text 19 | *.pb filter=lfs diff=lfs merge=lfs -text 20 | *.pickle filter=lfs diff=lfs merge=lfs -text 21 | *.pkl filter=lfs diff=lfs merge=lfs -text 22 | *.pt filter=lfs diff=lfs merge=lfs -text 23 | *.pth filter=lfs diff=lfs merge=lfs -text 24 | *.rar filter=lfs diff=lfs merge=lfs -text 25 | *.safetensors filter=lfs diff=lfs merge=lfs -text 26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 27 | *.tar.* filter=lfs diff=lfs merge=lfs -text 28 | *.tflite filter=lfs diff=lfs merge=lfs -text 29 | *.tgz filter=lfs diff=lfs merge=lfs -text 30 | *.wasm filter=lfs diff=lfs merge=lfs -text 31 | *.xz filter=lfs diff=lfs merge=lfs -text 32 | *.zip filter=lfs diff=lfs merge=lfs -text 33 | *.zst filter=lfs diff=lfs merge=lfs -text 34 | *tfevents* filter=lfs diff=lfs merge=lfs -text 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | models/ 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # poetry 100 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 101 | # This is especially recommended for binary packages to ensure reproducibility, and is more 102 | # commonly ignored for libraries. 103 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 104 | #poetry.lock 105 | 106 | # pdm 107 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 108 | #pdm.lock 109 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 110 | # in version control. 111 | # https://pdm.fming.dev/#use-with-ide 112 | .pdm.toml 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/ 163 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ControlNet"] 2 | path = ControlNet 3 | url = https://github.com/lllyasviel/ControlNet 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: patch 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.2.0 5 | hooks: 6 | - id: check-executables-have-shebangs 7 | - id: check-json 8 | - id: check-merge-conflict 9 | - id: check-shebang-scripts-are-executable 10 | - id: check-toml 11 | - id: check-yaml 12 | - id: double-quote-string-fixer 13 | - id: end-of-file-fixer 14 | - id: mixed-line-ending 15 | args: ['--fix=lf'] 16 | - id: requirements-txt-fixer 17 | - id: trailing-whitespace 18 | - repo: https://github.com/myint/docformatter 19 | rev: v1.4 20 | hooks: 21 | - id: docformatter 22 | args: ['--in-place'] 23 | - repo: https://github.com/pycqa/isort 24 | rev: 5.12.0 25 | hooks: 26 | - id: isort 27 | - repo: https://github.com/pre-commit/mirrors-mypy 28 | rev: v0.991 29 | hooks: 30 | - id: mypy 31 | args: ['--ignore-missing-imports'] 32 | additional_dependencies: ['types-python-slugify'] 33 | - repo: https://github.com/google/yapf 34 | rev: v0.32.0 35 | hooks: 36 | - id: yapf 37 | args: ['--parallel', '--in-place'] 38 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | blank_line_before_nested_class_or_def = false 4 | spaces_before_comment = 2 5 | split_before_logical_operator = true 6 | -------------------------------------------------------------------------------- /LICENSE.ControlNet: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: ControlNet-Video 3 | emoji: 🕹 4 | colorFrom: pink 5 | colorTo: blue 6 | sdk: gradio 7 | sdk_version: 3.18.0 8 | python_version: 3.10.9 9 | app_file: app.py 10 | pinned: false 11 | duplicated_from: hysts/ControlNet 12 | --- 13 | 14 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference 15 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import gradio as gr 3 | import os 4 | import cv2 5 | import numpy as np 6 | from PIL import Image 7 | from moviepy.editor import * 8 | from share_btn import community_icon_html, loading_icon_html, share_js 9 | 10 | import pathlib 11 | import shlex 12 | import subprocess 13 | 14 | if os.getenv('SYSTEM') == 'spaces': 15 | with open('patch') as f: 16 | subprocess.run(shlex.split('patch -p1'), stdin=f, cwd='ControlNet') 17 | 18 | base_url = 'https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/' 19 | 20 | names = [ 21 | 'body_pose_model.pth', 22 | 'dpt_hybrid-midas-501f0c75.pt', 23 | 'hand_pose_model.pth', 24 | 'mlsd_large_512_fp32.pth', 25 | 'mlsd_tiny_512_fp32.pth', 26 | 'network-bsds500.pth', 27 | 'upernet_global_small.pth', 28 | ] 29 | 30 | for name in names: 31 | command = f'wget https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/{name} -O {name}' 32 | out_path = pathlib.Path(f'ControlNet/annotator/ckpts/{name}') 33 | if out_path.exists(): 34 | continue 35 | subprocess.run(shlex.split(command), cwd='ControlNet/annotator/ckpts/') 36 | 37 | from model import Model 38 | model = Model() 39 | 40 | 41 | def controlnet(i, prompt, control_task, seed_in, ddim_steps, scale): 42 | img= Image.open(i) 43 | np_img = np.array(img) 44 | 45 | a_prompt = "best quality, extremely detailed" 46 | n_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality" 47 | num_samples = 1 48 | image_resolution = 512 49 | detect_resolution = 512 50 | eta = 0.0 51 | low_threshold = 100 52 | high_threshold = 200 53 | 54 | if control_task == 'Canny': 55 | result = model.process_canny(np_img, prompt, a_prompt, n_prompt, num_samples, 56 | image_resolution, ddim_steps, scale, seed_in, eta, low_threshold, high_threshold) 57 | elif control_task == 'Depth': 58 | result = model.process_depth(np_img, prompt, a_prompt, n_prompt, num_samples, 59 | image_resolution, detect_resolution, ddim_steps, scale, seed_in, eta) 60 | elif control_task == 'Pose': 61 | result = model.process_pose(np_img, prompt, a_prompt, n_prompt, num_samples, 62 | image_resolution, detect_resolution, ddim_steps, scale, seed_in, eta) 63 | 64 | #print(result[0]) 65 | im = Image.fromarray(result[1]) 66 | im.save("your_file" + str(i) + ".jpeg") 67 | return "your_file" + str(i) + ".jpeg" 68 | 69 | 70 | def get_frames(video_in): 71 | frames = [] 72 | #resize the video 73 | clip = VideoFileClip(video_in) 74 | 75 | #check fps 76 | if clip.fps > 30: 77 | print("vide rate is over 30, resetting to 30") 78 | clip_resized = clip.resize(height=512) 79 | clip_resized.write_videofile("video_resized.mp4", fps=30) 80 | else: 81 | print("video rate is OK") 82 | clip_resized = clip.resize(height=512) 83 | 
clip_resized.write_videofile("video_resized.mp4", fps=clip.fps) 84 | 85 | print("video resized to 512 height") 86 | 87 | # Opens the Video file with CV2 88 | cap= cv2.VideoCapture("video_resized.mp4") 89 | 90 | fps = cap.get(cv2.CAP_PROP_FPS) 91 | print("video fps: " + str(fps)) 92 | i=0 93 | while(cap.isOpened()): 94 | ret, frame = cap.read() 95 | if ret == False: 96 | break 97 | cv2.imwrite('kang'+str(i)+'.jpg',frame) 98 | frames.append('kang'+str(i)+'.jpg') 99 | i+=1 100 | 101 | cap.release() 102 | cv2.destroyAllWindows() 103 | print("broke the video into frames") 104 | 105 | return frames, fps 106 | 107 | 108 | def create_video(frames, fps): 109 | print("building video result") 110 | clip = ImageSequenceClip(frames, fps=fps) 111 | clip.write_videofile("movie.mp4", fps=fps) 112 | 113 | return 'movie.mp4' 114 | 115 | 116 | def infer(prompt,video_in, control_task, seed_in, trim_value, ddim_steps, scale): 117 | print(f""" 118 | ——————————————— 119 | {prompt} 120 | ———————————————""") 121 | 122 | # 1. break video into frames and get FPS 123 | break_vid = get_frames(video_in) 124 | frames_list= break_vid[0] 125 | fps = break_vid[1] 126 | n_frame = int(trim_value*fps) 127 | 128 | if n_frame >= len(frames_list): 129 | print("video is shorter than the cut value") 130 | n_frame = len(frames_list) 131 | 132 | # 2. prepare frames result array 133 | result_frames = [] 134 | print("set stop frames to: " + str(n_frame)) 135 | 136 | for i in frames_list[0:int(n_frame)]: 137 | controlnet_img = controlnet(i, prompt,control_task, seed_in, ddim_steps, scale) 138 | #images = controlnet_img[0] 139 | #rgb_im = images[0].convert("RGB") 140 | 141 | # exporting the image 142 | #rgb_im.save(f"result_img-{i}.jpg") 143 | result_frames.append(controlnet_img) 144 | print("frame " + i + "/" + str(n_frame) + ": done;") 145 | 146 | final_vid = create_video(result_frames, fps) 147 | print("finished !") 148 | 149 | return final_vid, gr.Group.update(visible=True) 150 | #return controlnet_img 151 | 152 | title = """ 153 |
163 | ControlNet Video
167 | Apply ControlNet to a video
170 | """
171 |
172 | article = """
180 | You may also like:
192 | 193 | """ 194 | 195 | with gr.Blocks(css='style.css') as demo: 196 | with gr.Column(elem_id="col-container"): 197 | gr.HTML(title) 198 | with gr.Row(): 199 | with gr.Column(): 200 | video_inp = gr.Video(label="Video source", source="upload", type="filepath", elem_id="input-vid") 201 | video_out = gr.Video(label="ControlNet video result", elem_id="video-output") 202 | with gr.Group(elem_id="share-btn-container", visible=False) as share_group: 203 | community_icon = gr.HTML(community_icon_html) 204 | loading_icon = gr.HTML(loading_icon_html) 205 | share_button = gr.Button("Share to community", elem_id="share-btn") 206 | with gr.Column(): 207 | #status = gr.Textbox() 208 | 209 | prompt = gr.Textbox(label="Prompt", placeholder="enter prompt", show_label=True, elem_id="prompt-in") 210 | control_task = gr.Dropdown(label="Control Task", choices=["Canny", "Depth", "Pose"], value="Pose", multiselect=False) 211 | with gr.Row(): 212 | seed_inp = gr.Slider(label="Seed", minimum=0, maximum=2147483647, step=1, value=123456) 213 | trim_in = gr.Slider(label="Cut video at (s)", minimun=1, maximum=5, step=1, value=1) 214 | ddim_steps = gr.Slider(label='Steps', 215 | minimum=1, 216 | maximum=100, 217 | value=20, 218 | step=1) 219 | scale = gr.Slider(label='Guidance Scale', 220 | minimum=0.1, 221 | maximum=30.0, 222 | value=9.0, 223 | step=0.1) 224 | 225 | submit_btn = gr.Button("Generate Pix2Pix video") 226 | 227 | gr.HTML(""" 228 | Duplicate Space 229 | work with longer videos / skip the queue: 230 | """, elem_id="duplicate-container") 231 | 232 | inputs = [prompt,video_inp,control_task, seed_inp, trim_in, ddim_steps, scale] 233 | outputs = [video_out, share_group] 234 | #outputs = [status] 235 | 236 | 237 | gr.HTML(article) 238 | 239 | submit_btn.click(infer, inputs, outputs) 240 | share_button.click(None, [], [], _js=share_js) 241 | 242 | 243 | 244 | demo.launch().queue(max_size=12) -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # This file is adapted from gradio_*.py in https://github.com/lllyasviel/ControlNet/tree/f4748e3630d8141d7765e2bd9b1e348f47847707 2 | # The original license file is LICENSE.ControlNet in this repo. 
3 | from __future__ import annotations 4 | 5 | import pathlib 6 | import random 7 | import shlex 8 | import subprocess 9 | import sys 10 | 11 | import cv2 12 | import einops 13 | import numpy as np 14 | import torch 15 | from pytorch_lightning import seed_everything 16 | 17 | sys.path.append('ControlNet') 18 | 19 | import config 20 | from annotator.canny import apply_canny 21 | from annotator.hed import apply_hed, nms 22 | from annotator.midas import apply_midas 23 | from annotator.mlsd import apply_mlsd 24 | from annotator.openpose import apply_openpose 25 | from annotator.uniformer import apply_uniformer 26 | from annotator.util import HWC3, resize_image 27 | from cldm.model import create_model, load_state_dict 28 | from ldm.models.diffusion.ddim import DDIMSampler 29 | from share import * 30 | 31 | ORIGINAL_MODEL_NAMES = { 32 | 'canny': 'control_sd15_canny.pth', 33 | 'hough': 'control_sd15_mlsd.pth', 34 | 'hed': 'control_sd15_hed.pth', 35 | 'scribble': 'control_sd15_scribble.pth', 36 | 'pose': 'control_sd15_openpose.pth', 37 | 'seg': 'control_sd15_seg.pth', 38 | 'depth': 'control_sd15_depth.pth', 39 | 'normal': 'control_sd15_normal.pth', 40 | } 41 | ORIGINAL_WEIGHT_ROOT = 'https://huggingface.co/lllyasviel/ControlNet/resolve/main/models/' 42 | 43 | LIGHTWEIGHT_MODEL_NAMES = { 44 | 'canny': 'control_canny-fp16.safetensors', 45 | 'hough': 'control_mlsd-fp16.safetensors', 46 | 'hed': 'control_hed-fp16.safetensors', 47 | 'scribble': 'control_scribble-fp16.safetensors', 48 | 'pose': 'control_openpose-fp16.safetensors', 49 | 'seg': 'control_seg-fp16.safetensors', 50 | 'depth': 'control_depth-fp16.safetensors', 51 | 'normal': 'control_normal-fp16.safetensors', 52 | } 53 | LIGHTWEIGHT_WEIGHT_ROOT = 'https://huggingface.co/webui/ControlNet-modules-safetensors/resolve/main/' 54 | 55 | 56 | class Model: 57 | def __init__(self, 58 | model_config_path: str = 'ControlNet/models/cldm_v15.yaml', 59 | model_dir: str = 'models', 60 | use_lightweight: bool = True): 61 | self.device = torch.device( 62 | 'cuda:0' if torch.cuda.is_available() else 'cpu') 63 | self.model = create_model(model_config_path).to(self.device) 64 | self.ddim_sampler = DDIMSampler(self.model) 65 | self.task_name = '' 66 | 67 | self.model_dir = pathlib.Path(model_dir) 68 | self.model_dir.mkdir(exist_ok=True, parents=True) 69 | 70 | self.use_lightweight = use_lightweight 71 | if use_lightweight: 72 | self.model_names = LIGHTWEIGHT_MODEL_NAMES 73 | self.weight_root = LIGHTWEIGHT_WEIGHT_ROOT 74 | base_model_url = 'https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors' 75 | self.load_base_model(base_model_url) 76 | else: 77 | self.model_names = ORIGINAL_MODEL_NAMES 78 | self.weight_root = ORIGINAL_WEIGHT_ROOT 79 | 80 | self.download_models() 81 | 82 | def download_base_model(self, model_url: str) -> pathlib.Path: 83 | model_name = model_url.split('/')[-1] 84 | out_path = self.model_dir / model_name 85 | if not out_path.exists(): 86 | subprocess.run(shlex.split(f'wget {model_url} -O {out_path}')) 87 | return out_path 88 | 89 | def load_base_model(self, model_url: str) -> None: 90 | model_path = self.download_base_model(model_url) 91 | self.model.load_state_dict(load_state_dict(model_path, 92 | location=self.device.type), 93 | strict=False) 94 | 95 | def load_weight(self, task_name: str) -> None: 96 | if task_name == self.task_name: 97 | return 98 | weight_path = self.get_weight_path(task_name) 99 | if not self.use_lightweight: 100 | self.model.load_state_dict( 101 | load_state_dict(weight_path, 
location=self.device)) 102 | else: 103 | self.model.control_model.load_state_dict( 104 | load_state_dict(weight_path, location=self.device.type)) 105 | self.task_name = task_name 106 | 107 | def get_weight_path(self, task_name: str) -> str: 108 | if 'scribble' in task_name: 109 | task_name = 'scribble' 110 | return f'{self.model_dir}/{self.model_names[task_name]}' 111 | 112 | def download_models(self) -> None: 113 | self.model_dir.mkdir(exist_ok=True, parents=True) 114 | for name in self.model_names.values(): 115 | out_path = self.model_dir / name 116 | if out_path.exists(): 117 | continue 118 | subprocess.run( 119 | shlex.split(f'wget {self.weight_root}{name} -O {out_path}')) 120 | 121 | @torch.inference_mode() 122 | def process_canny(self, input_image, prompt, a_prompt, n_prompt, 123 | num_samples, image_resolution, ddim_steps, scale, seed, 124 | eta, low_threshold, high_threshold): 125 | self.load_weight('canny') 126 | 127 | img = resize_image(HWC3(input_image), image_resolution) 128 | H, W, C = img.shape 129 | 130 | detected_map = apply_canny(img, low_threshold, high_threshold) 131 | detected_map = HWC3(detected_map) 132 | 133 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 134 | control = torch.stack([control for _ in range(num_samples)], dim=0) 135 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 136 | 137 | if seed == -1: 138 | seed = random.randint(0, 65535) 139 | seed_everything(seed) 140 | 141 | if config.save_memory: 142 | self.model.low_vram_shift(is_diffusing=False) 143 | 144 | cond = { 145 | 'c_concat': [control], 146 | 'c_crossattn': [ 147 | self.model.get_learned_conditioning( 148 | [prompt + ', ' + a_prompt] * num_samples) 149 | ] 150 | } 151 | un_cond = { 152 | 'c_concat': [control], 153 | 'c_crossattn': 154 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 155 | } 156 | shape = (4, H // 8, W // 8) 157 | 158 | if config.save_memory: 159 | self.model.low_vram_shift(is_diffusing=True) 160 | 161 | samples, intermediates = self.ddim_sampler.sample( 162 | ddim_steps, 163 | num_samples, 164 | shape, 165 | cond, 166 | verbose=False, 167 | eta=eta, 168 | unconditional_guidance_scale=scale, 169 | unconditional_conditioning=un_cond) 170 | 171 | if config.save_memory: 172 | self.model.low_vram_shift(is_diffusing=False) 173 | 174 | x_samples = self.model.decode_first_stage(samples) 175 | x_samples = ( 176 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 177 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 178 | 179 | results = [x_samples[i] for i in range(num_samples)] 180 | return [255 - detected_map] + results 181 | 182 | @torch.inference_mode() 183 | def process_hough(self, input_image, prompt, a_prompt, n_prompt, 184 | num_samples, image_resolution, detect_resolution, 185 | ddim_steps, scale, seed, eta, value_threshold, 186 | distance_threshold): 187 | self.load_weight('hough') 188 | 189 | input_image = HWC3(input_image) 190 | detected_map = apply_mlsd(resize_image(input_image, detect_resolution), 191 | value_threshold, distance_threshold) 192 | detected_map = HWC3(detected_map) 193 | img = resize_image(input_image, image_resolution) 194 | H, W, C = img.shape 195 | 196 | detected_map = cv2.resize(detected_map, (W, H), 197 | interpolation=cv2.INTER_NEAREST) 198 | 199 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 200 | control = torch.stack([control for _ in range(num_samples)], dim=0) 201 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 202 | 203 | if seed == 
-1: 204 | seed = random.randint(0, 65535) 205 | seed_everything(seed) 206 | 207 | if config.save_memory: 208 | self.model.low_vram_shift(is_diffusing=False) 209 | 210 | cond = { 211 | 'c_concat': [control], 212 | 'c_crossattn': [ 213 | self.model.get_learned_conditioning( 214 | [prompt + ', ' + a_prompt] * num_samples) 215 | ] 216 | } 217 | un_cond = { 218 | 'c_concat': [control], 219 | 'c_crossattn': 220 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 221 | } 222 | shape = (4, H // 8, W // 8) 223 | 224 | if config.save_memory: 225 | self.model.low_vram_shift(is_diffusing=True) 226 | 227 | samples, intermediates = self.ddim_sampler.sample( 228 | ddim_steps, 229 | num_samples, 230 | shape, 231 | cond, 232 | verbose=False, 233 | eta=eta, 234 | unconditional_guidance_scale=scale, 235 | unconditional_conditioning=un_cond) 236 | 237 | if config.save_memory: 238 | self.model.low_vram_shift(is_diffusing=False) 239 | 240 | x_samples = self.model.decode_first_stage(samples) 241 | x_samples = ( 242 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 243 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 244 | 245 | results = [x_samples[i] for i in range(num_samples)] 246 | return [ 247 | 255 - cv2.dilate(detected_map, 248 | np.ones(shape=(3, 3), dtype=np.uint8), 249 | iterations=1) 250 | ] + results 251 | 252 | @torch.inference_mode() 253 | def process_hed(self, input_image, prompt, a_prompt, n_prompt, num_samples, 254 | image_resolution, detect_resolution, ddim_steps, scale, 255 | seed, eta): 256 | self.load_weight('hed') 257 | 258 | input_image = HWC3(input_image) 259 | detected_map = apply_hed(resize_image(input_image, detect_resolution)) 260 | detected_map = HWC3(detected_map) 261 | img = resize_image(input_image, image_resolution) 262 | H, W, C = img.shape 263 | 264 | detected_map = cv2.resize(detected_map, (W, H), 265 | interpolation=cv2.INTER_LINEAR) 266 | 267 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 268 | control = torch.stack([control for _ in range(num_samples)], dim=0) 269 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 270 | 271 | if seed == -1: 272 | seed = random.randint(0, 65535) 273 | seed_everything(seed) 274 | 275 | if config.save_memory: 276 | self.model.low_vram_shift(is_diffusing=False) 277 | 278 | cond = { 279 | 'c_concat': [control], 280 | 'c_crossattn': [ 281 | self.model.get_learned_conditioning( 282 | [prompt + ', ' + a_prompt] * num_samples) 283 | ] 284 | } 285 | un_cond = { 286 | 'c_concat': [control], 287 | 'c_crossattn': 288 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 289 | } 290 | shape = (4, H // 8, W // 8) 291 | 292 | if config.save_memory: 293 | self.model.low_vram_shift(is_diffusing=True) 294 | 295 | samples, intermediates = self.ddim_sampler.sample( 296 | ddim_steps, 297 | num_samples, 298 | shape, 299 | cond, 300 | verbose=False, 301 | eta=eta, 302 | unconditional_guidance_scale=scale, 303 | unconditional_conditioning=un_cond) 304 | 305 | if config.save_memory: 306 | self.model.low_vram_shift(is_diffusing=False) 307 | 308 | x_samples = self.model.decode_first_stage(samples) 309 | x_samples = ( 310 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 311 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 312 | 313 | results = [x_samples[i] for i in range(num_samples)] 314 | return [detected_map] + results 315 | 316 | @torch.inference_mode() 317 | def process_scribble(self, input_image, prompt, a_prompt, n_prompt, 318 | num_samples, image_resolution, 
ddim_steps, scale, 319 | seed, eta): 320 | self.load_weight('scribble') 321 | 322 | img = resize_image(HWC3(input_image), image_resolution) 323 | H, W, C = img.shape 324 | 325 | detected_map = np.zeros_like(img, dtype=np.uint8) 326 | detected_map[np.min(img, axis=2) < 127] = 255 327 | 328 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 329 | control = torch.stack([control for _ in range(num_samples)], dim=0) 330 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 331 | 332 | if seed == -1: 333 | seed = random.randint(0, 65535) 334 | seed_everything(seed) 335 | 336 | if config.save_memory: 337 | self.model.low_vram_shift(is_diffusing=False) 338 | 339 | cond = { 340 | 'c_concat': [control], 341 | 'c_crossattn': [ 342 | self.model.get_learned_conditioning( 343 | [prompt + ', ' + a_prompt] * num_samples) 344 | ] 345 | } 346 | un_cond = { 347 | 'c_concat': [control], 348 | 'c_crossattn': 349 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 350 | } 351 | shape = (4, H // 8, W // 8) 352 | 353 | if config.save_memory: 354 | self.model.low_vram_shift(is_diffusing=True) 355 | 356 | samples, intermediates = self.ddim_sampler.sample( 357 | ddim_steps, 358 | num_samples, 359 | shape, 360 | cond, 361 | verbose=False, 362 | eta=eta, 363 | unconditional_guidance_scale=scale, 364 | unconditional_conditioning=un_cond) 365 | 366 | if config.save_memory: 367 | self.model.low_vram_shift(is_diffusing=False) 368 | 369 | x_samples = self.model.decode_first_stage(samples) 370 | x_samples = ( 371 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 372 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 373 | 374 | results = [x_samples[i] for i in range(num_samples)] 375 | return [255 - detected_map] + results 376 | 377 | @torch.inference_mode() 378 | def process_scribble_interactive(self, input_image, prompt, a_prompt, 379 | n_prompt, num_samples, image_resolution, 380 | ddim_steps, scale, seed, eta): 381 | self.load_weight('scribble') 382 | 383 | img = resize_image(HWC3(input_image['mask'][:, :, 0]), 384 | image_resolution) 385 | H, W, C = img.shape 386 | 387 | detected_map = np.zeros_like(img, dtype=np.uint8) 388 | detected_map[np.min(img, axis=2) > 127] = 255 389 | 390 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 391 | control = torch.stack([control for _ in range(num_samples)], dim=0) 392 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 393 | 394 | if seed == -1: 395 | seed = random.randint(0, 65535) 396 | seed_everything(seed) 397 | 398 | if config.save_memory: 399 | self.model.low_vram_shift(is_diffusing=False) 400 | 401 | cond = { 402 | 'c_concat': [control], 403 | 'c_crossattn': [ 404 | self.model.get_learned_conditioning( 405 | [prompt + ', ' + a_prompt] * num_samples) 406 | ] 407 | } 408 | un_cond = { 409 | 'c_concat': [control], 410 | 'c_crossattn': 411 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 412 | } 413 | shape = (4, H // 8, W // 8) 414 | 415 | if config.save_memory: 416 | self.model.low_vram_shift(is_diffusing=True) 417 | 418 | samples, intermediates = self.ddim_sampler.sample( 419 | ddim_steps, 420 | num_samples, 421 | shape, 422 | cond, 423 | verbose=False, 424 | eta=eta, 425 | unconditional_guidance_scale=scale, 426 | unconditional_conditioning=un_cond) 427 | 428 | if config.save_memory: 429 | self.model.low_vram_shift(is_diffusing=False) 430 | 431 | x_samples = self.model.decode_first_stage(samples) 432 | x_samples = ( 433 | einops.rearrange(x_samples, 'b c h 
w -> b h w c') * 127.5 + 434 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 435 | 436 | results = [x_samples[i] for i in range(num_samples)] 437 | return [255 - detected_map] + results 438 | 439 | @torch.inference_mode() 440 | def process_fake_scribble(self, input_image, prompt, a_prompt, n_prompt, 441 | num_samples, image_resolution, detect_resolution, 442 | ddim_steps, scale, seed, eta): 443 | self.load_weight('scribble') 444 | 445 | input_image = HWC3(input_image) 446 | detected_map = apply_hed(resize_image(input_image, detect_resolution)) 447 | detected_map = HWC3(detected_map) 448 | img = resize_image(input_image, image_resolution) 449 | H, W, C = img.shape 450 | 451 | detected_map = cv2.resize(detected_map, (W, H), 452 | interpolation=cv2.INTER_LINEAR) 453 | detected_map = nms(detected_map, 127, 3.0) 454 | detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0) 455 | detected_map[detected_map > 4] = 255 456 | detected_map[detected_map < 255] = 0 457 | 458 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 459 | control = torch.stack([control for _ in range(num_samples)], dim=0) 460 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 461 | 462 | if seed == -1: 463 | seed = random.randint(0, 65535) 464 | seed_everything(seed) 465 | 466 | if config.save_memory: 467 | self.model.low_vram_shift(is_diffusing=False) 468 | 469 | cond = { 470 | 'c_concat': [control], 471 | 'c_crossattn': [ 472 | self.model.get_learned_conditioning( 473 | [prompt + ', ' + a_prompt] * num_samples) 474 | ] 475 | } 476 | un_cond = { 477 | 'c_concat': [control], 478 | 'c_crossattn': 479 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 480 | } 481 | shape = (4, H // 8, W // 8) 482 | 483 | if config.save_memory: 484 | self.model.low_vram_shift(is_diffusing=True) 485 | 486 | samples, intermediates = self.ddim_sampler.sample( 487 | ddim_steps, 488 | num_samples, 489 | shape, 490 | cond, 491 | verbose=False, 492 | eta=eta, 493 | unconditional_guidance_scale=scale, 494 | unconditional_conditioning=un_cond) 495 | 496 | if config.save_memory: 497 | self.model.low_vram_shift(is_diffusing=False) 498 | 499 | x_samples = self.model.decode_first_stage(samples) 500 | x_samples = ( 501 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 502 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 503 | 504 | results = [x_samples[i] for i in range(num_samples)] 505 | return [255 - detected_map] + results 506 | 507 | @torch.inference_mode() 508 | def process_pose(self, input_image, prompt, a_prompt, n_prompt, 509 | num_samples, image_resolution, detect_resolution, 510 | ddim_steps, scale, seed, eta): 511 | self.load_weight('pose') 512 | 513 | input_image = HWC3(input_image) 514 | detected_map, _ = apply_openpose( 515 | resize_image(input_image, detect_resolution)) 516 | detected_map = HWC3(detected_map) 517 | img = resize_image(input_image, image_resolution) 518 | H, W, C = img.shape 519 | 520 | detected_map = cv2.resize(detected_map, (W, H), 521 | interpolation=cv2.INTER_NEAREST) 522 | 523 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 524 | control = torch.stack([control for _ in range(num_samples)], dim=0) 525 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 526 | 527 | if seed == -1: 528 | seed = random.randint(0, 65535) 529 | seed_everything(seed) 530 | 531 | if config.save_memory: 532 | self.model.low_vram_shift(is_diffusing=False) 533 | 534 | cond = { 535 | 'c_concat': [control], 536 | 'c_crossattn': [ 537 | 
self.model.get_learned_conditioning( 538 | [prompt + ', ' + a_prompt] * num_samples) 539 | ] 540 | } 541 | un_cond = { 542 | 'c_concat': [control], 543 | 'c_crossattn': 544 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 545 | } 546 | shape = (4, H // 8, W // 8) 547 | 548 | if config.save_memory: 549 | self.model.low_vram_shift(is_diffusing=True) 550 | 551 | samples, intermediates = self.ddim_sampler.sample( 552 | ddim_steps, 553 | num_samples, 554 | shape, 555 | cond, 556 | verbose=False, 557 | eta=eta, 558 | unconditional_guidance_scale=scale, 559 | unconditional_conditioning=un_cond) 560 | 561 | if config.save_memory: 562 | self.model.low_vram_shift(is_diffusing=False) 563 | 564 | x_samples = self.model.decode_first_stage(samples) 565 | x_samples = ( 566 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 567 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 568 | 569 | results = [x_samples[i] for i in range(num_samples)] 570 | return [detected_map] + results 571 | 572 | @torch.inference_mode() 573 | def process_seg(self, input_image, prompt, a_prompt, n_prompt, num_samples, 574 | image_resolution, detect_resolution, ddim_steps, scale, 575 | seed, eta): 576 | self.load_weight('seg') 577 | 578 | input_image = HWC3(input_image) 579 | detected_map = apply_uniformer( 580 | resize_image(input_image, detect_resolution)) 581 | img = resize_image(input_image, image_resolution) 582 | H, W, C = img.shape 583 | 584 | detected_map = cv2.resize(detected_map, (W, H), 585 | interpolation=cv2.INTER_NEAREST) 586 | 587 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 588 | control = torch.stack([control for _ in range(num_samples)], dim=0) 589 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 590 | 591 | if seed == -1: 592 | seed = random.randint(0, 65535) 593 | seed_everything(seed) 594 | 595 | if config.save_memory: 596 | self.model.low_vram_shift(is_diffusing=False) 597 | 598 | cond = { 599 | 'c_concat': [control], 600 | 'c_crossattn': [ 601 | self.model.get_learned_conditioning( 602 | [prompt + ', ' + a_prompt] * num_samples) 603 | ] 604 | } 605 | un_cond = { 606 | 'c_concat': [control], 607 | 'c_crossattn': 608 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 609 | } 610 | shape = (4, H // 8, W // 8) 611 | 612 | if config.save_memory: 613 | self.model.low_vram_shift(is_diffusing=True) 614 | 615 | samples, intermediates = self.ddim_sampler.sample( 616 | ddim_steps, 617 | num_samples, 618 | shape, 619 | cond, 620 | verbose=False, 621 | eta=eta, 622 | unconditional_guidance_scale=scale, 623 | unconditional_conditioning=un_cond) 624 | 625 | if config.save_memory: 626 | self.model.low_vram_shift(is_diffusing=False) 627 | 628 | x_samples = self.model.decode_first_stage(samples) 629 | x_samples = ( 630 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 631 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 632 | 633 | results = [x_samples[i] for i in range(num_samples)] 634 | return [detected_map] + results 635 | 636 | @torch.inference_mode() 637 | def process_depth(self, input_image, prompt, a_prompt, n_prompt, 638 | num_samples, image_resolution, detect_resolution, 639 | ddim_steps, scale, seed, eta): 640 | self.load_weight('depth') 641 | 642 | input_image = HWC3(input_image) 643 | detected_map, _ = apply_midas( 644 | resize_image(input_image, detect_resolution)) 645 | detected_map = HWC3(detected_map) 646 | img = resize_image(input_image, image_resolution) 647 | H, W, C = img.shape 648 | 649 | 
detected_map = cv2.resize(detected_map, (W, H), 650 | interpolation=cv2.INTER_LINEAR) 651 | 652 | control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0 653 | control = torch.stack([control for _ in range(num_samples)], dim=0) 654 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 655 | 656 | if seed == -1: 657 | seed = random.randint(0, 65535) 658 | seed_everything(seed) 659 | 660 | if config.save_memory: 661 | self.model.low_vram_shift(is_diffusing=False) 662 | 663 | cond = { 664 | 'c_concat': [control], 665 | 'c_crossattn': [ 666 | self.model.get_learned_conditioning( 667 | [prompt + ', ' + a_prompt] * num_samples) 668 | ] 669 | } 670 | un_cond = { 671 | 'c_concat': [control], 672 | 'c_crossattn': 673 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 674 | } 675 | shape = (4, H // 8, W // 8) 676 | 677 | if config.save_memory: 678 | self.model.low_vram_shift(is_diffusing=True) 679 | 680 | samples, intermediates = self.ddim_sampler.sample( 681 | ddim_steps, 682 | num_samples, 683 | shape, 684 | cond, 685 | verbose=False, 686 | eta=eta, 687 | unconditional_guidance_scale=scale, 688 | unconditional_conditioning=un_cond) 689 | 690 | if config.save_memory: 691 | self.model.low_vram_shift(is_diffusing=False) 692 | 693 | x_samples = self.model.decode_first_stage(samples) 694 | x_samples = ( 695 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 696 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 697 | 698 | results = [x_samples[i] for i in range(num_samples)] 699 | return [detected_map] + results 700 | 701 | @torch.inference_mode() 702 | def process_normal(self, input_image, prompt, a_prompt, n_prompt, 703 | num_samples, image_resolution, detect_resolution, 704 | ddim_steps, scale, seed, eta, bg_threshold): 705 | self.load_weight('normal') 706 | 707 | input_image = HWC3(input_image) 708 | _, detected_map = apply_midas(resize_image(input_image, 709 | detect_resolution), 710 | bg_th=bg_threshold) 711 | detected_map = HWC3(detected_map) 712 | img = resize_image(input_image, image_resolution) 713 | H, W, C = img.shape 714 | 715 | detected_map = cv2.resize(detected_map, (W, H), 716 | interpolation=cv2.INTER_LINEAR) 717 | 718 | control = torch.from_numpy( 719 | detected_map[:, :, ::-1].copy()).float().cuda() / 255.0 720 | control = torch.stack([control for _ in range(num_samples)], dim=0) 721 | control = einops.rearrange(control, 'b h w c -> b c h w').clone() 722 | 723 | if seed == -1: 724 | seed = random.randint(0, 65535) 725 | seed_everything(seed) 726 | 727 | if config.save_memory: 728 | self.model.low_vram_shift(is_diffusing=False) 729 | 730 | cond = { 731 | 'c_concat': [control], 732 | 'c_crossattn': [ 733 | self.model.get_learned_conditioning( 734 | [prompt + ', ' + a_prompt] * num_samples) 735 | ] 736 | } 737 | un_cond = { 738 | 'c_concat': [control], 739 | 'c_crossattn': 740 | [self.model.get_learned_conditioning([n_prompt] * num_samples)] 741 | } 742 | shape = (4, H // 8, W // 8) 743 | 744 | if config.save_memory: 745 | self.model.low_vram_shift(is_diffusing=True) 746 | 747 | samples, intermediates = self.ddim_sampler.sample( 748 | ddim_steps, 749 | num_samples, 750 | shape, 751 | cond, 752 | verbose=False, 753 | eta=eta, 754 | unconditional_guidance_scale=scale, 755 | unconditional_conditioning=un_cond) 756 | 757 | if config.save_memory: 758 | self.model.low_vram_shift(is_diffusing=False) 759 | 760 | x_samples = self.model.decode_first_stage(samples) 761 | x_samples = ( 762 | einops.rearrange(x_samples, 'b c h w -> b h w c') * 
127.5 + 763 | 127.5).cpu().numpy().clip(0, 255).astype(np.uint8) 764 | 765 | results = [x_samples[i] for i in range(num_samples)] 766 | return [detected_map] + results 767 | -------------------------------------------------------------------------------- /patch: -------------------------------------------------------------------------------- 1 | diff --git a/annotator/hed/__init__.py b/annotator/hed/__init__.py 2 | index 42d8dc6..1587035 100644 3 | --- a/annotator/hed/__init__.py 4 | +++ b/annotator/hed/__init__.py 5 | @@ -1,8 +1,12 @@ 6 | +import pathlib 7 | + 8 | import numpy as np 9 | import cv2 10 | import torch 11 | from einops import rearrange 12 | 13 | +root_dir = pathlib.Path(__file__).parents[2] 14 | + 15 | 16 | class Network(torch.nn.Module): 17 | def __init__(self): 18 | @@ -64,7 +68,7 @@ class Network(torch.nn.Module): 19 | torch.nn.Sigmoid() 20 | ) 21 | 22 | - self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load('./annotator/ckpts/network-bsds500.pth').items()}) 23 | + self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load(f'{root_dir}/annotator/ckpts/network-bsds500.pth').items()}) 24 | # end 25 | 26 | def forward(self, tenInput): 27 | diff --git a/annotator/midas/api.py b/annotator/midas/api.py 28 | index 9fa305e..d8594ea 100644 29 | --- a/annotator/midas/api.py 30 | +++ b/annotator/midas/api.py 31 | @@ -1,5 +1,7 @@ 32 | # based on https://github.com/isl-org/MiDaS 33 | 34 | +import pathlib 35 | + 36 | import cv2 37 | import torch 38 | import torch.nn as nn 39 | @@ -10,10 +12,11 @@ from .midas.midas_net import MidasNet 40 | from .midas.midas_net_custom import MidasNet_small 41 | from .midas.transforms import Resize, NormalizeImage, PrepareForNet 42 | 43 | +root_dir = pathlib.Path(__file__).parents[2] 44 | 45 | ISL_PATHS = { 46 | - "dpt_large": "annotator/ckpts/dpt_large-midas-2f21e586.pt", 47 | - "dpt_hybrid": "annotator/ckpts/dpt_hybrid-midas-501f0c75.pt", 48 | + "dpt_large": f"{root_dir}/annotator/ckpts/dpt_large-midas-2f21e586.pt", 49 | + "dpt_hybrid": f"{root_dir}/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt", 50 | "midas_v21": "", 51 | "midas_v21_small": "", 52 | } 53 | diff --git a/annotator/mlsd/__init__.py b/annotator/mlsd/__init__.py 54 | index 75db717..f310fe6 100644 55 | --- a/annotator/mlsd/__init__.py 56 | +++ b/annotator/mlsd/__init__.py 57 | @@ -1,3 +1,5 @@ 58 | +import pathlib 59 | + 60 | import cv2 61 | import numpy as np 62 | import torch 63 | @@ -8,8 +10,9 @@ from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny 64 | from .models.mbv2_mlsd_large import MobileV2_MLSD_Large 65 | from .utils import pred_lines 66 | 67 | +root_dir = pathlib.Path(__file__).parents[2] 68 | 69 | -model_path = './annotator/ckpts/mlsd_large_512_fp32.pth' 70 | +model_path = f'{root_dir}/annotator/ckpts/mlsd_large_512_fp32.pth' 71 | model = MobileV2_MLSD_Large() 72 | model.load_state_dict(torch.load(model_path), strict=True) 73 | model = model.cuda().eval() 74 | diff --git a/annotator/openpose/__init__.py b/annotator/openpose/__init__.py 75 | index 47d50a5..2369eed 100644 76 | --- a/annotator/openpose/__init__.py 77 | +++ b/annotator/openpose/__init__.py 78 | @@ -1,4 +1,5 @@ 79 | import os 80 | +import pathlib 81 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 82 | 83 | import torch 84 | @@ -7,8 +8,10 @@ from . 
import util 85 | from .body import Body 86 | from .hand import Hand 87 | 88 | -body_estimation = Body('./annotator/ckpts/body_pose_model.pth') 89 | -hand_estimation = Hand('./annotator/ckpts/hand_pose_model.pth') 90 | +root_dir = pathlib.Path(__file__).parents[2] 91 | + 92 | +body_estimation = Body(f'{root_dir}/annotator/ckpts/body_pose_model.pth') 93 | +hand_estimation = Hand(f'{root_dir}/annotator/ckpts/hand_pose_model.pth') 94 | 95 | 96 | def apply_openpose(oriImg, hand=False): 97 | diff --git a/annotator/uniformer/__init__.py b/annotator/uniformer/__init__.py 98 | index 500e53c..4061dbe 100644 99 | --- a/annotator/uniformer/__init__.py 100 | +++ b/annotator/uniformer/__init__.py 101 | @@ -1,9 +1,12 @@ 102 | +import pathlib 103 | + 104 | from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot 105 | from annotator.uniformer.mmseg.core.evaluation import get_palette 106 | 107 | +root_dir = pathlib.Path(__file__).parents[2] 108 | 109 | -checkpoint_file = "annotator/ckpts/upernet_global_small.pth" 110 | -config_file = 'annotator/uniformer/exp/upernet_global_small/config.py' 111 | +checkpoint_file = f"{root_dir}/annotator/ckpts/upernet_global_small.pth" 112 | +config_file = f'{root_dir}/annotator/uniformer/exp/upernet_global_small/config.py' 113 | model = init_segmentor(config_file, checkpoint_file).cuda() 114 | 115 | 116 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | ffmpeg-python 3 | moviepy 4 | addict==2.4.0 5 | albumentations==1.3.0 6 | einops==0.6.0 7 | gradio==3.18.0 8 | imageio==2.25.0 9 | imageio-ffmpeg==0.4.8 10 | kornia==0.6.9 11 | omegaconf==2.3.0 12 | open-clip-torch==2.13.0 13 | opencv-contrib-python==4.7.0.68 14 | opencv-python-headless==4.7.0.68 15 | prettytable==3.6.0 16 | pytorch-lightning==1.9.0 17 | safetensors==0.2.8 18 | timm==0.6.12 19 | torch==1.13.1 20 | torchvision==0.14.1 21 | transformers==4.26.1 22 | xformers==0.0.16 23 | yapf==0.32.0 24 | -------------------------------------------------------------------------------- /share_btn.py: -------------------------------------------------------------------------------- 1 | community_icon_html = """""" 5 | 6 | loading_icon_html = """""" 10 | 11 | share_js = """async () => { 12 | async function uploadFile(file){ 13 | const UPLOAD_URL = 'https://huggingface.co/uploads'; 14 | const response = await fetch(UPLOAD_URL, { 15 | method: 'POST', 16 | headers: { 17 | 'Content-Type': file.type, 18 | 'X-Requested-With': 'XMLHttpRequest', 19 | }, 20 | body: file, /// <- File inherits from Blob 21 | }); 22 | const url = await response.text(); 23 | return url; 24 | } 25 | 26 | async function getVideoBlobFile(videoEL){ 27 | const res = await fetch(videoEL.src); 28 | const blob = await res.blob(); 29 | const videoId = Date.now() % 200; 30 | const fileName = `vid-pix2pix-${{videoId}}.wav`; 31 | const videoBlob = new File([blob], fileName, { type: 'video/mp4' }); 32 | console.log(videoBlob); 33 | return videoBlob; 34 | } 35 | 36 | const gradioEl = document.querySelector("gradio-app").shadowRoot || document.querySelector('body > gradio-app'); 37 | const captionTxt = gradioEl.querySelector('#prompt-in textarea').value; 38 | const inputVidEl = gradioEl.querySelector('#input-vid video'); 39 | const outputVideo = gradioEl.querySelector('#video-output video'); 40 | 41 | const shareBtnEl = gradioEl.querySelector('#share-btn'); 42 | const shareIconEl = 
gradioEl.querySelector('#share-btn-share-icon'); 43 | const loadingIconEl = gradioEl.querySelector('#share-btn-loading-icon'); 44 | if(!outputVideo){ 45 | return; 46 | }; 47 | shareBtnEl.style.pointerEvents = 'none'; 48 | shareIconEl.style.display = 'none'; 49 | loadingIconEl.style.removeProperty('display'); 50 | 51 | const inputFile = await getVideoBlobFile(inputVidEl); 52 | const urlInputVid = await uploadFile(inputFile); 53 | const videoOutFile = await getVideoBlobFile(outputVideo); 54 | const dataOutputVid = await uploadFile(videoOutFile); 55 | 56 | const descriptionMd = ` 57 | #### Video input: 58 | ${urlInputVid} 59 | 60 | #### ControlNet result: 61 | ${dataOutputVid} 62 | `; 63 | const params = new URLSearchParams({ 64 | title: captionTxt, 65 | description: descriptionMd, 66 | }); 67 | const paramsStr = params.toString(); 68 | window.open(`https://huggingface.co/spaces/fffiloni/ControlNet-Video/discussions/new?${paramsStr}`, '_blank'); 69 | shareBtnEl.style.removeProperty('pointer-events'); 70 | shareIconEl.style.removeProperty('display'); 71 | loadingIconEl.style.display = 'none'; 72 | }""" -------------------------------------------------------------------------------- /style.css: -------------------------------------------------------------------------------- 1 | #col-container {max-width: 820px; margin-left: auto; margin-right: auto;} 2 | #duplicate-container{ 3 | display: flex; 4 | justify-content: space-between; 5 | align-items: center; 6 | line-height: 1em; 7 | flex-direction: row-reverse; 8 | font-size:1em; 9 | } 10 | a, a:hover, a:visited { 11 | text-decoration-line: underline; 12 | font-weight: 600; 13 | color: #1f2937 !important; 14 | } 15 | 16 | .dark a, .dark a:hover, .dark a:visited { 17 | color: #f3f4f6 !important; 18 | } 19 | 20 | .footer { 21 | margin-bottom: 45px; 22 | margin-top: 10px; 23 | text-align: center; 24 | border-bottom: 1px solid #e5e5e5; 25 | } 26 | 27 | .footer>p { 28 | font-size: .8rem!important; 29 | display: inline-block; 30 | padding: 0 10px; 31 | transform: translateY(26px); 32 | background: white; 33 | } 34 | .dark .footer { 35 | border-color: #303030; 36 | } 37 | .dark .footer>p { 38 | background: #0b0f19; 39 | } 40 | 41 | div#may-like-container > p { 42 | font-size: .8em; 43 | margin-bottom: 4px; 44 | } 45 | 46 | .animate-spin { 47 | animation: spin 1s linear infinite; 48 | } 49 | 50 | @keyframes spin { 51 | from { 52 | transform: rotate(0deg); 53 | } 54 | to { 55 | transform: rotate(360deg); 56 | } 57 | } 58 | 59 | #share-btn-container { 60 | display: flex; 61 | padding-left: 0.5rem !important; 62 | padding-right: 0.5rem !important; 63 | background-color: #000000; 64 | justify-content: center; 65 | align-items: center; 66 | border-radius: 9999px !important; 67 | max-width: 13rem; 68 | } 69 | 70 | #share-btn-container:hover { 71 | background-color: #060606; 72 | } 73 | 74 | #share-btn { 75 | all: initial; 76 | color: #ffffff; 77 | font-weight: 600; 78 | cursor:pointer; 79 | font-family: 'IBM Plex Sans', sans-serif; 80 | margin-left: 0.5rem !important; 81 | padding-top: 0.5rem !important; 82 | padding-bottom: 0.5rem !important; 83 | right:0; 84 | } 85 | 86 | #share-btn * { 87 | all: unset; 88 | } 89 | 90 | #share-btn-container div:nth-child(-n+2){ 91 | width: auto !important; 92 | min-height: 0px !important; 93 | } 94 | 95 | #share-btn-container .wrap { 96 | display: none !important; 97 | } 98 | 99 | #share-btn-container.hidden { 100 | display: none!important; 101 | } 
--------------------------------------------------------------------------------
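For reference, the core flow of app.py above is: resize the input video, dump it to frames with OpenCV, run one ControlNet pass per frame, then rebuild a clip with moviepy at the original frame rate. Below is a minimal sketch of that loop, assuming a hypothetical process_frame callable standing in for controlnet() and PNG intermediates instead of the JPEG files used above; it illustrates the pattern rather than reproducing the Space's exact code.

# Minimal sketch of the per-frame video pipeline in app.py:
# split a video into frames, run an image-to-image function on each frame,
# and rebuild a video at the source frame rate.
import pathlib

import cv2
from moviepy.editor import ImageSequenceClip


def video_to_video(video_in: str,
                   video_out: str,
                   process_frame,
                   max_seconds: float = 1.0) -> str:
    cap = cv2.VideoCapture(video_in)
    fps = cap.get(cv2.CAP_PROP_FPS)
    n_frames = int(max_seconds * fps)  # same trim logic as infer()

    out_dir = pathlib.Path('frames_out')
    out_dir.mkdir(exist_ok=True)

    result_frames = []
    i = 0
    while cap.isOpened() and i < n_frames:
        ret, frame = cap.read()
        if not ret:
            break
        # OpenCV reads BGR; convert to RGB before handing the frame to the model.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # process_frame is assumed to take and return a uint8 HxWx3 ndarray.
        processed = process_frame(rgb)
        out_path = out_dir / f'frame_{i:05d}.png'
        cv2.imwrite(str(out_path), cv2.cvtColor(processed, cv2.COLOR_RGB2BGR))
        result_frames.append(str(out_path))
        i += 1
    cap.release()

    # Reassemble the processed frames into a clip at the original fps.
    clip = ImageSequenceClip(result_frames, fps=fps)
    clip.write_videofile(video_out, fps=fps)
    return video_out

Called as video_to_video('video_resized.mp4', 'movie.mp4', lambda f: f), the sketch simply copies the first second of the clip; swapping the identity lambda for a call into model.Model (e.g. process_pose or process_canny, as controlnet() does) reproduces the behaviour of infer().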