├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml ├── scripts │ └── issue_checker.py └── workflows │ ├── issue_checker.yaml │ └── run_tests.yaml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── install.py ├── javascript ├── deforum-hints.js └── deforum.js ├── preload.py ├── pytest.ini ├── requirements-dev.txt ├── requirements.txt ├── scripts ├── default_settings.txt ├── deforum.py ├── deforum_api.py ├── deforum_api_models.py ├── deforum_extend_paths.py └── deforum_helpers │ ├── 114763196.jpg │ ├── RAFT.py │ ├── animation.py │ ├── animation_key_frames.py │ ├── args.py │ ├── auto_navigation.py │ ├── colors.py │ ├── composable_masks.py │ ├── consistency_check.py │ ├── defaults.py │ ├── deforum_controlnet.py │ ├── deforum_controlnet_gradio.py │ ├── deforum_tqdm.py │ ├── deprecation_utils.py │ ├── depth.py │ ├── depth_adabins.py │ ├── depth_leres.py │ ├── depth_midas.py │ ├── depth_zoe.py │ ├── frame_interpolation.py │ ├── general_utils.py │ ├── generate.py │ ├── gradio_funcs.py │ ├── human_masking.py │ ├── hybrid_video.py │ ├── image_sharpening.py │ ├── load_images.py │ ├── masks.py │ ├── noise.py │ ├── opts_overrider.py │ ├── parseq_adapter.py │ ├── parseq_adapter_test.py │ ├── prompt.py │ ├── render.py │ ├── render_modes.py │ ├── resume.py │ ├── rich.py │ ├── run_deforum.py │ ├── save_images.py │ ├── seed.py │ ├── settings.py │ ├── src │ ├── adabins │ │ ├── __init__.py │ │ ├── layers.py │ │ ├── miniViT.py │ │ └── unet_adaptive_bins.py │ ├── clipseg │ │ ├── LICENSE │ │ ├── Quickstart.ipynb │ │ ├── Readme.md │ │ ├── Tables.ipynb │ │ ├── Visual_Feature_Engineering.ipynb │ │ ├── datasets │ │ │ ├── coco_wrapper.py │ │ │ ├── pascal_classes.json │ │ │ ├── pascal_zeroshot.py │ │ │ ├── pfe_dataset.py │ │ │ ├── phrasecut.py │ │ │ └── utils.py │ │ ├── environment.yml │ │ ├── evaluation_utils.py │ │ ├── example_image.jpg │ │ ├── experiments │ │ │ ├── ablation.yaml │ │ │ ├── coco.yaml │ │ │ ├── pascal_1shot.yaml │ │ │ └── phrasecut.yaml │ │ ├── general_utils.py │ │ ├── metrics.py │ │ ├── models │ │ │ ├── clipseg.py │ │ │ └── vitseg.py │ │ ├── overview.png │ │ ├── score.py │ │ ├── setup.py │ │ ├── training.py │ │ └── weights │ │ │ └── rd64-uni.pth │ ├── film_interpolation │ │ ├── film_inference.py │ │ └── film_util.py │ ├── infer.py │ ├── leres │ │ └── lib │ │ │ ├── Resnet.py │ │ │ ├── Resnext_torch.py │ │ │ ├── __init__.py │ │ │ ├── multi_depth_model_woauxi.py │ │ │ ├── net_tools.py │ │ │ ├── network_auxi.py │ │ │ ├── spvcnn_classsification.py │ │ │ ├── spvcnn_utils.py │ │ │ └── test_utils.py │ ├── midas │ │ ├── backbones │ │ │ ├── beit.py │ │ │ ├── levit.py │ │ │ ├── next_vit.py │ │ │ ├── swin.py │ │ │ ├── swin2.py │ │ │ ├── swin_common.py │ │ │ ├── utils.py │ │ │ └── vit.py │ │ ├── base_model.py │ │ ├── blocks.py │ │ ├── dpt_depth.py │ │ ├── midas_net.py │ │ ├── midas_net_custom.py │ │ ├── model_loader.py │ │ ├── transforms.py │ │ └── vit.py │ ├── model_io.py │ ├── py3d_tools.py │ ├── rife │ │ ├── inference_video.py │ │ ├── model │ │ │ ├── loss.py │ │ │ ├── pytorch_msssim │ │ │ │ └── __init__.py │ │ │ └── warplayer.py │ │ └── rife_new_gen │ │ │ ├── IFNet_HDv3.py │ │ │ ├── RIFE_HDv3.py │ │ │ └── refine.py │ ├── utils.py │ └── zoedepth │ │ ├── data │ │ ├── __init__.py │ │ ├── data_mono.py │ │ ├── ddad.py │ │ ├── diml_indoor_test.py │ │ ├── diml_outdoor_test.py │ │ ├── diode.py │ │ ├── hypersim.py │ │ ├── ibims.py │ │ ├── preprocess.py │ │ ├── sun_rgbd_loader.py │ │ ├── transforms.py │ │ ├── vkitti.py │ │ └── vkitti2.py │ │ ├── 
models │ │ ├── __init__.py │ │ ├── base_models │ │ │ ├── __init__.py │ │ │ └── midas.py │ │ ├── builder.py │ │ ├── depth_model.py │ │ ├── layers │ │ │ ├── attractor.py │ │ │ ├── dist_layers.py │ │ │ ├── localbins_layers.py │ │ │ └── patch_transformer.py │ │ ├── model_io.py │ │ ├── zoedepth │ │ │ ├── __init__.py │ │ │ ├── config_zoedepth.json │ │ │ ├── config_zoedepth_kitti.json │ │ │ └── zoedepth_v1.py │ │ └── zoedepth_nk │ │ │ ├── __init__.py │ │ │ ├── config_zoedepth_nk.json │ │ │ └── zoedepth_nk_v1.py │ │ └── utils │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── easydict │ │ └── __init__.py │ │ ├── geometry.py │ │ └── misc.py │ ├── subtitle_handler.py │ ├── ui_elements.py │ ├── ui_left.py │ ├── ui_right.py │ ├── ui_settings.py │ ├── upscaling.py │ ├── vid2depth.py │ ├── video_audio_utilities.py │ ├── webui_sd_pipeline.py │ └── word_masking.py ├── style.css └── tests ├── __snapshots__ ├── deforum_postprocess_test.ambr └── deforum_test.ambr ├── conftest.py ├── deforum_postprocess_test.py ├── deforum_test.py ├── testdata ├── example_init_vid.mp4 ├── parseq.json └── simple.input_settings.txt └── utils.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: deforum 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Deforum Github discussions 4 | url: https://github.com/deforum-art/deforum-for-automatic1111-webui/discussions 5 | about: Please ask and answer questions here. If you want to complain about something, don't try to circumvent issue filling by starting a discussion here 🙃 6 | - name: Deforum Discord 7 | url: https://discord.gg/deforum 8 | about: Here is our main community where we chat, discuss development and share experiments and results 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea for the Deforum extension 3 | title: "[Feature Request]: " 4 | labels: ["enhancement"] 5 | 6 | body: 7 | - type: checkboxes 8 | attributes: 9 | label: Is there an existing issue for this? 10 | description: Please search to see if an issue already exists for the feature you want, and that it's not implemented in a recent build/commit. 
11 | options: 12 | - label: I have searched the existing issues and checked the recent builds/commits 13 | required: true 14 | - type: markdown 15 | attributes: 16 | value: | 17 | *Please fill this form with as much information as possible, provide screenshots and/or illustrations of the feature if possible* 18 | - type: textarea 19 | id: feature 20 | attributes: 21 | label: What would your feature do ? 22 | description: Tell us about your feature in a very clear and simple way, and what problem it would solve 23 | validations: 24 | required: true 25 | - type: textarea 26 | id: workflow 27 | attributes: 28 | label: Proposed workflow 29 | description: Please provide us with step by step information on how you'd like the feature to be accessed and used 30 | value: | 31 | 1. Go to .... 32 | 2. Press .... 33 | 3. ... 34 | validations: 35 | required: true 36 | - type: textarea 37 | id: misc 38 | attributes: 39 | label: Additional information 40 | description: Add any other context or screenshots about the feature request here. 41 | - type: textarea 42 | attributes: 43 | label: Are you going to help adding it? 44 | description: Do you want to participate in Deforum development and bring the desired feature sooner? Let us know if you are willing to add the desired feature, ideally, leave your Discord handle here, so we will contact you for a less formal conversation. Our community is welcoming and ready to provide you with any information on the project structure or how the code works. If not, however, keep in mind that if you do not want to do your new feature yourself, you will have to wait until the team picks up your issue. 45 | validations: 46 | required: true 47 | -------------------------------------------------------------------------------- /.github/workflows/issue_checker.yaml: -------------------------------------------------------------------------------- 1 | name: Issue Checker 2 | 3 | on: 4 | issues: 5 | types: [opened, reopened, edited] 6 | 7 | jobs: 8 | check_issue: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout repository 12 | uses: actions/checkout@v3 13 | - name: Set up Python 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: '3.x' 17 | - name: Install dependencies 18 | run: pip install PyGithub 19 | - name: Check issue 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | ISSUE_NUMBER: ${{ github.event.number }} 23 | run: python .github/scripts/issue_checker.py 24 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | test: 9 | name: tests on CPU with empty model 10 | runs-on: ubuntu-latest 11 | if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name 12 | steps: 13 | - name: Checkout a1111 14 | uses: actions/checkout@v3 15 | with: 16 | repository: AUTOMATIC1111/stable-diffusion-webui 17 | ref: v1.6.0 18 | - name: Checkout Controlnet extension 19 | uses: actions/checkout@v3 20 | with: 21 | repository: Mikubill/sd-webui-controlnet 22 | path: extensions/sd-webui-controlnet 23 | - name: Checkout Deforum 24 | uses: actions/checkout@v3 25 | with: 26 | path: extensions/deforum 27 | - name: Set up Python 3.10 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: 3.10.6 31 | cache: pip 32 | cache-dependency-path: | 33 | 
**/requirements*txt 34 | launch.py 35 | - name: Install test dependencies 36 | run: pip install wait-for-it -r extensions/deforum/requirements-dev.txt 37 | env: 38 | PIP_DISABLE_PIP_VERSION_CHECK: "1" 39 | PIP_PROGRESS_BAR: "off" 40 | - name: Setup environment 41 | run: python launch.py --skip-torch-cuda-test --exit 42 | env: 43 | PIP_DISABLE_PIP_VERSION_CHECK: "1" 44 | PIP_PROGRESS_BAR: "off" 45 | TORCH_INDEX_URL: https://download.pytorch.org/whl/cpu 46 | WEBUI_LAUNCH_LIVE_OUTPUT: "1" 47 | PYTHONUNBUFFERED: "1" 48 | - name: Start test server 49 | run: > 50 | python -m coverage run 51 | --data-file=.coverage.server 52 | launch.py 53 | --skip-prepare-environment 54 | --skip-torch-cuda-test 55 | --test-server 56 | --do-not-download-clip 57 | --no-half 58 | --disable-opt-split-attention 59 | --use-cpu all 60 | --api-server-stop 61 | --deforum-api 62 | --api 63 | 2>&1 | tee serverlog.txt & 64 | - name: Run tests (with continue-on-error due to mysterious non-zero return code on success) 65 | continue-on-error: true 66 | id: runtests 67 | run: | 68 | wait-for-it --service 127.0.0.1:7860 -t 600 69 | cd extensions/deforum 70 | python -m coverage run --data-file=.coverage.client -m pytest -vv --junitxml=tests/results.xml tests 71 | - name: Check for test failures (necessary because of continue-on-error above) 72 | id: testresults 73 | uses: mavrosxristoforos/get-xml-info@1.1.0 74 | with: 75 | xml-file: 'extensions/deforum/tests/results.xml' 76 | xpath: '//testsuite/@failures' 77 | - name: Fail if there were test failures 78 | run: | 79 | echo "Test failures: ${{ steps.testresults.outputs.info }}" 80 | [ ${{ steps.testresults.outputs.info }} -eq 0 ] 81 | - name: Kill test server 82 | if: always() 83 | run: curl -vv -XPOST http://127.0.0.1:7860/sdapi/v1/server-stop && sleep 10 84 | - name: Show coverage 85 | run: | 86 | python -m coverage combine .coverage* extensions/deforum/.coverage* 87 | python -m coverage report -i 88 | python -m coverage html -i 89 | - name: Upload main app output 90 | uses: actions/upload-artifact@v3 91 | if: always() 92 | with: 93 | name: serverlog 94 | path: serverlog.txt 95 | - name: Upload coverage HTML 96 | uses: actions/upload-artifact@v3 97 | if: always() 98 | with: 99 | name: htmlcov 100 | path: htmlcov 101 | - name: Surface failing tests 102 | if: always() 103 | uses: pmeier/pytest-results-action@main 104 | with: 105 | path: extensions/deforum/tests/results.xml 106 | summary: true 107 | display-options: fEX 108 | fail-on-empty: true 109 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | # Unnecessary compiled python files. 
18 | __pycache__ 19 | *.pyc 20 | *.pyo 21 | 22 | # Output Images 23 | outputs 24 | 25 | # Log files for colab-convert 26 | cc-outputs.log 27 | *.safetensors 28 | scripts/deforum_helpers/navigation.py 29 | 30 | #test output 31 | htmlcov 32 | tests/results.xml 33 | .coverage* 34 | serverlog.txt 35 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing, please ping the devs via Discord https://discord.gg/deforum to make sure your addition will fit well into such a large project, and to get help if needed. 4 | 5 | *By contributing to this project you agree that your work will be granted copyright to Deforum LLC and licensed under the terms of the GNU Affero General Public License version 3.* 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Deforum Stable Diffusion — official extension for AUTOMATIC1111's webui 3 | 4 |

5 | Last Commit 6 | GitHub issues 7 | GitHub stars 8 | GitHub forks 9 | 10 |

11 | 12 | ## Need help? See our [FAQ](https://github.com/deforum-art/sd-webui-deforum/wiki/FAQ-&-Troubleshooting) 13 | 14 | ## Getting Started 15 | 16 | 1. Install [AUTOMATIC1111's webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui/). 17 | 18 | 2. Now there are two ways: either clone the repo into the `extensions` directory via the git command line launched within the `stable-diffusion-webui` folder 19 | 20 | ```sh 21 | git clone https://github.com/deforum-art/sd-webui-deforum extensions/deforum 22 | ``` 23 | 24 | Or download this repository, locate the `extensions` folder within your WebUI installation, create a folder named `deforum` and put the contents of the downloaded directory inside of it. Then restart WebUI. 25 | 26 | Or launch A1111, navigate to the Extensions tab, choose Available, find deforum in the list of available extensions and install it. Restart A1111 once the extension has been installed. 27 | 3. Open the webui, find the Deforum tab at the top of the page. 28 | 29 | 4. Enter the animation settings. Refer to [this general guide](https://docs.google.com/document/d/1pEobUknMFMkn8F5TMsv8qRzamXX_75BShMMXV8IFslI/edit) and [this guide to math keyframing functions in Deforum](https://docs.google.com/document/d/1pfW1PwbDIuW0cv-dnuyYj1UzPqe23BlSLTJsqazffXM/edit?usp=sharing). However, **in this version prompt weights less than zero don't just work like in the original Deforum!** Split the positive and the negative prompt in the json section using the --neg argument, like this: "apple:\`where(cos(t)>=0, cos(t), 0)\`, snow --neg strawberry:\`where(cos(t)<0, -cos(t), 0)\`" (a fuller prompts example is shown below) 30 | 31 | 5. To view animation frames as they're being made, without waiting for the completion of an animation, go to the 'Settings' tab and set the value of the setting shown below **above zero**. Warning: it may slow down the generation process. 32 | 33 | ![adsdasunknown](https://user-images.githubusercontent.com/14872007/196064311-1b79866a-e55b-438a-84a7-004ff30829ad.png) 34 | 35 | 36 | 6. Run the script and see if it's working. **In 3D mode a large delay is expected at first** as the script loads the depth models. With the default settings, the whole thing should consume about 6.4 GB of VRAM at its 3D mode peaks, and no more than 3.8 GB of VRAM in 3D mode if you launch the webui with the '--lowvram' command line argument. 37 | 38 | 7. After the generation process is completed, click the button with the self-describing name to show the video or gif result right in the GUI! 39 | 40 | 8. Join our Discord where you can post generated stuff, ask questions and more: https://discord.gg/deforum.
41 | * There's also the 'Issues' tab in the repo, for well... reporting issues ;) 42 | 43 | 9. Profit! 44 | 45 | ## Known issues 46 | 47 | * This port is not fully backward-compatible with the notebook and the local version, both due to the changes in how AUTOMATIC1111's webui handles Stable Diffusion models and to the changes in this script to get it to work in the new environment. *Expect* that you may not get exactly the same result, or that things may break down, because of the older settings. 48 | 49 | ## Screenshots 50 | 51 | Amazing raw Deforum animation by [Pxl.Pshr](https://www.instagram.com/pxl.pshr): 52 | * Turn Audio ON! 53 | 54 | (Audio credits: SKRILLEX, FRED AGAIN & FLOWDAN - RUMBLE (PHACE'S DNB FLIP)) 55 | 56 | https://user-images.githubusercontent.com/121192995/224450647-39529b28-be04-4871-bb7a-faf7afda2ef2.mp4 57 | 58 | Setting file of that video: [here](https://github.com/deforum-art/sd-webui-deforum/files/11353167/PxlPshrWinningAnimationSettings.txt). 59 | 60 |
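Here is roughly what the `--neg` split from step 4 of Getting Started looks like inside the prompts JSON. This is a minimal sketch: the frame numbers and the second prompt are placeholders, not recommended values.

```json
{
    "0": "apple:`where(cos(t)>=0, cos(t), 0)`, snow --neg strawberry:`where(cos(t)<0, -cos(t), 0)`",
    "60": "a snowy forest, highly detailed --neg blurry, watermark"
}
```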
61 | 62 | Main extension tab: 63 | 64 | ![image](https://user-images.githubusercontent.com/121192995/226101131-43bf594a-3152-45dd-a5d1-2538d0bc221d.png) 65 | 66 | Keyframes tab: 67 | 68 | ![image](https://user-images.githubusercontent.com/121192995/226101140-bfe6cce7-9b78-4a1d-be9a-43e1fc78239e.png) 69 | 70 | ## License 71 | 72 | This program is distributed under the terms of the GNU Affero Public License v3.0, copyright (c) 2023 Deforum LLC. 73 | 74 | Some of its sublicensed integrated 3rd party components may have other licenses, see LICENSE for usage terms. 75 | -------------------------------------------------------------------------------- /install.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import launch 18 | import os 19 | 20 | req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt") 21 | 22 | with open(req_file) as file: 23 | for lib in file: 24 | lib = lib.strip() 25 | if not launch.is_installed(lib): 26 | launch.run_pip(f"install {lib}", f"Deforum requirement: {lib}") -------------------------------------------------------------------------------- /javascript/deforum.js: -------------------------------------------------------------------------------- 1 | /* 2 | # Copyright (C) 2023 Deforum LLC 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU Affero General Public License as published by 6 | # the Free Software Foundation, version 3 of the License. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU Affero General Public License 14 | # along with this program. If not, see . 15 | 16 | Contact the authors: https://deforum.github.io/ 17 | */ 18 | 19 | function submit_deforum(){ 20 | rememberGallerySelection('deforum_gallery') 21 | showSubmitButtons('deforum', false) 22 | 23 | var id = randomId() 24 | requestProgress(id, gradioApp().getElementById('deforum_gallery_container'), gradioApp().getElementById('deforum_gallery'), function(){ 25 | showSubmitButtons('deforum', true) 26 | }) 27 | 28 | var res = create_submit_args(arguments) 29 | 30 | res[0] = id 31 | 32 | return res 33 | } -------------------------------------------------------------------------------- /preload.py: -------------------------------------------------------------------------------- 1 | # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI. 
2 | # Copyright (C) 2023 Deforum LLC 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU Affero General Public License as published by 6 | # the Free Software Foundation, version 3 of the License. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU Affero General Public License 14 | # along with this program. If not, see . 15 | 16 | # Contact the authors: https://deforum.github.io/ 17 | 18 | def preload(parser): 19 | parser.add_argument( 20 | "--deforum-api", 21 | action="store_true", 22 | help="Enable the Deforum API", 23 | default=None, 24 | ) 25 | parser.add_argument( 26 | "--deforum-simple-api", 27 | action="store_true", 28 | help="Enable the simplified version of Deforum API", 29 | default=None, 30 | ) 31 | parser.add_argument( 32 | "--deforum-run-now", 33 | type=str, 34 | help="Comma-delimited list of deforum settings files to run immediately on startup", 35 | default=None, 36 | ) 37 | parser.add_argument( 38 | "--deforum-terminate-after-run-now", 39 | action="store_true", 40 | help="Whether to shut down the a1111 process immediately after completing the generations passed in to '--deforum-run-now'.", 41 | default=None, 42 | ) -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore::DeprecationWarning 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | coverage 2 | syrupy 3 | pytest 4 | tenacity 5 | pydantic_requests 6 | moviepy -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numexpr 2 | matplotlib 3 | pandas 4 | av 5 | pims 6 | imageio_ffmpeg 7 | rich 8 | gdown -------------------------------------------------------------------------------- /scripts/deforum.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os 18 | 19 | import modules.paths as ph 20 | from modules import script_callbacks 21 | from modules.shared import cmd_opts 22 | from scripts.deforum_extend_paths import deforum_sys_extend 23 | 24 | 25 | def init_deforum(): 26 | # use sys.path.extend to make sure all of our files are available for importation 27 | deforum_sys_extend() 28 | 29 | # create the Models/Deforum folder, where many of the deforum related models/ packages will be downloaded 30 | os.makedirs(ph.models_path + '/Deforum', exist_ok=True) 31 | 32 | # import our on_ui_tabs and on_ui_settings functions from the respected files 33 | from deforum_helpers.ui_right import on_ui_tabs 34 | from deforum_helpers.ui_settings import on_ui_settings 35 | 36 | # trigger webui's extensions mechanism using our imported main functions - 37 | # first to create the actual deforum gui, then to make the deforum tab in webui's settings section 38 | script_callbacks.on_ui_tabs(on_ui_tabs) 39 | script_callbacks.on_ui_settings(on_ui_settings) 40 | 41 | init_deforum() 42 | 43 | -------------------------------------------------------------------------------- /scripts/deforum_api_models.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | from pydantic import BaseModel 18 | from typing import Any, Dict, List, Optional, Union 19 | from dataclasses import dataclass 20 | from enum import Enum 21 | 22 | class Batch(BaseModel): 23 | deforum_settings : Optional[Union[Dict[str, Any],List[Dict[str, Any]]]] 24 | options_overrides : Optional[Dict[str, Any]] 25 | 26 | class DeforumJobStatusCategory(str, Enum): 27 | ACCEPTED = "ACCEPTED" 28 | SUCCEEDED = "SUCCEEDED" 29 | FAILED = "FAILED" 30 | CANCELLED = "CANCELLED" 31 | 32 | class DeforumJobPhase(str, Enum): 33 | QUEUED = "QUEUED" 34 | PREPARING = "PREPARING" 35 | GENERATING = "GENERATING" 36 | POST_PROCESSING = "POST_PROCESSING" 37 | DONE = "DONE" 38 | 39 | class DeforumJobErrorType(str, Enum): 40 | NONE = "NONE" 41 | RETRYABLE = "RETRYABLE" 42 | TERMINAL = "TERMINAL" 43 | 44 | @dataclass(frozen=True) 45 | class DeforumJobStatus(BaseModel): 46 | id: str 47 | status : DeforumJobStatusCategory 48 | phase : DeforumJobPhase 49 | error_type : DeforumJobErrorType 50 | phase_progress : float 51 | started_at: float 52 | last_updated: float 53 | execution_time: float # time between job start and the last status update 54 | update_interval_time: float # time between the last two status updates 55 | updates: int # number of status updates so far 56 | message: Optional[str] 57 | outdir: Optional[str] 58 | timestring: Optional[str] 59 | deforum_settings : Optional[List[Dict[str, Any]]] 60 | options_overrides : Optional[Dict[str, Any]] -------------------------------------------------------------------------------- /scripts/deforum_extend_paths.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os 18 | import sys 19 | 20 | def deforum_sys_extend(): 21 | deforum_folder_name = os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-2]) 22 | 23 | basedirs = [os.getcwd()] 24 | if 'google.colab' in sys.modules: 25 | basedirs.append('/content/gdrive/MyDrive/sd/stable-diffusion-webui') # for TheLastBen's colab 26 | for _ in basedirs: 27 | deforum_paths_to_ensure = [ 28 | os.path.join(deforum_folder_name, 'scripts'), 29 | os.path.join(deforum_folder_name, 'scripts', 'deforum_helpers', 'src') 30 | ] 31 | for deforum_scripts_path_fix in deforum_paths_to_ensure: 32 | if deforum_scripts_path_fix not in sys.path: 33 | sys.path.extend([deforum_scripts_path_fix]) 34 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/114763196.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/scripts/deforum_helpers/114763196.jpg -------------------------------------------------------------------------------- /scripts/deforum_helpers/RAFT.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import torch 18 | import numpy as np 19 | import torchvision.transforms.functional as F 20 | from torchvision.models.optical_flow import Raft_Large_Weights, raft_large 21 | 22 | class RAFT: 23 | def __init__(self): 24 | weights = Raft_Large_Weights.DEFAULT 25 | self.transforms = weights.transforms() 26 | self.device = "cuda" if torch.cuda.is_available() else "cpu" 27 | self.model = raft_large(weights=weights, progress=False).to(self.device).eval() 28 | 29 | def predict(self, image1, image2, num_flow_updates:int = 50): 30 | img1 = F.to_tensor(image1) 31 | img2 = F.to_tensor(image2) 32 | img1_batch, img2_batch = img1.unsqueeze(0), img2.unsqueeze(0) 33 | img1_batch, img2_batch = self.transforms(img1_batch, img2_batch) 34 | 35 | with torch.no_grad(): 36 | flow = self.model(image1=img1_batch.to(self.device), image2=img2_batch.to(self.device), num_flow_updates=num_flow_updates)[-1].cpu().numpy()[0] 37 | 38 | # align the flow array to have the shape (w, h, 2) so it's compatible with the rest of CV2's flow methods 39 | flow = np.transpose(flow, (1, 2, 0)) 40 | 41 | return flow 42 | 43 | def delete_model(self): 44 | del self.model -------------------------------------------------------------------------------- /scripts/deforum_helpers/auto_navigation.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import numpy as np 18 | import torch 19 | 20 | # reallybigname - auto-navigation functions in progress... 
21 | # usage: 22 | # if auto_rotation: 23 | # rot_mat = rotate_camera_towards_depth(depth_tensor, auto_rotation_steps, w, h, fov_deg, auto_rotation_depth_target) 24 | def rotate_camera_towards_depth(depth_tensor, turn_weight, width, height, h_fov=60, target_depth=1): 25 | # Compute the depth at the target depth 26 | target_depth_index = int(target_depth * depth_tensor.shape[0]) 27 | target_depth_values = depth_tensor[target_depth_index] 28 | max_depth_index = torch.argmax(target_depth_values).item() 29 | max_depth_index = (max_depth_index, target_depth_index) 30 | max_depth = target_depth_values[max_depth_index[0]].item() 31 | 32 | # Compute the normalized x and y coordinates 33 | x, y = max_depth_index 34 | x_normalized = (x / (width - 1)) * 2 - 1 35 | y_normalized = (y / (height - 1)) * 2 - 1 36 | 37 | # Calculate horizontal and vertical field of view (in radians) 38 | h_fov_rad = np.radians(h_fov) 39 | aspect_ratio = width / height 40 | v_fov_rad = h_fov_rad / aspect_ratio 41 | 42 | # Calculate the world coordinates (x, y) at the target depth 43 | x_world = np.tan(h_fov_rad / 2) * max_depth * x_normalized 44 | y_world = np.tan(v_fov_rad / 2) * max_depth * y_normalized 45 | 46 | # Compute the target position using the world coordinates and max_depth 47 | target_position = np.array([x_world, y_world, max_depth]) 48 | 49 | # Assuming the camera is initially at the origin, and looking in the negative Z direction 50 | cam_position = np.array([0, 0, 0]) 51 | current_direction = np.array([0, 0, -1]) 52 | 53 | # Compute the direction vector and normalize it 54 | direction = target_position - cam_position 55 | direction = direction / np.linalg.norm(direction) 56 | 57 | # Compute the rotation angle based on the turn_weight (number of frames) 58 | axis = np.cross(current_direction, direction) 59 | axis = axis / np.linalg.norm(axis) 60 | angle = np.arcsin(np.linalg.norm(axis)) 61 | max_angle = np.pi * (0.1 / turn_weight) # Limit the maximum rotation angle to half of the visible screen 62 | rotation_angle = np.clip(np.sign(np.cross(current_direction, direction)) * angle / turn_weight, -max_angle, max_angle) 63 | 64 | # Compute the rotation matrix 65 | rotation_matrix = np.eye(3) + np.sin(rotation_angle) * np.array([ 66 | [0, -axis[2], axis[1]], 67 | [axis[2], 0, -axis[0]], 68 | [-axis[1], axis[0], 0] 69 | ]) + (1 - np.cos(rotation_angle)) * np.outer(axis, axis) 70 | 71 | # Convert the NumPy array to a PyTorch tensor 72 | rotation_matrix_tensor = torch.from_numpy(rotation_matrix).float() 73 | 74 | # Add an extra dimension to match the expected shape (1, 3, 3) 75 | rotation_matrix_tensor = rotation_matrix_tensor.unsqueeze(0) 76 | 77 | return rotation_matrix_tensor 78 | 79 | def rotation_matrix(axis, angle): 80 | axis = np.asarray(axis) 81 | axis = axis / np.linalg.norm(axis) 82 | a = np.cos(angle / 2.0) 83 | b, c, d = -axis * np.sin(angle / 2.0) 84 | aa, bb, cc, dd = a * a, b * b, c * c, d * d 85 | bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d 86 | return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], 87 | [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], 88 | [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) 89 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/colors.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU 
Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import cv2 18 | import pkg_resources 19 | from skimage.exposure import match_histograms 20 | 21 | def maintain_colors(prev_img, color_match_sample, mode): 22 | 23 | match_histograms_kwargs = {'channel_axis': -1} 24 | 25 | if mode == 'RGB': 26 | return match_histograms(prev_img, color_match_sample, **match_histograms_kwargs) 27 | elif mode == 'HSV': 28 | prev_img_hsv = cv2.cvtColor(prev_img, cv2.COLOR_RGB2HSV) 29 | color_match_hsv = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2HSV) 30 | matched_hsv = match_histograms(prev_img_hsv, color_match_hsv, **match_histograms_kwargs) 31 | return cv2.cvtColor(matched_hsv, cv2.COLOR_HSV2RGB) 32 | else: # LAB 33 | prev_img_lab = cv2.cvtColor(prev_img, cv2.COLOR_RGB2LAB) 34 | color_match_lab = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2LAB) 35 | matched_lab = match_histograms(prev_img_lab, color_match_lab, **match_histograms_kwargs) 36 | return cv2.cvtColor(matched_lab, cv2.COLOR_LAB2RGB) 37 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/deforum_controlnet_gradio.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import gradio as gr 18 | # print (cnet_1.get_modules()) 19 | 20 | # *** TODO: re-enable table printing! disabled only temp! 
13-04-23 *** 21 | # table = Table(title="ControlNet params",padding=0, box=box.ROUNDED) 22 | 23 | # TODO: auto infer the names and the values for the table 24 | # field_names = [] 25 | # field_names += ["module", "model", "weight", "inv", "guide_start", "guide_end", "guess", "resize", "rgb_bgr", "proc res", "thr a", "thr b"] 26 | # for field_name in field_names: 27 | # table.add_column(field_name, justify="center") 28 | 29 | # cn_model_name = str(controlnet_args.cn_1_model) 30 | 31 | # rows = [] 32 | # rows += [controlnet_args.cn_1_module, cn_model_name[len('control_'):] if 'control_' in cn_model_name else cn_model_name, controlnet_args.cn_1_weight, controlnet_args.cn_1_invert_image, controlnet_args.cn_1_guidance_start, controlnet_args.cn_1_guidance_end, controlnet_args.cn_1_guess_mode, controlnet_args.cn_1_resize_mode, controlnet_args.cn_1_rgbbgr_mode, controlnet_args.cn_1_processor_res, controlnet_args.cn_1_threshold_a, controlnet_args.cn_1_threshold_b] 33 | # rows = [str(x) for x in rows] 34 | 35 | # table.add_row(*rows) 36 | # console.print(table) 37 | 38 | def hide_ui_by_cn_status(choice): 39 | return gr.update(visible=True) if choice else gr.update(visible=False) 40 | 41 | def hide_file_textboxes(choice): 42 | return gr.update(visible=False) if choice else gr.update(visible=True) 43 | 44 | class ToolButton(gr.Button, gr.components.FormComponent): 45 | """Small button with single emoji as text, fits inside gradio forms""" 46 | def __init__(self, **kwargs): 47 | super().__init__(variant="tool", **kwargs) 48 | 49 | def get_block_name(self): 50 | return "button" -------------------------------------------------------------------------------- /scripts/deforum_helpers/deforum_tqdm.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os 18 | from math import ceil 19 | import tqdm 20 | from modules.shared import progress_print_out, opts, cmd_opts 21 | 22 | class DeforumTQDM: 23 | def __init__(self, args, anim_args, parseq_args, video_args): 24 | self._tqdm = None 25 | self._args = args 26 | self._anim_args = anim_args 27 | self._parseq_args = parseq_args 28 | self._video_args = video_args 29 | 30 | def reset(self): 31 | from .animation_key_frames import DeformAnimKeys 32 | from .parseq_adapter import ParseqAdapter 33 | deforum_total = 0 34 | # FIXME: get only amount of steps 35 | parseq_adapter = ParseqAdapter(self._parseq_args, self._anim_args, self._video_args, None, None, mute=True) 36 | keys = DeformAnimKeys(self._anim_args) if not parseq_adapter.use_parseq else parseq_adapter.anim_keys 37 | 38 | start_frame = 0 39 | if self._anim_args.resume_from_timestring: 40 | for tmp in os.listdir(self._args.outdir): 41 | filename = tmp.split("_") 42 | # don't use saved depth maps to count number of frames 43 | if self._anim_args.resume_timestring in filename and "depth" not in filename: 44 | start_frame += 1 45 | start_frame = start_frame - 1 46 | using_vid_init = self._anim_args.animation_mode == 'Video Input' 47 | turbo_steps = 1 if using_vid_init else int(self._anim_args.diffusion_cadence) 48 | if self._anim_args.resume_from_timestring: 49 | last_frame = start_frame - 1 50 | if turbo_steps > 1: 51 | last_frame -= last_frame % turbo_steps 52 | if turbo_steps > 1: 53 | turbo_next_frame_idx = last_frame 54 | turbo_prev_frame_idx = turbo_next_frame_idx 55 | start_frame = last_frame + turbo_steps 56 | frame_idx = start_frame 57 | had_first = False 58 | while frame_idx < self._anim_args.max_frames: 59 | strength = keys.strength_schedule_series[frame_idx] 60 | if not had_first and self._args.use_init and ((self._args.init_image is not None and self._args.init_image != '') or self._args.init_image_box is not None): 61 | deforum_total += int(ceil(self._args.steps * (1 - strength))) 62 | had_first = True 63 | elif not had_first: 64 | deforum_total += self._args.steps 65 | had_first = True 66 | else: 67 | deforum_total += int(ceil(self._args.steps * (1 - strength))) 68 | 69 | if turbo_steps > 1: 70 | frame_idx += turbo_steps 71 | else: 72 | frame_idx += 1 73 | 74 | self._tqdm = tqdm.tqdm( 75 | desc="Deforum progress", 76 | total=deforum_total, 77 | position=1, 78 | file=progress_print_out 79 | ) 80 | 81 | def update(self): 82 | if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars: 83 | return 84 | if self._tqdm is None: 85 | self.reset() 86 | self._tqdm.update() 87 | 88 | def updateTotal(self, new_total): 89 | if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars: 90 | return 91 | if self._tqdm is None: 92 | self.reset() 93 | self._tqdm.total = new_total 94 | 95 | def clear(self): 96 | if self._tqdm is not None: 97 | self._tqdm.close() 98 | self._tqdm = None 99 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/depth_adabins.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 
6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import torch 18 | import numpy as np 19 | from PIL import Image 20 | import torchvision.transforms.functional as TF 21 | from .general_utils import download_file_with_checksum 22 | from infer import InferenceHelper 23 | 24 | class AdaBinsModel: 25 | _instance = None 26 | 27 | def __new__(cls, *args, **kwargs): 28 | keep_in_vram = kwargs.get('keep_in_vram', False) 29 | if cls._instance is None: 30 | cls._instance = super().__new__(cls) 31 | cls._instance._initialize(*args, keep_in_vram=keep_in_vram) 32 | return cls._instance 33 | 34 | def _initialize(self, models_path, keep_in_vram=False): 35 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 36 | self.keep_in_vram = keep_in_vram 37 | self.adabins_helper = None 38 | 39 | download_file_with_checksum(url='https://github.com/hithereai/deforum-for-automatic1111-webui/releases/download/AdaBins/AdaBins_nyu.pt', expected_checksum='643db9785c663aca72f66739427642726b03acc6c4c1d3755a4587aa2239962746410d63722d87b49fc73581dbc98ed8e3f7e996ff7b9c0d56d0fbc98e23e41a', dest_folder=models_path, dest_filename='AdaBins_nyu.pt') 40 | 41 | self.adabins_helper = InferenceHelper(models_path=models_path, dataset='nyu', device=self.device) 42 | 43 | def predict(self, img_pil, prev_img_cv2): 44 | w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0] 45 | adabins_depth = np.array([]) 46 | use_adabins = True 47 | MAX_ADABINS_AREA, MIN_ADABINS_AREA = 500000, 448 * 448 48 | 49 | image_pil_area, resized = w * h, False 50 | 51 | if image_pil_area not in range(MIN_ADABINS_AREA, MAX_ADABINS_AREA + 1): 52 | scale = ((MAX_ADABINS_AREA if image_pil_area > MAX_ADABINS_AREA else MIN_ADABINS_AREA) / image_pil_area) ** 0.5 53 | depth_input = img_pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS if image_pil_area > MAX_ADABINS_AREA else Image.BICUBIC) 54 | print(f"AdaBins depth resized to {depth_input.width}x{depth_input.height}") 55 | resized = True 56 | else: 57 | depth_input = img_pil 58 | 59 | try: 60 | with torch.no_grad(): 61 | _, adabins_depth = self.adabins_helper.predict_pil(depth_input) 62 | if resized: 63 | adabins_depth = TF.resize(torch.from_numpy(adabins_depth), torch.Size([h, w]), interpolation=TF.InterpolationMode.BICUBIC).cpu().numpy() 64 | adabins_depth = adabins_depth.squeeze() 65 | except Exception as e: 66 | print("AdaBins exception encountered. 
Falling back to pure MiDaS/Zoe (only if running in Legacy Midas/Zoe+AdaBins mode)") 67 | use_adabins = False 68 | torch.cuda.empty_cache() 69 | 70 | return use_adabins, adabins_depth 71 | 72 | def to(self, device): 73 | self.device = device 74 | if self.adabins_helper is not None: 75 | self.adabins_helper.to(device) 76 | 77 | def delete_model(self): 78 | del self.adabins_helper 79 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/depth_leres.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import torch 18 | import cv2 19 | import os 20 | import numpy as np 21 | import torchvision.transforms as transforms 22 | from .general_utils import download_file_with_checksum 23 | from leres.lib.multi_depth_model_woauxi import RelDepthModel 24 | from leres.lib.net_tools import load_ckpt 25 | 26 | class LeReSDepth: 27 | def __init__(self, width=448, height=448, models_path=None, checkpoint_name='res101.pth', backbone='resnext101'): 28 | self.width = width 29 | self.height = height 30 | self.models_path = models_path 31 | self.checkpoint_name = checkpoint_name 32 | self.backbone = backbone 33 | 34 | download_file_with_checksum(url='https://cloudstor.aarnet.edu.au/plus/s/lTIJF4vrvHCAI31/download', expected_checksum='7fdc870ae6568cb28d56700d0be8fc45541e09cea7c4f84f01ab47de434cfb7463cacae699ad19fe40ee921849f9760dedf5e0dec04a62db94e169cf203f55b1', dest_folder=models_path, dest_filename=self.checkpoint_name) 35 | 36 | self.depth_model = RelDepthModel(backbone=self.backbone) 37 | self.depth_model.eval() 38 | self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 39 | self.depth_model.to(self.DEVICE) 40 | load_ckpt(os.path.join(self.models_path, self.checkpoint_name), self.depth_model, None, None) 41 | 42 | @staticmethod 43 | def scale_torch(img): 44 | if len(img.shape) == 2: 45 | img = img[np.newaxis, :, :] 46 | if img.shape[2] == 3: 47 | transform = transforms.Compose([transforms.ToTensor(), 48 | transforms.Normalize((0.485, 0.456, 0.406) , (0.229, 0.224, 0.225))]) 49 | img = transform(img) 50 | else: 51 | img = img.astype(np.float32) 52 | img = torch.from_numpy(img) 53 | return img 54 | 55 | def predict(self, image): 56 | resized_image = cv2.resize(image, (self.width, self.height)) 57 | img_torch = self.scale_torch(resized_image)[None, :, :, :] 58 | pred_depth = self.depth_model.inference(img_torch).cpu().numpy().squeeze() 59 | pred_depth_ori = cv2.resize(pred_depth, (image.shape[1], image.shape[0])) 60 | return torch.from_numpy(pred_depth_ori).unsqueeze(0).to(self.DEVICE) 61 | 62 | def save_raw_depth(self, depth, filepath): 63 | depth_normalized = (depth / depth.max() * 60000).astype(np.uint16) 64 | cv2.imwrite(filepath, depth_normalized) 65 | 66 | def to(self, device): 67 | self.DEVICE = 
device 68 | self.depth_model = self.depth_model.to(device) 69 | 70 | def delete(self): 71 | del self.depth_model -------------------------------------------------------------------------------- /scripts/deforum_helpers/depth_midas.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os 18 | import cv2 19 | import torch 20 | import numpy as np 21 | from .general_utils import download_file_with_checksum 22 | from midas.dpt_depth import DPTDepthModel 23 | from midas.transforms import Resize, NormalizeImage, PrepareForNet 24 | import torchvision.transforms as T 25 | 26 | class MidasDepth: 27 | def __init__(self, models_path, device, half_precision=True, midas_model_type='Midas-3-Hybrid'): 28 | if midas_model_type.lower() == 'midas-3.1-beitlarge': 29 | self.midas_model_filename = 'dpt_beit_large_512.pt' 30 | self.midas_model_checksum='66cbb00ea7bccd6e43d3fd277bd21002d8d8c2c5c487e5fcd1e1d70c691688a19122418b3ddfa94e62ab9f086957aa67bbec39afe2b41c742aaaf0699ee50b33' 31 | self.midas_model_url = 'https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt' 32 | self.resize_px = 512 33 | self.backbone = 'beitl16_512' 34 | else: 35 | self.midas_model_filename = 'dpt_large-midas-2f21e586.pt' 36 | self.midas_model_checksum = 'fcc4829e65d00eeed0a38e9001770676535d2e95c8a16965223aba094936e1316d569563552a852d471f310f83f597e8a238987a26a950d667815e08adaebc06' 37 | self.midas_model_url = 'https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt' 38 | self.resize_px = 384 39 | self.backbone = 'vitl16_384' 40 | self.device = device 41 | self.normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) 42 | self.midas_transform = T.Compose([ 43 | Resize(self.resize_px, self.resize_px, resize_target=None, keep_aspect_ratio=True, ensure_multiple_of=32, 44 | resize_method="minimal", image_interpolation_method=cv2.INTER_CUBIC), 45 | self.normalization, 46 | PrepareForNet() 47 | ]) 48 | 49 | download_file_with_checksum(url=self.midas_model_url, expected_checksum=self.midas_model_checksum, dest_folder=models_path, dest_filename=self.midas_model_filename) 50 | 51 | self.load_midas_model(models_path, self.midas_model_filename) 52 | if half_precision: 53 | self.midas_model = self.midas_model.half() 54 | 55 | def load_midas_model(self, models_path, midas_model_filename): 56 | model_file = os.path.join(models_path, midas_model_filename) 57 | print(f"Loading MiDaS model from {midas_model_filename}...") 58 | self.midas_model = DPTDepthModel( 59 | path=model_file, 60 | backbone=self.backbone, 61 | non_negative=True, 62 | ) 63 | self.midas_model.eval().to(self.device, memory_format=torch.channels_last if self.device == torch.device("cuda") else None) 64 | 65 | def predict(self, prev_img_cv2, half_precision): 66 | 
img_midas = prev_img_cv2.astype(np.float32) / 255.0 67 | img_midas_input = self.midas_transform({"image": img_midas})["image"] 68 | sample = torch.from_numpy(img_midas_input).float().to(self.device).unsqueeze(0) 69 | 70 | if self.device.type == "cuda" or self.device.type == "mps": 71 | sample = sample.to(memory_format=torch.channels_last) 72 | if half_precision: 73 | sample = sample.half() 74 | 75 | with torch.no_grad(): 76 | midas_depth = self.midas_model.forward(sample) 77 | midas_depth = torch.nn.functional.interpolate( 78 | midas_depth.unsqueeze(1), 79 | size=img_midas.shape[:2], 80 | mode="bicubic", 81 | align_corners=False, 82 | ).squeeze().cpu().numpy() 83 | 84 | torch.cuda.empty_cache() 85 | depth_tensor = torch.from_numpy(np.expand_dims(midas_depth, axis=0)).squeeze().to(self.device) 86 | 87 | return depth_tensor 88 | 89 | def to(self, device): 90 | self.device = device 91 | self.midas_model = self.midas_model.to(device, memory_format=torch.channels_last if device == torch.device("cuda") else None) -------------------------------------------------------------------------------- /scripts/deforum_helpers/depth_zoe.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import torch 18 | from zoedepth.models.builder import build_model 19 | from zoedepth.utils.config import get_config 20 | 21 | class ZoeDepth: 22 | def __init__(self, width=512, height=512): 23 | conf = get_config("zoedepth_nk", "infer") 24 | conf.img_size = [width, height] 25 | self.model_zoe = build_model(conf) 26 | self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 27 | self.zoe = self.model_zoe.to(self.DEVICE) 28 | self.width = width 29 | self.height = height 30 | 31 | def predict(self, image): 32 | self.zoe.core.prep.resizer._Resize__width = self.width 33 | self.zoe.core.prep.resizer._Resize__height = self.height 34 | depth_tensor = self.zoe.infer_pil(image, output_type="tensor") 35 | return depth_tensor 36 | 37 | def to(self, device): 38 | self.DEVICE = device 39 | self.zoe = self.model_zoe.to(device) 40 | 41 | def save_raw_depth(self, depth, filepath): 42 | depth.save(filepath, format='PNG', mode='I;16') 43 | 44 | def delete(self): 45 | del self.model_zoe 46 | del self.zoe -------------------------------------------------------------------------------- /scripts/deforum_helpers/human_masking.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 
6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os, cv2 18 | import torch 19 | from pathlib import Path 20 | from multiprocessing import freeze_support 21 | 22 | def extract_frames(input_video_path, output_imgs_path): 23 | # Open the video file 24 | vidcap = cv2.VideoCapture(input_video_path) 25 | 26 | # Get the total number of frames in the video 27 | frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)) 28 | 29 | # Create the output directory if it does not exist 30 | os.makedirs(output_imgs_path, exist_ok=True) 31 | 32 | # Extract the frames 33 | for i in range(frame_count): 34 | success, image = vidcap.read() 35 | if success: 36 | cv2.imwrite(os.path.join(output_imgs_path, f"frame{i}.png"), image) 37 | print(f"{frame_count} frames extracted and saved to {output_imgs_path}") 38 | 39 | def video2humanmasks(input_frames_path, output_folder_path, output_type, fps): 40 | # freeze support is needed for video outputting 41 | freeze_support() 42 | 43 | # check if input path exists and is a directory 44 | if not os.path.exists(input_frames_path) or not os.path.isdir(input_frames_path): 45 | raise ValueError("Invalid input path: {}".format(input_frames_path)) 46 | 47 | # check if output path exists and is a directory 48 | if not os.path.exists(output_folder_path) or not os.path.isdir(output_folder_path): 49 | raise ValueError("Invalid output path: {}".format(output_folder_path)) 50 | 51 | # check if output_type is valid 52 | valid_output_types = ["video", "pngs", "both"] 53 | if output_type.lower() not in valid_output_types: 54 | raise ValueError("Invalid output type: {}. Must be one of {}".format(output_type, valid_output_types)) 55 | 56 | # try to predict where torch cache lives, so we can try and fetch models from cache in the next step 57 | predicted_torch_model_cache_path = os.path.join(Path.home(), ".cache", "torch", "hub", "hithereai_RobustVideoMatting_master") 58 | predicted_rvm_cache_testfile = os.path.join(predicted_torch_model_cache_path, "hubconf.py") 59 | 60 | # try to fetch the models from cache, and only if they can't be found, download from the internet (to enable offline usage) 61 | try: 62 | # Try to fetch the models from cache 63 | convert_video = torch.hub.load(predicted_torch_model_cache_path, "converter", source='local') 64 | model = torch.hub.load(predicted_torch_model_cache_path, "resnet50", source='local').cuda() 65 | except: 66 | # Download from the internet if not found in cache 67 | convert_video = torch.hub.load("hithereai/RobustVideoMatting", "converter") 68 | model = torch.hub.load("hithereai/RobustVideoMatting", "resnet50").cuda() 69 | 70 | output_alpha_vid_path = os.path.join(output_folder_path, "human_masked_video.mp4") 71 | # extract human masks from the input folder's imgs. 72 | # in this step PNGs will be extracted only if output_type is set to PNGs.
Otherwise a video will be made, and in the case of Both, the video will be extracted in the next step to PNGs 73 | convert_video( 74 | model, 75 | input_source=input_frames_path, # full path of the folder that contains all of the extracted input imgs 76 | output_type='video' if output_type.upper() in ("VIDEO", "BOTH") else 'png_sequence', 77 | output_alpha=output_alpha_vid_path if output_type.upper() in ("VIDEO", "BOTH") else output_folder_path, 78 | output_video_mbps=4, 79 | output_video_fps=fps, 80 | downsample_ratio=None, # None for auto 81 | seq_chunk=12, # Process n frames at once for better parallelism 82 | progress=True # show extraction progress 83 | ) 84 | 85 | if output_type.lower() == "both": 86 | extract_frames(output_alpha_vid_path, output_folder_path) 87 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/image_sharpening.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import cv2 18 | import numpy as np 19 | 20 | def unsharp_mask(img, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0, mask=None): 21 | if amount == 0: 22 | return img 23 | # Return a sharpened version of the image, using an unsharp mask. 24 | # If mask is not None, only areas under mask are handled 25 | blurred = cv2.GaussianBlur(img, kernel_size, sigma) 26 | sharpened = float(amount + 1) * img - float(amount) * blurred 27 | sharpened = np.maximum(sharpened, np.zeros(sharpened.shape)) 28 | sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape)) 29 | sharpened = sharpened.round().astype(np.uint8) 30 | if threshold > 0: 31 | low_contrast_mask = np.absolute(img - blurred) < threshold 32 | np.copyto(sharpened, img, where=low_contrast_mask) 33 | if mask is not None: 34 | mask = np.array(mask) 35 | masked_sharpened = cv2.bitwise_and(sharpened, sharpened, mask=mask) 36 | masked_img = cv2.bitwise_and(img, img, mask=255-mask) 37 | sharpened = cv2.add(masked_img, masked_sharpened) 38 | return sharpened -------------------------------------------------------------------------------- /scripts/deforum_helpers/masks.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 
11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os 18 | import cv2 19 | import gc 20 | import numpy as np 21 | from PIL import Image, ImageOps 22 | from .video_audio_utilities import get_frame_name 23 | from .load_images import load_image 24 | 25 | def do_overlay_mask(args, anim_args, img, frame_idx, is_bgr_array=False): 26 | if is_bgr_array: 27 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) 28 | img = Image.fromarray(img) 29 | 30 | if anim_args.use_mask_video: 31 | current_mask = Image.open(os.path.join(args.outdir, 'maskframes', get_frame_name(anim_args.video_mask_path) + f"{frame_idx:09}.jpg")) 32 | current_frame = Image.open(os.path.join(args.outdir, 'inputframes', get_frame_name(anim_args.video_init_path) + f"{frame_idx:09}.jpg")) 33 | elif args.use_mask: 34 | current_mask = args.mask_image if args.mask_image is not None else load_image(args.mask_file, None) 35 | if args.init_image is None and args.init_image_box is None: 36 | current_frame = img 37 | else: 38 | current_frame = load_image(args.init_image, args.init_image_box) 39 | 40 | current_mask = current_mask.resize((args.W, args.H), Image.LANCZOS) 41 | current_frame = current_frame.resize((args.W, args.H), Image.LANCZOS) 42 | current_mask = ImageOps.grayscale(current_mask) 43 | 44 | if args.invert_mask: 45 | current_mask = ImageOps.invert(current_mask) 46 | 47 | img = Image.composite(img, current_frame, current_mask) 48 | 49 | if is_bgr_array: 50 | img = np.array(img) 51 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 52 | 53 | del(current_mask, current_frame) 54 | gc.collect() 55 | 56 | return img -------------------------------------------------------------------------------- /scripts/deforum_helpers/noise.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import torch 18 | from torch.nn.functional import interpolate 19 | import numpy as np 20 | from PIL import ImageOps 21 | import math 22 | from .animation import sample_to_cv2 23 | import cv2 24 | from modules.shared import opts 25 | 26 | DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False) 27 | 28 | deforum_noise_gen = torch.Generator(device='cpu') 29 | 30 | # 2D Perlin noise in PyTorch https://gist.github.com/vadimkantorov/ac1b097753f217c5c11bc2ff396e0a57 31 | def rand_perlin_2d(shape, res, fade = lambda t: 6*t**5 - 15*t**4 + 10*t**3): 32 | delta = (res[0] / shape[0], res[1] / shape[1]) 33 | d = (shape[0] // res[0], shape[1] // res[1]) 34 | 35 | grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1]), indexing='ij'), dim = -1) % 1 36 | angles = 2*math.pi*torch.rand(res[0]+1, res[1]+1, generator=deforum_noise_gen) 37 | gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim = -1) 38 | 39 | tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1) 40 | dot = lambda grad, shift: (torch.stack((grid[:shape[0],:shape[1],0] + shift[0], grid[:shape[0],:shape[1], 1] + shift[1] ), dim = -1) * grad[:shape[0], :shape[1]]).sum(dim = -1) 41 | 42 | n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0]) 43 | n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0]) 44 | n01 = dot(tile_grads([0, -1],[1, None]), [0, -1]) 45 | n11 = dot(tile_grads([1, None], [1, None]), [-1,-1]) 46 | t = fade(grid[:shape[0], :shape[1]]) 47 | return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1]) 48 | 49 | def rand_perlin_2d_octaves(shape, res, octaves=1, persistence=0.5): 50 | noise = torch.zeros(shape) 51 | frequency = 1 52 | amplitude = 1 53 | for _ in range(int(octaves)): 54 | noise += amplitude * rand_perlin_2d(shape, (frequency*res[0], frequency*res[1])) 55 | frequency *= 2 56 | amplitude *= persistence 57 | return noise 58 | 59 | def condition_noise_mask(noise_mask, invert_mask = False): 60 | if invert_mask: 61 | noise_mask = ImageOps.invert(noise_mask) 62 | noise_mask = np.array(noise_mask.convert("L")) 63 | noise_mask = noise_mask.astype(np.float32) / 255.0 64 | noise_mask = np.around(noise_mask, decimals=0) 65 | noise_mask = torch.from_numpy(noise_mask) 66 | #noise_mask = torch.round(noise_mask) 67 | return noise_mask 68 | 69 | def add_noise(sample, noise_amt: float, seed: int, noise_type: str, noise_args, noise_mask = None, invert_mask = False): 70 | deforum_noise_gen.manual_seed(seed) # Reproducibility 71 | perlin_w = sample.shape[0] 72 | perlin_h = sample.shape[1] 73 | perlin_w, perlin_h = map(lambda x: x - x % 64, (perlin_w, perlin_h)) # rescale perlin to multiples of 64 74 | sample2dshape = (perlin_w, perlin_h) 75 | noise = torch.randn((sample.shape[2], perlin_w, perlin_h), generator=deforum_noise_gen) # White noise 76 | if noise_type == 'perlin': 77 | # rand_perlin_2d_octaves is between -1 and 1, so we need to shift it to be between 0 and 1 78 | # print(sample.shape) 79 | noise = noise * ((rand_perlin_2d_octaves(sample2dshape, (int(noise_args[0]), int(noise_args[1])), octaves=noise_args[2], persistence=noise_args[3]) + torch.ones(sample2dshape)) / 2) 80 | noise = interpolate(noise.unsqueeze(1), size=(sample.shape[0], sample.shape[1])).squeeze(1) # rescale perlin back to the target resolution 81 | if noise_mask is not None: 82 | noise_mask = 
condition_noise_mask(noise_mask, invert_mask) 83 | noise_to_add = sample_to_cv2(noise * noise_mask) 84 | else: 85 | noise_to_add = sample_to_cv2(noise) 86 | sample = cv2.addWeighted(sample, 1-noise_amt, noise_to_add, noise_amt, 0) 87 | 88 | return sample 89 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/opts_overrider.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, Dict 3 | from modules.shared import opts 4 | 5 | log = logging.getLogger(__name__) 6 | 7 | class A1111OptionsOverrider(object): 8 | def __init__(self, opts_overrides: Dict[str, Any]): 9 | self.opts_overrides = opts_overrides 10 | 11 | def __enter__(self): 12 | if self.opts_overrides is not None and len(self.opts_overrides)>0: 13 | self.original_opts = {k: opts.data[k] for k in self.opts_overrides.keys() if k in opts.data} 14 | log.debug(f"Captured options to override: {self.original_opts}") 15 | log.info(f"Setting options: {self.opts_overrides}") 16 | for k, v in self.opts_overrides.items(): 17 | setattr(opts, k, v) 18 | else: 19 | self.original_opts = None 20 | return self 21 | 22 | def __exit__(self, exception_type, exception_value, traceback): 23 | if (exception_type is not None): 24 | log.warning(f"Error during batch execution: {exception_type} - {exception_value}") 25 | log.debug(f"{traceback}") 26 | if (self.original_opts is not None): 27 | log.info(f"Restoring options: {self.original_opts}") 28 | for k, v in self.original_opts.items(): 29 | setattr(opts, k, v) 30 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/resume.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os 18 | import cv2 19 | from modules.shared import opts 20 | 21 | # Resume requires at least two actual frames in order to work 22 | # 'Actual' frames are defined as frames that go through generation 23 | # - Can't resume from a single frame. 24 | # - If you have a cadence of 10, you need at least 10 frames in order to resume. 
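# - Worked example: with 25 frames already saved and a cadence of 10, get_resume_vars() below computes
#   last_frame = 20, so prev_frame = 10 and next_frame = 19.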
25 | # - Resume grabs the last actual frame and the 2nd to last actual frame 26 | # in order to work with cadence properly and feed it the prev_img/next_img 27 | 28 | def get_resume_vars(folder, timestring, cadence): 29 | DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False) 30 | # count previous frames 31 | frame_count = 0 32 | for item in os.listdir(folder): 33 | # don't count txt files or mp4 files 34 | if ".txt" in item or ".mp4" in item: 35 | pass 36 | else: 37 | filename = item.split("_") 38 | # other image file types may be supported in the future, 39 | # so we just count files containing timestring 40 | # that don't contain the depth keyword (depth maps are saved in same folder) 41 | if timestring in filename and "depth" not in filename: 42 | frame_count += 1 43 | # add this to debugging var 44 | if DEBUG_MODE: 45 | print(f"\033[36mResuming:\033[0m File: {filename}") 46 | 47 | print(f"\033[36mResuming:\033[0m Current frame count: {frame_count}") 48 | 49 | # get last frame from frame count corrected for any trailing cadence frames 50 | last_frame = frame_count - (frame_count % cadence) 51 | 52 | # calculate previous actual frame 53 | prev_frame = last_frame - cadence 54 | 55 | # calculate next actual frame 56 | next_frame = last_frame - 1 57 | 58 | # get prev_img/next_img from prev/next frame index (files start at 0, so subtract 1 for index var) 59 | path = os.path.join(folder, f"{timestring}_{prev_frame:09}.png") 60 | prev_img = cv2.imread(path) 61 | path = os.path.join(folder, f"{timestring}_{next_frame:09}.png") 62 | next_img = cv2.imread(path) 63 | 64 | # report resume last/next in console 65 | print(f"\033[36mResuming:\033[0m Last frame: {prev_frame} - Next frame: {next_frame} ") 66 | 67 | # returns: 68 | # last frame count, accounting for cadence 69 | # next frame count, accounting for cadence 70 | # prev frame's image cv2 BGR 71 | # next frame's image cv2 BGR 72 | return prev_frame, next_frame, prev_img, next_img 73 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/rich.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | from rich.console import Console 18 | console = Console() -------------------------------------------------------------------------------- /scripts/deforum_helpers/save_images.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 
6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os 18 | import cv2 19 | import gc 20 | import time 21 | 22 | def get_output_folder(output_path, batch_folder): 23 | out_path = os.path.join(output_path,time.strftime('%Y-%m')) 24 | if batch_folder != "": 25 | out_path = os.path.join(out_path, batch_folder) 26 | os.makedirs(out_path, exist_ok=True) 27 | return out_path 28 | 29 | def save_image(image, image_type, filename, args, video_args, root): 30 | if video_args.store_frames_in_ram: 31 | root.frames_cache.append({'path':os.path.join(args.outdir, filename), 'image':image, 'image_type':image_type}) 32 | else: 33 | image.save(os.path.join(args.outdir, filename)) 34 | 35 | def reset_frames_cache(root): 36 | root.frames_cache = [] 37 | gc.collect() 38 | 39 | def dump_frames_cache(root): 40 | for image_cache in root.frames_cache: 41 | if image_cache['image_type'] == 'cv2': 42 | cv2.imwrite(image_cache['path'], image_cache['image']) 43 | elif image_cache['image_type'] == 'PIL': 44 | image_cache['image'].save(image_cache['path']) 45 | # do not reset the cache since we're going to add frame erasing later function #TODO 46 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/seed.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import random 18 | 19 | def next_seed(args, root): 20 | if args.seed_behavior == 'iter': 21 | args.seed += 1 if root.seed_internal % args.seed_iter_N == 0 else 0 22 | root.seed_internal += 1 23 | elif args.seed_behavior == 'ladder': 24 | args.seed += 2 if root.seed_internal == 0 else -1 25 | root.seed_internal = 1 if root.seed_internal == 0 else 0 26 | elif args.seed_behavior == 'alternate': 27 | args.seed += 1 if root.seed_internal == 0 else -1 28 | root.seed_internal = 1 if root.seed_internal == 0 else 0 29 | elif args.seed_behavior == 'fixed': 30 | pass # always keep seed the same 31 | else: 32 | args.seed = random.randint(0, 2**32 - 1) 33 | return args.seed 34 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/adabins/__init__.py: -------------------------------------------------------------------------------- 1 | from .unet_adaptive_bins import UnetAdaptiveBins 2 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/adabins/layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class PatchTransformerEncoder(nn.Module): 6 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4): 7 | super(PatchTransformerEncoder, self).__init__() 8 | encoder_layers = nn.TransformerEncoderLayer(embedding_dim, num_heads, dim_feedforward=1024) 9 | self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=4) # takes shape S,N,E 10 | 11 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, 12 | kernel_size=patch_size, stride=patch_size, padding=0) 13 | 14 | self.positional_encodings = nn.Parameter(torch.rand(500, embedding_dim), requires_grad=True) 15 | 16 | def forward(self, x): 17 | embeddings = self.embedding_convPxP(x).flatten(2) # .shape = n,c,s = n, embedding_dim, s 18 | # embeddings = nn.functional.pad(embeddings, (1,0)) # extra special token at start ? 
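# add the learned positional encodings: take the first S rows (S = number of patches), transpose to (E, S) and broadcast over the batch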
19 | embeddings = embeddings + self.positional_encodings[:embeddings.shape[2], :].T.unsqueeze(0) 20 | 21 | # change to S,N,E format required by transformer 22 | embeddings = embeddings.permute(2, 0, 1) 23 | x = self.transformer_encoder(embeddings) # .shape = S, N, E 24 | return x 25 | 26 | 27 | class PixelWiseDotProduct(nn.Module): 28 | def __init__(self): 29 | super(PixelWiseDotProduct, self).__init__() 30 | 31 | def forward(self, x, K): 32 | n, c, h, w = x.size() 33 | _, cout, ck = K.size() 34 | assert c == ck, "Number of channels in x and Embedding dimension (at dim 2) of K matrix must match" 35 | y = torch.matmul(x.view(n, c, h * w).permute(0, 2, 1), K.permute(0, 2, 1)) # .shape = n, hw, cout 36 | return y.permute(0, 2, 1).view(n, cout, h, w) 37 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/adabins/miniViT.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .layers import PatchTransformerEncoder, PixelWiseDotProduct 5 | 6 | 7 | class mViT(nn.Module): 8 | def __init__(self, in_channels, n_query_channels=128, patch_size=16, dim_out=256, 9 | embedding_dim=128, num_heads=4, norm='linear'): 10 | super(mViT, self).__init__() 11 | self.norm = norm 12 | self.n_query_channels = n_query_channels 13 | self.patch_transformer = PatchTransformerEncoder(in_channels, patch_size, embedding_dim, num_heads) 14 | self.dot_product_layer = PixelWiseDotProduct() 15 | 16 | self.conv3x3 = nn.Conv2d(in_channels, embedding_dim, kernel_size=3, stride=1, padding=1) 17 | self.regressor = nn.Sequential(nn.Linear(embedding_dim, 256), 18 | nn.LeakyReLU(), 19 | nn.Linear(256, 256), 20 | nn.LeakyReLU(), 21 | nn.Linear(256, dim_out)) 22 | 23 | def forward(self, x): 24 | # n, c, h, w = x.size() 25 | tgt = self.patch_transformer(x.clone()) # .shape = S, N, E 26 | 27 | x = self.conv3x3(x) 28 | 29 | regression_head, queries = tgt[0, ...], tgt[1:self.n_query_channels + 1, ...] 30 | 31 | # Change from S, N, E to N, S, E 32 | queries = queries.permute(1, 0, 2) 33 | range_attention_maps = self.dot_product_layer(x, queries) # .shape = n, n_query_channels, h, w 34 | 35 | y = self.regressor(regression_head) # .shape = N, dim_out 36 | if self.norm == 'linear': 37 | y = torch.relu(y) 38 | eps = 0.1 39 | y = y + eps 40 | elif self.norm == 'softmax': 41 | return torch.softmax(y, dim=1), range_attention_maps 42 | else: 43 | y = torch.sigmoid(y) 44 | y = y / y.sum(dim=1, keepdim=True) 45 | return y, range_attention_maps 46 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | This license does not apply to the model weights. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/Quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "import requests\n", 11 | "\n", 12 | "! wget https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download -O weights.zip\n", 13 | "! unzip -d weights -j weights.zip\n", 14 | "from models.clipseg import CLIPDensePredT\n", 15 | "from PIL import Image\n", 16 | "from torchvision import transforms\n", 17 | "from matplotlib import pyplot as plt\n", 18 | "\n", 19 | "# load model\n", 20 | "model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64)\n", 21 | "model.eval();\n", 22 | "\n", 23 | "# non-strict, because we only stored decoder weights (not CLIP weights)\n", 24 | "model.load_state_dict(torch.load('weights/rd64-uni.pth', map_location=torch.device('cpu')), strict=False);" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "Load and normalize `example_image.jpg`. You can also load through an URL." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# load and normalize image\n", 41 | "input_image = Image.open('example_image.jpg')\n", 42 | "\n", 43 | "# or load from URL...\n", 44 | "# image_url = 'https://farm5.staticflickr.com/4141/4856248695_03475782dc_z.jpg'\n", 45 | "# input_image = Image.open(requests.get(image_url, stream=True).raw)\n", 46 | "\n", 47 | "transform = transforms.Compose([\n", 48 | " transforms.ToTensor(),\n", 49 | " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", 50 | " transforms.Resize((352, 352)),\n", 51 | "])\n", 52 | "img = transform(input_image).unsqueeze(0)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Predict and visualize (this might take a few seconds if running without GPU support)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "prompts = ['a glass', 'something to fill', 'wood', 'a jar']\n", 69 | "\n", 70 | "# predict\n", 71 | "with torch.no_grad():\n", 72 | " preds = model(img.repeat(4,1,1,1), prompts)[0]\n", 73 | "\n", 74 | "# visualize prediction\n", 75 | "_, ax = plt.subplots(1, 5, figsize=(15, 4))\n", 76 | "[a.axis('off') for a in ax.flatten()]\n", 77 | "ax[0].imshow(input_image)\n", 78 | "[ax[i+1].imshow(torch.sigmoid(preds[i][0])) for i in range(4)];\n", 79 | "[ax[i+1].text(0, -15, prompts[i]) for i in range(4)];" 80 | ] 81 | } 82 | ], 83 | "metadata": { 84 | "interpreter": { 85 | "hash": "800ed241f7db2bd3aa6942aa3be6809cdb30ee6b0a9e773dfecfa9fef1f4c586" 86 | }, 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | 
"language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.8.10" 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 4 107 | } 108 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/Readme.md: -------------------------------------------------------------------------------- 1 | # Image Segmentation Using Text and Image Prompts 2 | This repository contains the code used in the paper ["Image Segmentation Using Text and Image Prompts"](https://arxiv.org/abs/2112.10003). 3 | 4 | **The Paper has been accepted to CVPR 2022!** 5 | 6 | drawing 7 | 8 | The systems allows to create segmentation models without training based on: 9 | - An arbitrary text query 10 | - Or an image with a mask highlighting stuff or an object. 11 | 12 | ### Quick Start 13 | 14 | In the `Quickstart.ipynb` notebook we provide the code for using a pre-trained CLIPSeg model. If you run the notebook locally, make sure you downloaded the `rd64-uni.pth` weights, either manually or via git lfs extension. 15 | It can also be used interactively using [MyBinder](https://mybinder.org/v2/gh/timojl/clipseg/HEAD?labpath=Quickstart.ipynb) 16 | (please note that the VM does not use a GPU, thus inference takes a few seconds). 17 | 18 | 19 | ### Dependencies 20 | This code base depends on pytorch, torchvision and clip (`pip install git+https://github.com/openai/CLIP.git`). 21 | Additional dependencies are hidden for double blind review. 22 | 23 | 24 | ### Datasets 25 | 26 | * `PhraseCut` and `PhraseCutPlus`: Referring expression dataset 27 | * `PFEPascalWrapper`: Wrapper class for PFENet's Pascal-5i implementation 28 | * `PascalZeroShot`: Wrapper class for PascalZeroShot 29 | * `COCOWrapper`: Wrapper class for COCO. 30 | 31 | ### Models 32 | 33 | * `CLIPDensePredT`: CLIPSeg model with transformer-based decoder. 34 | * `ViTDensePredT`: CLIPSeg model with transformer-based decoder. 35 | 36 | ### Third Party Dependencies 37 | For some of the datasets third party dependencies are required. Run the following commands in the `third_party` folder. 38 | ```bash 39 | git clone https://github.com/cvlab-yonsei/JoEm 40 | git clone https://github.com/Jia-Research-Lab/PFENet.git 41 | git clone https://github.com/ChenyunWu/PhraseCutDataset.git 42 | git clone https://github.com/juhongm999/hsnet.git 43 | ``` 44 | 45 | ### Weights 46 | 47 | The MIT license does not apply to these weights. 48 | 49 | We provide two model weights, for D=64 (4.1MB) and D=16 (1.1MB). 50 | ``` 51 | wget https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download -O weights.zip 52 | unzip -d weights -j weights.zip 53 | ``` 54 | 55 | 56 | ### Training and Evaluation 57 | 58 | To train use the `training.py` script with experiment file and experiment id parameters. E.g. `python training.py phrasecut.yaml 0` will train the first phrasecut experiment which is defined by the `configuration` and first `individual_configurations` parameters. Model weights will be written in `logs/`. 59 | 60 | For evaluation use `score.py`. E.g. `python score.py phrasecut.yaml 0 0` will train the first phrasecut experiment of `test_configuration` and the first configuration in `individual_configurations`. 
61 | 62 | 63 | ### Usage of PFENet Wrappers 64 | 65 | In order to use the dataset and model wrappers for PFENet, the PFENet repository needs to be cloned to the root folder. 66 | `git clone https://github.com/Jia-Research-Lab/PFENet.git ` 67 | 68 | 69 | ### License 70 | 71 | The source code files in this repository (excluding model weights) are released under MIT license. 72 | 73 | ### Citation 74 | ``` 75 | @InProceedings{lueddecke22_cvpr, 76 | author = {L\"uddecke, Timo and Ecker, Alexander}, 77 | title = {Image Segmentation Using Text and Image Prompts}, 78 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 79 | month = {June}, 80 | year = {2022}, 81 | pages = {7086-7096} 82 | } 83 | 84 | ``` 85 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/datasets/coco_wrapper.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from types import new_class 3 | import torch 4 | import numpy as np 5 | import os 6 | import json 7 | 8 | from os.path import join, dirname, isdir, isfile, expanduser, realpath, basename 9 | from random import shuffle, seed as set_seed 10 | from PIL import Image 11 | 12 | from itertools import combinations 13 | from torchvision import transforms 14 | from torchvision.transforms.transforms import Resize 15 | 16 | from datasets.utils import blend_image_segmentation 17 | from general_utils import get_from_repository 18 | 19 | COCO_CLASSES = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'} 20 | 21 | class COCOWrapper(object): 22 | 23 | def __init__(self, split, fold=0, image_size=400, aug=None, mask='separate', negative_prob=0, 24 | with_class_label=False): 25 | super().__init__() 26 | 27 | self.mask = mask 28 | self.with_class_label = with_class_label 29 | self.negative_prob = negative_prob 30 | 31 | from third_party.hsnet.data.coco import DatasetCOCO 32 | 33 | get_from_repository('COCO-20i', ['COCO-20i.tar']) 34 | 35 | foldpath = join(dirname(__file__), '../third_party/hsnet/data/splits/coco/%s/fold%d.pkl') 36 | 37 | def build_img_metadata_classwise(self): 38 | with open(foldpath % (self.split, self.fold), 'rb') as f: 39 | img_metadata_classwise = pickle.load(f) 40 | return img_metadata_classwise 41 | 42 | 43 | DatasetCOCO.build_img_metadata_classwise = build_img_metadata_classwise 44 | # 
DatasetCOCO.read_mask = read_mask 45 | 46 | mean = [0.485, 0.456, 0.406] 47 | std = [0.229, 0.224, 0.225] 48 | transform = transforms.Compose([ 49 | transforms.Resize((image_size, image_size)), 50 | transforms.ToTensor(), 51 | transforms.Normalize(mean, std) 52 | ]) 53 | 54 | self.coco = DatasetCOCO(expanduser('~/datasets/COCO-20i/'), fold, transform, split, 1, False) 55 | 56 | self.all_classes = [self.coco.class_ids] 57 | self.coco.base_path = join(expanduser('~/datasets/COCO-20i')) 58 | 59 | def __len__(self): 60 | return len(self.coco) 61 | 62 | def __getitem__(self, i): 63 | sample = self.coco[i] 64 | 65 | label_name = COCO_CLASSES[int(sample['class_id'])] 66 | 67 | img_s, seg_s = sample['support_imgs'][0], sample['support_masks'][0] 68 | 69 | if self.negative_prob > 0 and torch.rand(1).item() < self.negative_prob: 70 | new_class_id = sample['class_id'] 71 | while new_class_id == sample['class_id']: 72 | sample2 = self.coco[torch.randint(0, len(self), (1,)).item()] 73 | new_class_id = sample2['class_id'] 74 | img_s = sample2['support_imgs'][0] 75 | seg_s = torch.zeros_like(seg_s) 76 | 77 | mask = self.mask 78 | if mask == 'separate': 79 | supp = (img_s, seg_s) 80 | elif mask == 'text_label': 81 | # DEPRECATED 82 | supp = [int(sample['class_id'])] 83 | elif mask == 'text': 84 | supp = [label_name] 85 | else: 86 | if mask.startswith('text_and_'): 87 | mask = mask[9:] 88 | label_add = [label_name] 89 | else: 90 | label_add = [] 91 | 92 | supp = label_add + blend_image_segmentation(img_s, seg_s, mode=mask) 93 | 94 | if self.with_class_label: 95 | label = (torch.zeros(0), sample['class_id'],) 96 | else: 97 | label = (torch.zeros(0), ) 98 | 99 | return (sample['query_img'],) + tuple(supp), (sample['query_mask'].unsqueeze(0),) + label -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/datasets/pascal_classes.json: -------------------------------------------------------------------------------- 1 | [{"id": 1, "synonyms": ["aeroplane"]}, {"id": 2, "synonyms": ["bicycle"]}, {"id": 3, "synonyms": ["bird"]}, {"id": 4, "synonyms": ["boat"]}, {"id": 5, "synonyms": ["bottle"]}, {"id": 6, "synonyms": ["bus"]}, {"id": 7, "synonyms": ["car"]}, {"id": 8, "synonyms": ["cat"]}, {"id": 9, "synonyms": ["chair"]}, {"id": 10, "synonyms": ["cow"]}, {"id": 11, "synonyms": ["diningtable"]}, {"id": 12, "synonyms": ["dog"]}, {"id": 13, "synonyms": ["horse"]}, {"id": 14, "synonyms": ["motorbike"]}, {"id": 15, "synonyms": ["person"]}, {"id": 16, "synonyms": ["pottedplant"]}, {"id": 17, "synonyms": ["sheep"]}, {"id": 18, "synonyms": ["sofa"]}, {"id": 19, "synonyms": ["train"]}, {"id": 20, "synonyms": ["tvmonitor"]}] -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/datasets/pascal_zeroshot.py: -------------------------------------------------------------------------------- 1 | from os.path import expanduser 2 | import torch 3 | import json 4 | import torchvision 5 | from general_utils import get_from_repository 6 | from general_utils import log 7 | from torchvision import transforms 8 | 9 | PASCAL_VOC_CLASSES_ZS = [['cattle.n.01', 'motorcycle.n.01'], ['aeroplane.n.01', 'sofa.n.01'], 10 | ['cat.n.01', 'television.n.03'], ['train.n.01', 'bottle.n.01'], 11 | ['chair.n.01', 'pot_plant.n.01']] 12 | 13 | 14 | class PascalZeroShot(object): 15 | 16 | def __init__(self, split, n_unseen, image_size=224) -> None: 17 | super().__init__() 18 | 19 | import sys 20 | 
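# the JoEm repository is expected to live under third_party/ (see 'Third Party Dependencies' in the clipseg Readme)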
sys.path.append('third_party/JoEm') 21 | from third_party.JoEm.data_loader.dataset import VOCSegmentation 22 | from third_party.JoEm.data_loader import get_seen_idx, get_unseen_idx, VOC 23 | 24 | self.pascal_classes = VOC 25 | self.image_size = image_size 26 | 27 | self.transform = transforms.Compose([ 28 | transforms.Resize((image_size, image_size)), 29 | ]) 30 | 31 | if split == 'train': 32 | self.voc = VOCSegmentation(get_unseen_idx(n_unseen), get_seen_idx(n_unseen), 33 | split=split, transform=True, transform_args=dict(base_size=312, crop_size=312), 34 | ignore_bg=False, ignore_unseen=False, remv_unseen_img=True) 35 | elif split == 'val': 36 | self.voc = VOCSegmentation(get_unseen_idx(n_unseen), get_seen_idx(n_unseen), 37 | split=split, transform=False, 38 | ignore_bg=False, ignore_unseen=False) 39 | 40 | self.unseen_idx = get_unseen_idx(n_unseen) 41 | 42 | def __len__(self): 43 | return len(self.voc) 44 | 45 | def __getitem__(self, i): 46 | 47 | sample = self.voc[i] 48 | label = sample['label'].long() 49 | all_labels = [l for l in torch.where(torch.bincount(label.flatten())>0)[0].numpy().tolist() if l != 255] 50 | class_indices = [l for l in all_labels] 51 | class_names = [self.pascal_classes[l] for l in all_labels] 52 | 53 | image = self.transform(sample['image']) 54 | 55 | label = transforms.Resize((self.image_size, self.image_size), 56 | interpolation=torchvision.transforms.InterpolationMode.NEAREST)(label.unsqueeze(0))[0] 57 | 58 | return (image,), (label, ) 59 | 60 | 61 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/datasets/utils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import torch 4 | 5 | 6 | def blend_image_segmentation(img, seg, mode, image_size=224): 7 | 8 | 9 | if mode in {'blur_highlight', 'blur3_highlight', 'blur3_highlight01', 'blur_highlight_random', 'crop'}: 10 | if isinstance(img, np.ndarray): 11 | img = torch.from_numpy(img) 12 | 13 | if isinstance(seg, np.ndarray): 14 | seg = torch.from_numpy(seg) 15 | 16 | if mode == 'overlay': 17 | out = img * seg 18 | out = [out.astype('float32')] 19 | elif mode == 'highlight': 20 | out = img * seg[None, :, :] * 0.85 + 0.15 * img 21 | out = [out.astype('float32')] 22 | elif mode == 'highlight2': 23 | img = img / 2 24 | out = (img+0.1) * seg[None, :, :] + 0.3 * img 25 | out = [out.astype('float32')] 26 | elif mode == 'blur_highlight': 27 | from evaluation_utils import img_preprocess 28 | out = [img_preprocess((None, [img], [seg]), blur=1, bg_fac=0.5).numpy()[0] - 0.01] 29 | elif mode == 'blur3_highlight': 30 | from evaluation_utils import img_preprocess 31 | out = [img_preprocess((None, [img], [seg]), blur=3, bg_fac=0.5).numpy()[0] - 0.01] 32 | elif mode == 'blur3_highlight01': 33 | from evaluation_utils import img_preprocess 34 | out = [img_preprocess((None, [img], [seg]), blur=3, bg_fac=0.1).numpy()[0] - 0.01] 35 | elif mode == 'blur_highlight_random': 36 | from evaluation_utils import img_preprocess 37 | out = [img_preprocess((None, [img], [seg]), blur=0 + torch.randint(0, 3, (1,)).item(), bg_fac=0.1 + 0.8*torch.rand(1).item()).numpy()[0] - 0.01] 38 | elif mode == 'crop': 39 | from evaluation_utils import img_preprocess 40 | out = [img_preprocess((None, [img], [seg]), blur=1, center_context=0.1, image_size=image_size)[0].numpy()] 41 | elif mode == 'crop_blur_highlight': 42 | from evaluation_utils import img_preprocess 43 | out = [img_preprocess((None, [img], [seg]), blur=3, 
center_context=0.1, bg_fac=0.1, image_size=image_size)[0].numpy()] 44 | elif mode == 'crop_blur_highlight352': 45 | from evaluation_utils import img_preprocess 46 | out = [img_preprocess((None, [img], [seg]), blur=3, center_context=0.1, bg_fac=0.1, image_size=352)[0].numpy()] 47 | elif mode == 'shape': 48 | out = [np.stack([seg[:, :]]*3).astype('float32')] 49 | elif mode == 'concat': 50 | out = [np.concatenate([img, seg[None, :, :]]).astype('float32')] 51 | elif mode == 'image_only': 52 | out = [img.astype('float32')] 53 | elif mode == 'image_black': 54 | out = [img.astype('float32')*0] 55 | elif mode is None: 56 | out = [img.astype('float32')] 57 | elif mode == 'separate': 58 | out = [img.astype('float32'), seg.astype('int64')] 59 | elif mode == 'separate_img_black': 60 | out = [img.astype('float32')*0, seg.astype('int64')] 61 | elif mode == 'separate_seg_ones': 62 | out = [img.astype('float32'), np.ones_like(seg).astype('int64')] 63 | elif mode == 'separate_both_black': 64 | out = [img.astype('float32')*0, seg.astype('int64')*0] 65 | else: 66 | raise ValueError(f'invalid mode: {mode}') 67 | 68 | return out -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/environment.yml: -------------------------------------------------------------------------------- 1 | name: clipseg-environment 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | dependencies: 6 | - numpy 7 | - scipy 8 | - matplotlib-base 9 | - pip 10 | - pip: 11 | - --find-links https://download.pytorch.org/whl/torch_stable.html 12 | - torch==1.10.0+cpu 13 | - torchvision==0.11.1+cpu 14 | - opencv-python 15 | - git+https://github.com/openai/CLIP.git 16 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/example_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/scripts/deforum_helpers/src/clipseg/example_image.jpg -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/experiments/ablation.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | batch_size: 64 3 | optimizer: torch.optim.AdamW 4 | 5 | lr: 0.001 6 | 7 | trainer: experiment_setup.train_loop 8 | scorer: experiment_setup.score 9 | model: models.clipseg.CLIPDensePredT 10 | 11 | lr_scheduler: cosine 12 | T_max: 20000 13 | eta_min: 0.0001 14 | 15 | max_iterations: 20000 # <-########################################## 16 | val_interval: null 17 | 18 | # dataset 19 | dataset: datasets.phrasecut.PhraseCut # <----------------- 20 | split_mode: pascal_test 21 | split: train 22 | mask: text_and_crop_blur_highlight352 23 | image_size: 352 24 | negative_prob: 0.2 25 | mix_text_max: 0.5 26 | 27 | # general 28 | mix: True # <----------------- 29 | prompt: shuffle+ 30 | norm_cond: True 31 | mix_text_min: 0.0 32 | with_visual: True 33 | 34 | # model 35 | version: 'ViT-B/16' 36 | extract_layers: [3, 7, 9] 37 | reduce_dim: 64 38 | depth: 3 39 | fix_shift: False # <-########################################## 40 | 41 | loss: torch.nn.functional.binary_cross_entropy_with_logits 42 | amp: True 43 | 44 | test_configuration_common: 45 | normalize: True 46 | image_size: 352 47 | batch_size: 32 48 | sigmoid: True 49 | split: test 50 | label_support: True 51 | 52 | test_configuration: 53 | 54 | - 55 | name: pc 56 
| metric: metrics.FixedIntervalMetrics 57 | test_dataset: phrasecut 58 | mask: text 59 | 60 | - 61 | name: pc-vis 62 | metric: metrics.FixedIntervalMetrics 63 | test_dataset: phrasecut 64 | mask: crop_blur_highlight352 65 | with_visual: True 66 | visual_only: True 67 | 68 | 69 | columns: [name, 70 | pc_fgiou_best, pc_miou_best, pc_fgiou_0.5, 71 | pc-vis_fgiou_best, pc-vis_miou_best, pc-vis_fgiou_0.5, 72 | duration] 73 | 74 | 75 | individual_configurations: 76 | 77 | - {name: rd64-uni} 78 | - {name: rd64-no-pretrain, not_pretrained: True, lr: 0.0003} 79 | - {name: rd64-no-negatives, negative_prob: 0.0} 80 | - {name: rd64-neg0.5, negative_prob: 0.5} 81 | - {name: rd64-no-visual, with_visual: False, mix: False} 82 | - {name: rd16-uni, reduce_dim: 16} 83 | - {name: rd64-layer3, extract_layers: [3], depth: 1} 84 | - {name: rd64-blur-highlight, mask: text_and_blur_highlight, test_configuration: {mask: blur_highlight}} -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/experiments/pascal_1shot.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | batch_size: 64 3 | optimizer: torch.optim.AdamW 4 | 5 | lr: 0.001 6 | 7 | trainer: experiment_setup.train_loop 8 | scorer: experiment_setup.score 9 | model: models.clipseg.CLIPDensePredT 10 | 11 | lr_scheduler: cosine 12 | T_max: 20000 13 | eta_min: 0.0001 14 | 15 | max_iterations: 20000 # <-########################################## 16 | val_interval: null 17 | 18 | # dataset 19 | dataset: datasets.phrasecut.PhraseCut 20 | split_mode: pascal_test 21 | mode: train 22 | mask: text_and_crop_blur_highlight352 23 | image_size: 352 24 | normalize: True 25 | pre_crop_image_size: [sample, 1, 1.5] 26 | aug: 1new 27 | with_visual: True 28 | split: train 29 | 30 | # general 31 | mix: True 32 | prompt: shuffle+ 33 | norm_cond: True 34 | mix_text_min: 0.0 35 | 36 | # model 37 | out: 1 38 | version: 'ViT-B/16' 39 | extract_layers: [3, 7, 9] 40 | reduce_dim: 64 41 | depth: 3 42 | 43 | loss: torch.nn.functional.binary_cross_entropy_with_logits 44 | amp: True 45 | 46 | test_configuration_common: 47 | normalize: True 48 | image_size: 352 49 | metric: metrics.FixedIntervalMetrics 50 | batch_size: 1 51 | test_dataset: pascal 52 | sigmoid: True 53 | # max_iterations: 250 54 | 55 | test_configuration: 56 | 57 | - 58 | name: pas_t 59 | mask: text 60 | 61 | - 62 | name: pas_h 63 | mask: blur3_highlight01 64 | 65 | - 66 | name: pas_h2 67 | mask: crop_blur_highlight352 68 | 69 | 70 | columns: [name, 71 | pas_t_fgiou_best, pas_t_miou_best, pas_t_fgiou_ct, 72 | pas_h_fgiou_best, pas_h_miou_best, pas_h_fgiou_ct, 73 | pas_h2_fgiou_best, pas_h2_miou_best, pas_h2_fgiou_ct, pas_h2_fgiou_best_t, 74 | train_loss, duration, date 75 | ] 76 | 77 | individual_configurations: 78 | 79 | - {name: rd64-uni-phrasepas5i-0, remove_classes: [pas5i, 0], negative_prob: 0.2, mix_text_max: 0.5, test_configuration: {splits: [0], custom_threshold: 0.24}} 80 | - {name: rd64-uni-phrasepas5i-1, remove_classes: [pas5i, 1], negative_prob: 0.2, mix_text_max: 0.5, test_configuration: {splits: [1], custom_threshold: 0.24}} 81 | - {name: rd64-uni-phrasepas5i-2, remove_classes: [pas5i, 2], negative_prob: 0.2, mix_text_max: 0.5, test_configuration: {splits: [2], custom_threshold: 0.24}} 82 | - {name: rd64-uni-phrasepas5i-3, remove_classes: [pas5i, 3], negative_prob: 0.2, mix_text_max: 0.5, test_configuration: {splits: [3], custom_threshold: 0.24}} 83 | 84 | 85 | - {name: rd64-phrasepas5i-0, 
remove_classes: [pas5i, 0], negative_prob: 0.0, test_configuration: {splits: [0], custom_threshold: 0.28}} 86 | - {name: rd64-phrasepas5i-1, remove_classes: [pas5i, 1], negative_prob: 0.0, test_configuration: {splits: [1], custom_threshold: 0.28}} 87 | - {name: rd64-phrasepas5i-2, remove_classes: [pas5i, 2], negative_prob: 0.0, test_configuration: {splits: [2], custom_threshold: 0.28}} 88 | - {name: rd64-phrasepas5i-3, remove_classes: [pas5i, 3], negative_prob: 0.0, test_configuration: {splits: [3], custom_threshold: 0.28}} 89 | 90 | 91 | # baseline 92 | - {name: bl64-phrasepas5i-0, model: models.clipseg.CLIPDenseBaseline, remove_classes: [pas5i, 0], reduce2_dim: 64, negative_prob: 0.0, test_configuration: {splits: [0], custom_threshold: 0.24}} 93 | - {name: bl64-phrasepas5i-1, model: models.clipseg.CLIPDenseBaseline, remove_classes: [pas5i, 1], reduce2_dim: 64, negative_prob: 0.0, test_configuration: {splits: [1], custom_threshold: 0.24}} 94 | - {name: bl64-phrasepas5i-2, model: models.clipseg.CLIPDenseBaseline, remove_classes: [pas5i, 2], reduce2_dim: 64, negative_prob: 0.0, test_configuration: {splits: [2], custom_threshold: 0.24}} 95 | - {name: bl64-phrasepas5i-3, model: models.clipseg.CLIPDenseBaseline, remove_classes: [pas5i, 3], reduce2_dim: 64, negative_prob: 0.0, test_configuration: {splits: [3], custom_threshold: 0.24}} 96 | 97 | # ViT 98 | - {name: vit64-uni-phrasepas5i-0, remove_classes: [pas5i, 0], model: models.vitseg.VITDensePredT, negative_prob: 0.2, mix_text_max: 0.5, lr: 0.0001, test_configuration: {splits: [0], custom_threshold: 0.02}} 99 | - {name: vit64-uni-phrasepas5i-1, remove_classes: [pas5i, 1], model: models.vitseg.VITDensePredT, negative_prob: 0.2, mix_text_max: 0.5, lr: 0.0001, test_configuration: {splits: [1], custom_threshold: 0.02}} 100 | - {name: vit64-uni-phrasepas5i-2, remove_classes: [pas5i, 2], model: models.vitseg.VITDensePredT, negative_prob: 0.2, mix_text_max: 0.5, lr: 0.0001, test_configuration: {splits: [2], custom_threshold: 0.02}} 101 | - {name: vit64-uni-phrasepas5i-3, remove_classes: [pas5i, 3], model: models.vitseg.VITDensePredT, negative_prob: 0.2, mix_text_max: 0.5, lr: 0.0001, test_configuration: {splits: [3], custom_threshold: 0.02}} 102 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/experiments/phrasecut.yaml: -------------------------------------------------------------------------------- 1 | configuration: 2 | batch_size: 64 3 | optimizer: torch.optim.AdamW 4 | 5 | lr: 0.001 6 | 7 | trainer: experiment_setup.train_loop 8 | scorer: experiment_setup.score 9 | model: models.clipseg.CLIPDensePredT 10 | 11 | lr_scheduler: cosine 12 | T_max: 20000 13 | eta_min: 0.0001 14 | 15 | max_iterations: 20000 16 | val_interval: null 17 | 18 | # dataset 19 | dataset: datasets.phrasecut.PhraseCut # <----------------- 20 | split_mode: pascal_test 21 | split: train 22 | mask: text_and_crop_blur_highlight352 23 | image_size: 352 24 | normalize: True 25 | pre_crop_image_size: [sample, 1, 1.5] 26 | aug: 1new 27 | 28 | # general 29 | mix: False # <----------------- 30 | prompt: shuffle+ 31 | norm_cond: True 32 | mix_text_min: 0.0 33 | 34 | # model 35 | out: 1 36 | extract_layers: [3, 7, 9] 37 | reduce_dim: 64 38 | depth: 3 39 | fix_shift: False 40 | 41 | loss: torch.nn.functional.binary_cross_entropy_with_logits 42 | amp: True 43 | 44 | test_configuration_common: 45 | normalize: True 46 | image_size: 352 47 | batch_size: 32 48 | # max_iterations: 5 49 | # max_iterations: 150 50 | 51 | 
test_configuration: 52 | 53 | - 54 | name: pc # old: phrasecut 55 | metric: metrics.FixedIntervalMetrics 56 | test_dataset: phrasecut 57 | split: test 58 | mask: text 59 | label_support: True 60 | sigmoid: True 61 | 62 | 63 | columns: [i, name, pc_miou_0.3, pc_fgiou_0.3, pc_fgiou_0.5, pc_ap, duration, date] 64 | 65 | 66 | individual_configurations: 67 | 68 | # important ones 69 | 70 | 71 | - {name: rd64-uni, version: 'ViT-B/16', reduce_dim: 64, with_visual: True, negative_prob: 0.2, mix: True, mix_text_max: 0.5} 72 | 73 | # this was accidentally trained using old mask 74 | - {name: rd128-vit16-phrasecut, version: 'ViT-B/16', reduce_dim: 128, mask: text_and_blur3_highlight01} 75 | - {name: rd64-uni-novis, version: 'ViT-B/16', reduce_dim: 64, with_visual: False, negative_prob: 0.2, mix: False} 76 | # this was accidentally trained using old mask 77 | - {name: baseline3-vit16-phrasecut, model: models.clipseg.CLIPDenseBaseline, version: 'ViT-B/16', reduce_dim: 64, reduce2_dim: 64, mask: text_and_blur3_highlight01} 78 | 79 | - {name: vit64-uni, version: 'ViT-B/16', model: models.vitseg.VITDensePredT, reduce_dim: 64, with_visual: True, only_visual: True, negative_prob: 0.2, mask: crop_blur_highlight352, lr: 0.0003} 80 | - {name: vit64-uni-novis, version: 'ViT-B/16', model: models.vitseg.VITDensePredT, with_visual: False, reduce_dim: 64, lr: 0.0001} -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/scripts/deforum_helpers/src/clipseg/overview.png -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open("README.md", "r", encoding="utf-8") as readme_file: 4 | readme = readme_file.read() 5 | 6 | requirements = [ 7 | "numpy", 8 | "scipy", 9 | "matplotlib", 10 | "torch", 11 | "torchvision", 12 | "opencv-python", 13 | "CLIP @ git+https://github.com/openai/CLIP.git" 14 | ] 15 | 16 | setup( 17 | name='clipseg', 18 | packages=['clipseg'], 19 | package_dir={'clipseg': 'models'}, 20 | package_data={'clipseg': [ 21 | "../weights/*.pth", 22 | ]}, 23 | version='0.0.1', 24 | url='https://github.com/timojl/clipseg', 25 | python_requires='>=3.9', 26 | install_requires=requirements, 27 | description='This repository contains the code used in the paper "Image Segmentation Using Text and Image Prompts".', 28 | long_description=readme, 29 | long_description_content_type="text/markdown", 30 | ) 31 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/clipseg/weights/rd64-uni.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/scripts/deforum_helpers/src/clipseg/weights/rd64-uni.pth -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/leres/lib/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/leres/lib/multi_depth_model_woauxi.py:
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from leres.lib import network_auxi as network 5 | from leres.lib.net_tools import get_func 6 | 7 | class RelDepthModel(nn.Module): 8 | def __init__(self, backbone='resnet50'): 9 | super(RelDepthModel, self).__init__() 10 | if backbone == 'resnet50': 11 | encoder = 'resnet50_stride32' 12 | elif backbone == 'resnext101': 13 | encoder = 'resnext101_stride32x8d' 14 | self.depth_model = DepthModel(encoder) 15 | 16 | def inference(self, rgb): 17 | with torch.no_grad(): 18 | input = rgb.cuda() 19 | depth = self.depth_model(input) 20 | pred_depth_out = depth - depth.min() + 0.01 21 | return pred_depth_out 22 | 23 | 24 | class DepthModel(nn.Module): 25 | def __init__(self, encoder): 26 | super(DepthModel, self).__init__() 27 | backbone = network.__name__.split('.')[-1] + '.' + encoder 28 | self.encoder_modules = get_func(backbone)() 29 | self.decoder_modules = network.Decoder() 30 | 31 | def forward(self, x): 32 | lateral_out = self.encoder_modules(x) 33 | out_logit = self.decoder_modules(lateral_out) 34 | return out_logit -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/leres/lib/net_tools.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import torch 3 | import os 4 | from collections import OrderedDict 5 | 6 | 7 | def get_func(func_name): 8 | """Helper to return a function object by name. func_name must identify a 9 | function in this module or the path to a function relative to the base 10 | 'modeling' module. 11 | """ 12 | if func_name == '': 13 | return None 14 | try: 15 | parts = func_name.split('.') 16 | # Refers to a function in this module 17 | if len(parts) == 1: 18 | return globals()[parts[0]] 19 | # Otherwise, assume we're referencing a module under modeling 20 | module_name = 'leres.lib.' + '.'.join(parts[:-1]) 21 | module = importlib.import_module(module_name) 22 | return getattr(module, parts[-1]) 23 | except Exception: 24 | print('Failed to find function: %s', func_name) 25 | raise 26 | 27 | def load_ckpt(args, depth_model, shift_model, focal_model): 28 | """ 29 | Load checkpoint.
30 | """ 31 | if os.path.isfile(args): 32 | print("loading LeReS checkpoint from %s" % args) 33 | checkpoint = torch.load(args) 34 | if shift_model is not None: 35 | shift_model.load_state_dict(strip_prefix_if_present(checkpoint['shift_model'], 'module.'), 36 | strict=True) 37 | if focal_model is not None: 38 | focal_model.load_state_dict(strip_prefix_if_present(checkpoint['focal_model'], 'module.'), 39 | strict=True) 40 | depth_model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."), 41 | strict=True) 42 | del checkpoint 43 | torch.cuda.empty_cache() 44 | 45 | 46 | def strip_prefix_if_present(state_dict, prefix): 47 | keys = sorted(state_dict.keys()) 48 | if not all(key.startswith(prefix) for key in keys): 49 | return state_dict 50 | stripped_state_dict = OrderedDict() 51 | for key, value in state_dict.items(): 52 | stripped_state_dict[key.replace(prefix, "")] = value 53 | return stripped_state_dict -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/leres/lib/spvcnn_utils.py: -------------------------------------------------------------------------------- 1 | import torchsparse.nn.functional as spf 2 | from torchsparse.point_tensor import PointTensor 3 | from torchsparse.utils.kernel_region import * 4 | from torchsparse.utils.helpers import * 5 | 6 | 7 | __all__ = ['initial_voxelize', 'point_to_voxel', 'voxel_to_point'] 8 | 9 | 10 | # z: PointTensor 11 | # return: SparseTensor 12 | def initial_voxelize(z, init_res, after_res): 13 | new_float_coord = torch.cat( 14 | [(z.C[:, :3] * init_res) / after_res, z.C[:, -1].view(-1, 1)], 1) 15 | 16 | pc_hash = spf.sphash(torch.floor(new_float_coord).int()) 17 | sparse_hash = torch.unique(pc_hash) 18 | idx_query = spf.sphashquery(pc_hash, sparse_hash) 19 | counts = spf.spcount(idx_query.int(), len(sparse_hash)) 20 | 21 | inserted_coords = spf.spvoxelize(torch.floor(new_float_coord), idx_query, 22 | counts) 23 | inserted_coords = torch.round(inserted_coords).int() 24 | inserted_feat = spf.spvoxelize(z.F, idx_query, counts) 25 | 26 | new_tensor = SparseTensor(inserted_feat, inserted_coords, 1) 27 | new_tensor.check() 28 | z.additional_features['idx_query'][1] = idx_query 29 | z.additional_features['counts'][1] = counts 30 | z.C = new_float_coord 31 | 32 | return new_tensor 33 | 34 | 35 | # x: SparseTensor, z: PointTensor 36 | # return: SparseTensor 37 | def point_to_voxel(x, z): 38 | if z.additional_features is None or z.additional_features.get('idx_query') is None\ 39 | or z.additional_features['idx_query'].get(x.s) is None: 40 | #pc_hash = hash_gpu(torch.floor(z.C).int()) 41 | pc_hash = spf.sphash( 42 | torch.cat([ 43 | torch.floor(z.C[:, :3] / x.s).int() * x.s, 44 | z.C[:, -1].int().view(-1, 1) 45 | ], 1)) 46 | sparse_hash = spf.sphash(x.C) 47 | idx_query = spf.sphashquery(pc_hash, sparse_hash) 48 | counts = spf.spcount(idx_query.int(), x.C.shape[0]) 49 | z.additional_features['idx_query'][x.s] = idx_query 50 | z.additional_features['counts'][x.s] = counts 51 | else: 52 | idx_query = z.additional_features['idx_query'][x.s] 53 | counts = z.additional_features['counts'][x.s] 54 | 55 | inserted_feat = spf.spvoxelize(z.F, idx_query, counts) 56 | new_tensor = SparseTensor(inserted_feat, x.C, x.s) 57 | new_tensor.coord_maps = x.coord_maps 58 | new_tensor.kernel_maps = x.kernel_maps 59 | 60 | return new_tensor 61 | 62 | 63 | # x: SparseTensor, z: PointTensor 64 | # return: PointTensor 65 | def voxel_to_point(x, z, nearest=False): 66 | if z.idx_query is None or z.weights 
is None or z.idx_query.get( 67 | x.s) is None or z.weights.get(x.s) is None: 68 | kr = KernelRegion(2, x.s, 1) 69 | off = kr.get_kernel_offset().to(z.F.device) 70 | #old_hash = kernel_hash_gpu(torch.floor(z.C).int(), off) 71 | old_hash = spf.sphash( 72 | torch.cat([ 73 | torch.floor(z.C[:, :3] / x.s).int() * x.s, 74 | z.C[:, -1].int().view(-1, 1) 75 | ], 1), off) 76 | pc_hash = spf.sphash(x.C.to(z.F.device)) 77 | idx_query = spf.sphashquery(old_hash, pc_hash) 78 | weights = spf.calc_ti_weights(z.C, idx_query, 79 | scale=x.s).transpose(0, 1).contiguous() 80 | idx_query = idx_query.transpose(0, 1).contiguous() 81 | if nearest: 82 | weights[:, 1:] = 0. 83 | idx_query[:, 1:] = -1 84 | new_feat = spf.spdevoxelize(x.F, idx_query, weights) 85 | new_tensor = PointTensor(new_feat, 86 | z.C, 87 | idx_query=z.idx_query, 88 | weights=z.weights) 89 | new_tensor.additional_features = z.additional_features 90 | new_tensor.idx_query[x.s] = idx_query 91 | new_tensor.weights[x.s] = weights 92 | z.idx_query[x.s] = idx_query 93 | z.weights[x.s] = weights 94 | 95 | else: 96 | new_feat = spf.spdevoxelize(x.F, z.idx_query.get(x.s), z.weights.get(x.s)) 97 | new_tensor = PointTensor(new_feat, 98 | z.C, 99 | idx_query=z.idx_query, 100 | weights=z.weights) 101 | new_tensor.additional_features = z.additional_features 102 | 103 | return new_tensor 104 | 105 | 106 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/midas/backbones/levit.py: -------------------------------------------------------------------------------- 1 | import timm 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | from .utils import activations, get_activation, Transpose 7 | 8 | 9 | def forward_levit(pretrained, x): 10 | pretrained.model.forward_features(x) 11 | 12 | layer_1 = pretrained.activations["1"] 13 | layer_2 = pretrained.activations["2"] 14 | layer_3 = pretrained.activations["3"] 15 | 16 | layer_1 = pretrained.act_postprocess1(layer_1) 17 | layer_2 = pretrained.act_postprocess2(layer_2) 18 | layer_3 = pretrained.act_postprocess3(layer_3) 19 | 20 | return layer_1, layer_2, layer_3 21 | 22 | 23 | def _make_levit_backbone( 24 | model, 25 | hooks=[3, 11, 21], 26 | patch_grid=[14, 14] 27 | ): 28 | pretrained = nn.Module() 29 | 30 | pretrained.model = model 31 | pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) 32 | pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) 33 | pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) 34 | 35 | pretrained.activations = activations 36 | 37 | patch_grid_size = np.array(patch_grid, dtype=int) 38 | 39 | pretrained.act_postprocess1 = nn.Sequential( 40 | Transpose(1, 2), 41 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) 42 | ) 43 | pretrained.act_postprocess2 = nn.Sequential( 44 | Transpose(1, 2), 45 | nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 2).astype(int)).tolist())) 46 | ) 47 | pretrained.act_postprocess3 = nn.Sequential( 48 | Transpose(1, 2), 49 | nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 4).astype(int)).tolist())) 50 | ) 51 | 52 | return pretrained 53 | 54 | 55 | class ConvTransposeNorm(nn.Sequential): 56 | """ 57 | Modification of 58 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: ConvNorm 59 | such that ConvTranspose2d is used instead of Conv2d. 
60 | """ 61 | 62 | def __init__( 63 | self, in_chs, out_chs, kernel_size=1, stride=1, pad=0, dilation=1, 64 | groups=1, bn_weight_init=1): 65 | super().__init__() 66 | self.add_module('c', 67 | nn.ConvTranspose2d(in_chs, out_chs, kernel_size, stride, pad, dilation, groups, bias=False)) 68 | self.add_module('bn', nn.BatchNorm2d(out_chs)) 69 | 70 | nn.init.constant_(self.bn.weight, bn_weight_init) 71 | 72 | @torch.no_grad() 73 | def fuse(self): 74 | c, bn = self._modules.values() 75 | w = bn.weight / (bn.running_var + bn.eps) ** 0.5 76 | w = c.weight * w[:, None, None, None] 77 | b = bn.bias - bn.running_mean * bn.weight / (bn.running_var + bn.eps) ** 0.5 78 | m = nn.ConvTranspose2d( 79 | w.size(1), w.size(0), w.shape[2:], stride=self.c.stride, 80 | padding=self.c.padding, dilation=self.c.dilation, groups=self.c.groups) 81 | m.weight.data.copy_(w) 82 | m.bias.data.copy_(b) 83 | return m 84 | 85 | 86 | def stem_b4_transpose(in_chs, out_chs, activation): 87 | """ 88 | Modification of 89 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: stem_b16 90 | such that ConvTranspose2d is used instead of Conv2d and the stem is also reduced to half. 91 | """ 92 | return nn.Sequential( 93 | ConvTransposeNorm(in_chs, out_chs, 3, 2, 1), 94 | activation(), 95 | ConvTransposeNorm(out_chs, out_chs // 2, 3, 2, 1), 96 | activation()) 97 | 98 | 99 | def _make_pretrained_levit_384(pretrained, hooks=None): 100 | model = timm.create_model("levit_384", pretrained=pretrained) 101 | 102 | hooks = [3, 11, 21] if hooks == None else hooks 103 | return _make_levit_backbone( 104 | model, 105 | hooks=hooks 106 | ) 107 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/midas/backbones/next_vit.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | import torch.nn as nn 4 | 5 | from pathlib import Path 6 | from .utils import activations, forward_default, get_activation 7 | 8 | from ..external.next_vit.classification.nextvit import * 9 | 10 | 11 | def forward_next_vit(pretrained, x): 12 | return forward_default(pretrained, x, "forward") 13 | 14 | 15 | def _make_next_vit_backbone( 16 | model, 17 | hooks=[2, 6, 36, 39], 18 | ): 19 | pretrained = nn.Module() 20 | 21 | pretrained.model = model 22 | pretrained.model.features[hooks[0]].register_forward_hook(get_activation("1")) 23 | pretrained.model.features[hooks[1]].register_forward_hook(get_activation("2")) 24 | pretrained.model.features[hooks[2]].register_forward_hook(get_activation("3")) 25 | pretrained.model.features[hooks[3]].register_forward_hook(get_activation("4")) 26 | 27 | pretrained.activations = activations 28 | 29 | return pretrained 30 | 31 | 32 | def _make_pretrained_next_vit_large_6m(hooks=None): 33 | model = timm.create_model("nextvit_large") 34 | 35 | hooks = [2, 6, 36, 39] if hooks == None else hooks 36 | return _make_next_vit_backbone( 37 | model, 38 | hooks=hooks, 39 | ) 40 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/midas/backbones/swin.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | from .swin_common import _make_swin_backbone 4 | 5 | 6 | def _make_pretrained_swinl12_384(pretrained, hooks=None): 7 | model = timm.create_model("swin_large_patch4_window12_384", pretrained=pretrained) 8 | 9 | hooks = [1, 1, 17, 1] if hooks == None else hooks 10 | return _make_swin_backbone( 11 | model, 12 
| hooks=hooks 13 | ) 14 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/midas/backbones/swin2.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | from .swin_common import _make_swin_backbone 4 | 5 | 6 | def _make_pretrained_swin2l24_384(pretrained, hooks=None): 7 | model = timm.create_model("swinv2_large_window12to24_192to384_22kft1k", pretrained=pretrained) 8 | 9 | hooks = [1, 1, 17, 1] if hooks == None else hooks 10 | return _make_swin_backbone( 11 | model, 12 | hooks=hooks 13 | ) 14 | 15 | 16 | def _make_pretrained_swin2b24_384(pretrained, hooks=None): 17 | model = timm.create_model("swinv2_base_window12to24_192to384_22kft1k", pretrained=pretrained) 18 | 19 | hooks = [1, 1, 17, 1] if hooks == None else hooks 20 | return _make_swin_backbone( 21 | model, 22 | hooks=hooks 23 | ) 24 | 25 | 26 | def _make_pretrained_swin2t16_256(pretrained, hooks=None): 27 | model = timm.create_model("swinv2_tiny_window16_256", pretrained=pretrained) 28 | 29 | hooks = [1, 1, 5, 1] if hooks == None else hooks 30 | return _make_swin_backbone( 31 | model, 32 | hooks=hooks, 33 | patch_grid=[64, 64] 34 | ) 35 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/midas/backbones/swin_common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | from .utils import activations, forward_default, get_activation, Transpose 7 | 8 | 9 | def forward_swin(pretrained, x): 10 | return forward_default(pretrained, x) 11 | 12 | 13 | def _make_swin_backbone( 14 | model, 15 | hooks=[1, 1, 17, 1], 16 | patch_grid=[96, 96] 17 | ): 18 | pretrained = nn.Module() 19 | 20 | pretrained.model = model 21 | pretrained.model.layers[0].blocks[hooks[0]].register_forward_hook(get_activation("1")) 22 | pretrained.model.layers[1].blocks[hooks[1]].register_forward_hook(get_activation("2")) 23 | pretrained.model.layers[2].blocks[hooks[2]].register_forward_hook(get_activation("3")) 24 | pretrained.model.layers[3].blocks[hooks[3]].register_forward_hook(get_activation("4")) 25 | 26 | pretrained.activations = activations 27 | 28 | if hasattr(model, "patch_grid"): 29 | used_patch_grid = model.patch_grid 30 | else: 31 | used_patch_grid = patch_grid 32 | 33 | patch_grid_size = np.array(used_patch_grid, dtype=int) 34 | 35 | pretrained.act_postprocess1 = nn.Sequential( 36 | Transpose(1, 2), 37 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) 38 | ) 39 | pretrained.act_postprocess2 = nn.Sequential( 40 | Transpose(1, 2), 41 | nn.Unflatten(2, torch.Size((patch_grid_size // 2).tolist())) 42 | ) 43 | pretrained.act_postprocess3 = nn.Sequential( 44 | Transpose(1, 2), 45 | nn.Unflatten(2, torch.Size((patch_grid_size // 4).tolist())) 46 | ) 47 | pretrained.act_postprocess4 = nn.Sequential( 48 | Transpose(1, 2), 49 | nn.Unflatten(2, torch.Size((patch_grid_size // 8).tolist())) 50 | ) 51 | 52 | return pretrained 53 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, path): 6 | """Load model from file. 
7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/midas/midas_net.py: -------------------------------------------------------------------------------- 1 | """MidasNet: Network for monocular depth estimation trained by mixing several datasets. 2 | This file contains code that is adapted from 3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .base_model import BaseModel 9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder 10 | 11 | 12 | class MidasNet(BaseModel): 13 | """Network for monocular depth estimation. 14 | """ 15 | 16 | def __init__(self, path=None, features=256, non_negative=True): 17 | """Init. 18 | 19 | Args: 20 | path (str, optional): Path to saved model. Defaults to None. 21 | features (int, optional): Number of features. Defaults to 256. 22 | backbone (str, optional): Backbone network for encoder. Defaults to resnet50 23 | """ 24 | print("Loading weights: ", path) 25 | 26 | super(MidasNet, self).__init__() 27 | 28 | use_pretrained = False if path is None else True 29 | 30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) 31 | 32 | self.scratch.refinenet4 = FeatureFusionBlock(features) 33 | self.scratch.refinenet3 = FeatureFusionBlock(features) 34 | self.scratch.refinenet2 = FeatureFusionBlock(features) 35 | self.scratch.refinenet1 = FeatureFusionBlock(features) 36 | 37 | self.scratch.output_conv = nn.Sequential( 38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), 39 | Interpolate(scale_factor=2, mode="bilinear"), 40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), 41 | nn.ReLU(True), 42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 43 | nn.ReLU(True) if non_negative else nn.Identity(), 44 | ) 45 | 46 | if path: 47 | self.load(path) 48 | 49 | def forward(self, x): 50 | """Forward pass.
51 | 52 | Args: 53 | x (tensor): input data (image) 54 | 55 | Returns: 56 | tensor: depth 57 | """ 58 | 59 | layer_1 = self.pretrained.layer1(x) 60 | layer_2 = self.pretrained.layer2(layer_1) 61 | layer_3 = self.pretrained.layer3(layer_2) 62 | layer_4 = self.pretrained.layer4(layer_3) 63 | 64 | layer_1_rn = self.scratch.layer1_rn(layer_1) 65 | layer_2_rn = self.scratch.layer2_rn(layer_2) 66 | layer_3_rn = self.scratch.layer3_rn(layer_3) 67 | layer_4_rn = self.scratch.layer4_rn(layer_4) 68 | 69 | path_4 = self.scratch.refinenet4(layer_4_rn) 70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 73 | 74 | out = self.scratch.output_conv(path_1) 75 | 76 | return torch.squeeze(out, dim=1) 77 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/model_io.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def save_weights(model, filename, path="./saved_models"): 7 | os.makedirs(path, exist_ok=True) 8 | 9 | fpath = os.path.join(path, filename) 10 | torch.save(model.state_dict(), fpath) 11 | return 12 | 13 | def save_checkpoint(model, optimizer, epoch, filename, root="./checkpoints"): 14 | if not os.path.isdir(root): 15 | os.makedirs(root) 16 | 17 | fpath = os.path.join(root, filename) 18 | torch.save( 19 | { 20 | "model": model.state_dict(), 21 | "optimizer": optimizer.state_dict(), 22 | "epoch": epoch 23 | } 24 | , fpath) 25 | 26 | def load_weights(model, filename, path="./saved_models"): 27 | fpath = os.path.join(path, filename) 28 | state_dict = torch.load(fpath) 29 | model.load_state_dict(state_dict) 30 | return model 31 | 32 | def load_checkpoint(fpath, model, optimizer=None): 33 | ckpt = torch.load(fpath, map_location='cpu') 34 | if ckpt is None: 35 | raise Exception(f"\nERROR Loading AdaBins_nyu.pt. 
Read this for a fix:\nhttps://github.com/deforum-art/deforum-for-automatic1111-webui/wiki/FAQ-&-Troubleshooting#3d-animation-mode-is-not-working-only-2d-works") 36 | if optimizer is None: 37 | optimizer = ckpt.get('optimizer', None) 38 | else: 39 | optimizer.load_state_dict(ckpt['optimizer']) 40 | epoch = ckpt['epoch'] 41 | 42 | if 'model' in ckpt: 43 | ckpt = ckpt['model'] 44 | load_dict = {} 45 | for k, v in ckpt.items(): 46 | if k.startswith('module.'): 47 | k_ = k.replace('module.', '') 48 | load_dict[k_] = v 49 | else: 50 | load_dict[k] = v 51 | 52 | modified = {} # backward compatibility to older naming of architecture blocks 53 | for k, v in load_dict.items(): 54 | if k.startswith('adaptive_bins_layer.embedding_conv.'): 55 | k_ = k.replace('adaptive_bins_layer.embedding_conv.', 56 | 'adaptive_bins_layer.conv3x3.') 57 | modified[k_] = v 58 | # del load_dict[k] 59 | 60 | elif k.startswith('adaptive_bins_layer.patch_transformer.embedding_encoder'): 61 | 62 | k_ = k.replace('adaptive_bins_layer.patch_transformer.embedding_encoder', 63 | 'adaptive_bins_layer.patch_transformer.embedding_convPxP') 64 | modified[k_] = v 65 | # del load_dict[k] 66 | else: 67 | modified[k] = v # else keep the original 68 | 69 | model.load_state_dict(modified) 70 | return model, optimizer, epoch -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/rife/model/warplayer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 5 | backwarp_tenGrid = {} 6 | 7 | 8 | def warp(tenInput, tenFlow): 9 | k = (str(tenFlow.device), str(tenFlow.size())) 10 | if k not in backwarp_tenGrid: 11 | tenHorizontal = torch.linspace(-1.0, 1.0, tenFlow.shape[3], device=device).view( 12 | 1, 1, 1, tenFlow.shape[3]).expand(tenFlow.shape[0], -1, tenFlow.shape[2], -1) 13 | tenVertical = torch.linspace(-1.0, 1.0, tenFlow.shape[2], device=device).view( 14 | 1, 1, tenFlow.shape[2], 1).expand(tenFlow.shape[0], -1, -1, tenFlow.shape[3]) 15 | backwarp_tenGrid[k] = torch.cat( 16 | [tenHorizontal, tenVertical], 1).to(device) 17 | 18 | tenFlow = torch.cat([tenFlow[:, 0:1, :, :] / ((tenInput.shape[3] - 1.0) / 2.0), 19 | tenFlow[:, 1:2, :, :] / ((tenInput.shape[2] - 1.0) / 2.0)], 1) 20 | 21 | g = (backwarp_tenGrid[k] + tenFlow).permute(0, 2, 3, 1) 22 | return torch.nn.functional.grid_sample(input=tenInput, grid=g, mode='bilinear', padding_mode='border', align_corners=True) 23 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/rife/rife_new_gen/refine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from torch.optim import AdamW 5 | import torch.optim as optim 6 | import itertools 7 | from model.warplayer import warp 8 | from torch.nn.parallel import DistributedDataParallel as DDP 9 | import torch.nn.functional as F 10 | 11 | device = torch.device("cuda") 12 | 13 | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1): 14 | return nn.Sequential( 15 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 16 | padding=padding, dilation=dilation, bias=True), 17 | nn.PReLU(out_planes) 18 | ) 19 | 20 | def conv_woact(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1): 21 | return nn.Sequential( 22 | nn.Conv2d(in_planes, 
out_planes, kernel_size=kernel_size, stride=stride, 23 | padding=padding, dilation=dilation, bias=True), 24 | ) 25 | 26 | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1): 27 | return nn.Sequential( 28 | torch.nn.ConvTranspose2d(in_channels=in_planes, out_channels=out_planes, kernel_size=4, stride=2, padding=1, bias=True), 29 | nn.PReLU(out_planes) 30 | ) 31 | 32 | class Conv2(nn.Module): 33 | def __init__(self, in_planes, out_planes, stride=2): 34 | super(Conv2, self).__init__() 35 | self.conv1 = conv(in_planes, out_planes, 3, stride, 1) 36 | self.conv2 = conv(out_planes, out_planes, 3, 1, 1) 37 | 38 | def forward(self, x): 39 | x = self.conv1(x) 40 | x = self.conv2(x) 41 | return x 42 | 43 | c = 16 44 | class Contextnet(nn.Module): 45 | def __init__(self): 46 | super(Contextnet, self).__init__() 47 | self.conv1 = Conv2(3, c) 48 | self.conv2 = Conv2(c, 2*c) 49 | self.conv3 = Conv2(2*c, 4*c) 50 | self.conv4 = Conv2(4*c, 8*c) 51 | 52 | def forward(self, x, flow): 53 | x = self.conv1(x) 54 | flow = F.interpolate(flow, scale_factor=0.5, mode="bilinear", align_corners=False) * 0.5 55 | f1 = warp(x, flow) 56 | x = self.conv2(x) 57 | flow = F.interpolate(flow, scale_factor=0.5, mode="bilinear", align_corners=False) * 0.5 58 | f2 = warp(x, flow) 59 | x = self.conv3(x) 60 | flow = F.interpolate(flow, scale_factor=0.5, mode="bilinear", align_corners=False) * 0.5 61 | f3 = warp(x, flow) 62 | x = self.conv4(x) 63 | flow = F.interpolate(flow, scale_factor=0.5, mode="bilinear", align_corners=False) * 0.5 64 | f4 = warp(x, flow) 65 | return [f1, f2, f3, f4] 66 | 67 | class Unet(nn.Module): 68 | def __init__(self): 69 | super(Unet, self).__init__() 70 | self.down0 = Conv2(17, 2*c) 71 | self.down1 = Conv2(4*c, 4*c) 72 | self.down2 = Conv2(8*c, 8*c) 73 | self.down3 = Conv2(16*c, 16*c) 74 | self.up0 = deconv(32*c, 8*c) 75 | self.up1 = deconv(16*c, 4*c) 76 | self.up2 = deconv(8*c, 2*c) 77 | self.up3 = deconv(4*c, c) 78 | self.conv = nn.Conv2d(c, 3, 3, 1, 1) 79 | 80 | def forward(self, img0, img1, warped_img0, warped_img1, mask, flow, c0, c1): 81 | s0 = self.down0(torch.cat((img0, img1, warped_img0, warped_img1, mask, flow), 1)) 82 | s1 = self.down1(torch.cat((s0, c0[0], c1[0]), 1)) 83 | s2 = self.down2(torch.cat((s1, c0[1], c1[1]), 1)) 84 | s3 = self.down3(torch.cat((s2, c0[2], c1[2]), 1)) 85 | x = self.up0(torch.cat((s3, c0[3], c1[3]), 1)) 86 | x = self.up1(torch.cat((x, s2), 1)) 87 | x = self.up2(torch.cat((x, s1), 1)) 88 | x = self.up3(torch.cat((x, s0), 1)) 89 | x = self.conv(x) 90 | return torch.sigmoid(x) 91 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import math 3 | import re 4 | from io import BytesIO 5 | 6 | import matplotlib.cm 7 | import numpy as np 8 | import torch 9 | import torch.nn 10 | from PIL import Image 11 | 12 | 13 | class RunningAverage: 14 | def __init__(self): 15 | self.avg = 0 16 | self.count = 0 17 | 18 | def append(self, value): 19 | self.avg = (value + self.count * self.avg) / (self.count + 1) 20 | self.count += 1 21 | 22 | def get_value(self): 23 | return self.avg 24 | 25 | 26 | def denormalize(x, device='cpu'): 27 | mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device) 28 | std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device) 29 | return x * std + mean 30 | 31 | 32 | class RunningAverageDict: 33 | def __init__(self): 34 | self._dict = None 35 | 36 
| def update(self, new_dict): 37 | if self._dict is None: 38 | self._dict = dict() 39 | for key, value in new_dict.items(): 40 | self._dict[key] = RunningAverage() 41 | 42 | for key, value in new_dict.items(): 43 | self._dict[key].append(value) 44 | 45 | def get_value(self): 46 | return {key: value.get_value() for key, value in self._dict.items()} 47 | 48 | 49 | def colorize(value, vmin=10, vmax=1000, cmap='magma_r'): 50 | value = value.cpu().numpy()[0, :, :] 51 | invalid_mask = value == -1 52 | 53 | # normalize 54 | vmin = value.min() if vmin is None else vmin 55 | vmax = value.max() if vmax is None else vmax 56 | if vmin != vmax: 57 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 58 | else: 59 | # Avoid 0-division 60 | value = value * 0. 61 | # squeeze last dim if it exists 62 | # value = value.squeeze(axis=0) 63 | cmapper = matplotlib.cm.get_cmap(cmap) 64 | value = cmapper(value, bytes=True) # (nxmx4) 65 | value[invalid_mask] = 255 66 | img = value[:, :, :3] 67 | 68 | # return img.transpose((2, 0, 1)) 69 | return img 70 | 71 | 72 | def count_parameters(model): 73 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 74 | 75 | 76 | def compute_errors(gt, pred): 77 | thresh = np.maximum((gt / pred), (pred / gt)) 78 | a1 = (thresh < 1.25).mean() 79 | a2 = (thresh < 1.25 ** 2).mean() 80 | a3 = (thresh < 1.25 ** 3).mean() 81 | 82 | abs_rel = np.mean(np.abs(gt - pred) / gt) 83 | sq_rel = np.mean(((gt - pred) ** 2) / gt) 84 | 85 | rmse = (gt - pred) ** 2 86 | rmse = np.sqrt(rmse.mean()) 87 | 88 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 89 | rmse_log = np.sqrt(rmse_log.mean()) 90 | 91 | err = np.log(pred) - np.log(gt) 92 | silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100 93 | 94 | log_10 = (np.abs(np.log10(gt) - np.log10(pred))).mean() 95 | return dict(a1=a1, a2=a2, a3=a3, abs_rel=abs_rel, rmse=rmse, log_10=log_10, rmse_log=rmse_log, 96 | silog=silog, sq_rel=sq_rel) 97 | 98 | 99 | ##################################### Demo Utilities ############################################ 100 | def b64_to_pil(b64string): 101 | image_data = re.sub('^data:image/.+;base64,', '', b64string) 102 | # image = Image.open(cStringIO.StringIO(image_data)) 103 | return Image.open(BytesIO(base64.b64decode(image_data))) 104 | 105 | 106 | # Compute edge magnitudes 107 | from scipy import ndimage 108 | 109 | 110 | def edges(d): 111 | dx = ndimage.sobel(d, 0) # horizontal derivative 112 | dy = ndimage.sobel(d, 1) # vertical derivative 113 | return np.abs(dx) + np.abs(dy) 114 | 115 | 116 | class PointCloudHelper(): 117 | def __init__(self, width=640, height=480): 118 | self.xx, self.yy = self.worldCoords(width, height) 119 | 120 | def worldCoords(self, width=640, height=480): 121 | hfov_degrees, vfov_degrees = 57, 43 122 | hFov = math.radians(hfov_degrees) 123 | vFov = math.radians(vfov_degrees) 124 | cx, cy = width / 2, height / 2 125 | fx = width / (2 * math.tan(hFov / 2)) 126 | fy = height / (2 * math.tan(vFov / 2)) 127 | xx, yy = np.tile(range(width), height), np.repeat(range(height), width) 128 | xx = (xx - cx) / fx 129 | yy = (yy - cy) / fy 130 | return xx, yy 131 | 132 | def depth_to_points(self, depth): 133 | depth[edges(depth) > 0.3] = np.nan # Hide depth edges 134 | length = depth.shape[0] * depth.shape[1] 135 | # depth[edges(depth) > 0.3] = 1e6 # Hide depth edges 136 | z = depth.reshape(length) 137 | 138 | return np.dstack((self.xx * z, self.yy * z, z)).reshape((length, 3)) 139 | 140 | 
##################################################################################################### 141 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/data/ddad.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self, resize_shape): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(resize_shape) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "ddad"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DDAD(Dataset): 83 | def __init__(self, data_dir_root, resize_shape): 84 | import glob 85 | 86 | # image paths are of the form /{outleft, depthmap}/*.png 87 | self.image_files = glob.glob(os.path.join(data_dir_root, '*.png')) 88 | self.depth_files = [r.replace("_rgb.png", "_depth.npy") 89 | for r in self.image_files] 90 | self.transform = ToTensor(resize_shape) 91 | 92 | def __getitem__(self, idx): 93 | 94 | image_path = self.image_files[idx] 95 | depth_path = self.depth_files[idx] 96 | 97 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 98 | depth = np.load(depth_path) # meters 99 | 100 | # depth[depth > 8] = -1 101 | depth = depth[..., None] 102 | 103 | sample = dict(image=image, depth=depth) 104 | sample = self.transform(sample) 105 | 106 | if idx == 0: 107 | print(sample["image"].shape) 108 | 109 | return sample 110 | 111 | def __len__(self): 112 | return len(self.image_files) 113 | 114 | 115 | def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs): 116 | dataset = DDAD(data_dir_root, resize_shape) 117 | return DataLoader(dataset, batch_size, **kwargs) 118 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/data/diml_outdoor_test.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do 
so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class DIML_Outdoor(Dataset): 79 | def __init__(self, data_dir_root): 80 | import glob 81 | 82 | # image paths are of the form /{outleft, depthmap}/*.png 83 | self.image_files = glob.glob(os.path.join( 84 | data_dir_root, "*", 'outleft', '*.png')) 85 | self.depth_files = [r.replace("outleft", "depthmap") 86 | for r in self.image_files] 87 | self.transform = ToTensor() 88 | 89 | def __getitem__(self, idx): 90 | image_path = self.image_files[idx] 91 | depth_path = self.depth_files[idx] 92 | 93 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 94 | depth = np.asarray(Image.open(depth_path), 95 | dtype='uint16') / 1000.0 # mm to meters 96 | 97 | # depth[depth > 8] = -1 98 | depth = depth[..., None] 99 | 100 | sample = dict(image=image, depth=depth, dataset="diml_outdoor") 101 | 102 | # return sample 103 | return self.transform(sample) 104 | 105 | def __len__(self): 106 | return len(self.image_files) 107 | 108 | 109 | def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs): 110 | dataset = DIML_Outdoor(data_dir_root) 111 | return DataLoader(dataset, batch_size, **kwargs) 112 | 113 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR") 114 | # 
get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR") 115 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/data/diode.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | self.resize = transforms.Resize(480) 40 | 41 | def __call__(self, sample): 42 | image, depth = sample['image'], sample['depth'] 43 | image = self.to_tensor(image) 44 | image = self.normalize(image) 45 | depth = self.to_tensor(depth) 46 | 47 | image = self.resize(image) 48 | 49 | return {'image': image, 'depth': depth, 'dataset': "diode"} 50 | 51 | def to_tensor(self, pic): 52 | 53 | if isinstance(pic, np.ndarray): 54 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 55 | return img 56 | 57 | # # handle PIL Image 58 | if pic.mode == 'I': 59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 60 | elif pic.mode == 'I;16': 61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 62 | else: 63 | img = torch.ByteTensor( 64 | torch.ByteStorage.from_buffer(pic.tobytes())) 65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 66 | if pic.mode == 'YCbCr': 67 | nchannel = 3 68 | elif pic.mode == 'I;16': 69 | nchannel = 1 70 | else: 71 | nchannel = len(pic.mode) 72 | img = img.view(pic.size[1], pic.size[0], nchannel) 73 | 74 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 75 | 76 | if isinstance(img, torch.ByteTensor): 77 | return img.float() 78 | else: 79 | return img 80 | 81 | 82 | class DIODE(Dataset): 83 | def __init__(self, data_dir_root): 84 | import glob 85 | 86 | # image paths are of the form /scene_#/scan_#/*.png 87 | self.image_files = glob.glob( 88 | os.path.join(data_dir_root, '*', '*', '*.png')) 89 | self.depth_files = [r.replace(".png", "_depth.npy") 90 | for r in self.image_files] 91 | self.depth_mask_files = [ 92 | r.replace(".png", 
"_depth_mask.npy") for r in self.image_files] 93 | self.transform = ToTensor() 94 | 95 | def __getitem__(self, idx): 96 | image_path = self.image_files[idx] 97 | depth_path = self.depth_files[idx] 98 | depth_mask_path = self.depth_mask_files[idx] 99 | 100 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 101 | depth = np.load(depth_path) # in meters 102 | valid = np.load(depth_mask_path) # binary 103 | 104 | # depth[depth > 8] = -1 105 | # depth = depth[..., None] 106 | 107 | sample = dict(image=image, depth=depth, valid=valid) 108 | 109 | # return sample 110 | sample = self.transform(sample) 111 | 112 | if idx == 0: 113 | print(sample["image"].shape) 114 | 115 | return sample 116 | 117 | def __len__(self): 118 | return len(self.image_files) 119 | 120 | 121 | def get_diode_loader(data_dir_root, batch_size=1, **kwargs): 122 | dataset = DIODE(data_dir_root) 123 | return DataLoader(dataset, batch_size, **kwargs) 124 | 125 | # get_diode_loader(data_dir_root="datasets/diode/val/outdoor") 126 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/data/sun_rgbd_loader.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms 32 | 33 | 34 | class ToTensor(object): 35 | def __init__(self): 36 | # self.normalize = transforms.Normalize( 37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 38 | self.normalize = lambda x : x 39 | 40 | def __call__(self, sample): 41 | image, depth = sample['image'], sample['depth'] 42 | image = self.to_tensor(image) 43 | image = self.normalize(image) 44 | depth = self.to_tensor(depth) 45 | 46 | return {'image': image, 'depth': depth, 'dataset': "sunrgbd"} 47 | 48 | def to_tensor(self, pic): 49 | 50 | if isinstance(pic, np.ndarray): 51 | img = torch.from_numpy(pic.transpose((2, 0, 1))) 52 | return img 53 | 54 | # # handle PIL Image 55 | if pic.mode == 'I': 56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False)) 57 | elif pic.mode == 'I;16': 58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False)) 59 | else: 60 | img = torch.ByteTensor( 61 | torch.ByteStorage.from_buffer(pic.tobytes())) 62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK 63 | if pic.mode == 'YCbCr': 64 | nchannel = 3 65 | elif pic.mode == 'I;16': 66 | nchannel = 1 67 | else: 68 | nchannel = len(pic.mode) 69 | img = img.view(pic.size[1], pic.size[0], nchannel) 70 | 71 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 72 | if isinstance(img, torch.ByteTensor): 73 | return img.float() 74 | else: 75 | return img 76 | 77 | 78 | class SunRGBD(Dataset): 79 | def __init__(self, data_dir_root): 80 | # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze() 81 | # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs] 82 | # self.all_test = [os.path.join(data_dir_root, t) for t in all_test] 83 | import glob 84 | self.image_files = glob.glob( 85 | os.path.join(data_dir_root, 'rgb', 'rgb', '*')) 86 | self.depth_files = [ 87 | r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files] 88 | self.transform = ToTensor() 89 | 90 | def __getitem__(self, idx): 91 | image_path = self.image_files[idx] 92 | depth_path = self.depth_files[idx] 93 | 94 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0 95 | depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0 96 | depth[depth > 8] = -1 97 | depth = depth[..., None] 98 | return self.transform(dict(image=image, depth=depth)) 99 | 100 | def __len__(self): 101 | return len(self.image_files) 102 | 103 | 104 | def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs): 105 | dataset = SunRGBD(data_dir_root) 106 | return DataLoader(dataset, batch_size, **kwargs) 107 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above 
copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from zoedepth.models.depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/layers/patch_transformer.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | import torch.nn as nn 27 | 28 | 29 | class PatchTransformerEncoder(nn.Module): 30 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False): 31 | """ViT-like transformer block 32 | 33 | Args: 34 | in_channels (int): Input channels 35 | patch_size (int, optional): patch size. Defaults to 10. 36 | embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128. 
37 | num_heads (int, optional): number of attention heads. Defaults to 4. 38 | use_class_token (bool, optional): Whether to use extra token at the start for global accumulation (called as "class token"). Defaults to False. 39 | """ 40 | super(PatchTransformerEncoder, self).__init__() 41 | self.use_class_token = use_class_token 42 | encoder_layers = nn.TransformerEncoderLayer( 43 | embedding_dim, num_heads, dim_feedforward=1024) 44 | self.transformer_encoder = nn.TransformerEncoder( 45 | encoder_layers, num_layers=4) # takes shape S,N,E 46 | 47 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim, 48 | kernel_size=patch_size, stride=patch_size, padding=0) 49 | 50 | def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'): 51 | """Generate positional encodings 52 | 53 | Args: 54 | sequence_length (int): Sequence length 55 | embedding_dim (int): Embedding dimension 56 | 57 | Returns: 58 | torch.Tensor SBE: Positional encodings 59 | """ 60 | position = torch.arange( 61 | 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1) 62 | index = torch.arange( 63 | 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0) 64 | div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim)) 65 | pos_encoding = position * div_term 66 | pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1) 67 | pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1) 68 | return pos_encoding 69 | 70 | 71 | def forward(self, x): 72 | """Forward pass 73 | 74 | Args: 75 | x (torch.Tensor - NCHW): Input feature tensor 76 | 77 | Returns: 78 | torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim 79 | """ 80 | embeddings = self.embedding_convPxP(x).flatten( 81 | 2) # .shape = n,c,s = n, embedding_dim, s 82 | if self.use_class_token: 83 | # extra special token at start ? 84 | embeddings = nn.functional.pad(embeddings, (1, 0)) 85 | 86 | # change to S,N,E format required by transformer 87 | embeddings = embeddings.permute(2, 0, 1) 88 | S, N, E = embeddings.shape 89 | embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device) 90 | x = self.transformer_encoder(embeddings) # .shape = S, N, E 91 | return x 92 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
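# A small shape-check sketch for PatchTransformerEncoder above, assuming the class is
# importable; the channel count and feature-map size are arbitrary example values.
import torch
enc = PatchTransformerEncoder(in_channels=128, patch_size=10, embedding_dim=128, num_heads=4)
feats = torch.randn(2, 128, 30, 40)       # N, C, H, W
out = enc(feats)                          # S, N, E with S = (30 // 10) * (40 // 10) = 12
assert out.shape == (12, 2, 128)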
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' + k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. 
local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
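# A hedged usage sketch for load_state_from_resource above, assuming `model` is an
# already-built torch.nn.Module (e.g. returned by build_model in builder.py). The local
# path is the hypothetical example from the docstring; the URL matches the
# pretrained_resource entries in the ZoeDepth configs below.
model = load_state_from_resource(model, "local::/path/to/ckpt.pt")
model = load_state_from_resource(model, "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt")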
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [384, 512] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 1, 40 | "encoder_lr_factor":10, 41 | "pos_enc_lr_factor":10, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt", 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt" 57 | } 58 | } -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the 
Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [384, 512] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 5, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 10, 51 | "encoder_lr_factor":10, 52 | "pos_enc_lr_factor":10 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | "use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
# copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/utils/easydict/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | EasyDict 3 | Copy/pasted from https://github.com/makinacorpus/easydict 4 | Original author: Mathieu Leplatre 5 | """ 6 | 7 | class EasyDict(dict): 8 | """ 9 | Get attributes 10 | 11 | >>> d = EasyDict({'foo':3}) 12 | >>> d['foo'] 13 | 3 14 | >>> d.foo 15 | 3 16 | >>> d.bar 17 | Traceback (most recent call last): 18 | ... 
19 | AttributeError: 'EasyDict' object has no attribute 'bar' 20 | 21 | Works recursively 22 | 23 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}}) 24 | >>> isinstance(d.bar, dict) 25 | True 26 | >>> d.bar.x 27 | 1 28 | 29 | Bullet-proof 30 | 31 | >>> EasyDict({}) 32 | {} 33 | >>> EasyDict(d={}) 34 | {} 35 | >>> EasyDict(None) 36 | {} 37 | >>> d = {'a': 1} 38 | >>> EasyDict(**d) 39 | {'a': 1} 40 | >>> EasyDict((('a', 1), ('b', 2))) 41 | {'a': 1, 'b': 2} 42 | 43 | Set attributes 44 | 45 | >>> d = EasyDict() 46 | >>> d.foo = 3 47 | >>> d.foo 48 | 3 49 | >>> d.bar = {'prop': 'value'} 50 | >>> d.bar.prop 51 | 'value' 52 | >>> d 53 | {'foo': 3, 'bar': {'prop': 'value'}} 54 | >>> d.bar.prop = 'newer' 55 | >>> d.bar.prop 56 | 'newer' 57 | 58 | 59 | Values extraction 60 | 61 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]}) 62 | >>> isinstance(d.bar, list) 63 | True 64 | >>> from operator import attrgetter 65 | >>> list(map(attrgetter('x'), d.bar)) 66 | [1, 3] 67 | >>> list(map(attrgetter('y'), d.bar)) 68 | [2, 4] 69 | >>> d = EasyDict() 70 | >>> list(d.keys()) 71 | [] 72 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2)) 73 | >>> d.foo 74 | 3 75 | >>> d.bar.x 76 | 1 77 | 78 | Still like a dict though 79 | 80 | >>> o = EasyDict({'clean':True}) 81 | >>> list(o.items()) 82 | [('clean', True)] 83 | 84 | And like a class 85 | 86 | >>> class Flower(EasyDict): 87 | ... power = 1 88 | ... 89 | >>> f = Flower() 90 | >>> f.power 91 | 1 92 | >>> f = Flower({'height': 12}) 93 | >>> f.height 94 | 12 95 | >>> f['power'] 96 | 1 97 | >>> sorted(f.keys()) 98 | ['height', 'power'] 99 | 100 | update and pop items 101 | >>> d = EasyDict(a=1, b='2') 102 | >>> e = EasyDict(c=3.0, a=9.0) 103 | >>> d.update(e) 104 | >>> d.c 105 | 3.0 106 | >>> d['c'] 107 | 3.0 108 | >>> d.get('c') 109 | 3.0 110 | >>> d.update(a=4, b=4) 111 | >>> d.b 112 | 4 113 | >>> d.pop('a') 114 | 4 115 | >>> d.a 116 | Traceback (most recent call last): 117 | ... 
118 | AttributeError: 'EasyDict' object has no attribute 'a' 119 | """ 120 | def __init__(self, d=None, **kwargs): 121 | if d is None: 122 | d = {} 123 | else: 124 | d = dict(d) 125 | if kwargs: 126 | d.update(**kwargs) 127 | for k, v in d.items(): 128 | setattr(self, k, v) 129 | # Class attributes 130 | for k in self.__class__.__dict__.keys(): 131 | if not (k.startswith('__') and k.endswith('__')) and not k in ('update', 'pop'): 132 | setattr(self, k, getattr(self, k)) 133 | 134 | def __setattr__(self, name, value): 135 | if isinstance(value, (list, tuple)): 136 | value = [self.__class__(x) 137 | if isinstance(x, dict) else x for x in value] 138 | elif isinstance(value, dict) and not isinstance(value, self.__class__): 139 | value = self.__class__(value) 140 | super(EasyDict, self).__setattr__(name, value) 141 | super(EasyDict, self).__setitem__(name, value) 142 | 143 | __setitem__ = __setattr__ 144 | 145 | def update(self, e=None, **f): 146 | d = e or dict() 147 | d.update(f) 148 | for k in d: 149 | setattr(self, k, d[k]) 150 | 151 | def pop(self, k, d=None): 152 | delattr(self, k) 153 | return super(EasyDict, self).pop(k, d) 154 | 155 | 156 | if __name__ == "__main__": 157 | import doctest 158 | doctest.testmod() -------------------------------------------------------------------------------- /scripts/deforum_helpers/src/zoedepth/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | 27 | def get_intrinsics(H,W): 28 | """ 29 | Intrinsics for a pinhole camera model. 30 | Assume fov of 55 degrees and central principal point. 
31 | """ 32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) 33 | cx = 0.5 * W 34 | cy = 0.5 * H 35 | return np.array([[f, 0, cx], 36 | [0, f, cy], 37 | [0, 0, 1]]) 38 | 39 | def depth_to_points(depth, R=None, t=None): 40 | 41 | K = get_intrinsics(depth.shape[1], depth.shape[2]) 42 | Kinv = np.linalg.inv(K) 43 | if R is None: 44 | R = np.eye(3) 45 | if t is None: 46 | t = np.zeros(3) 47 | 48 | # M converts from your coordinate to PyTorch3D's coordinate system 49 | M = np.eye(3) 50 | M[0, 0] = -1.0 51 | M[1, 1] = -1.0 52 | 53 | height, width = depth.shape[1:3] 54 | 55 | x = np.arange(width) 56 | y = np.arange(height) 57 | coord = np.stack(np.meshgrid(x, y), -1) 58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 59 | coord = coord.astype(np.float32) 60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) 61 | coord = coord[None] # bs, h, w, 3 62 | 63 | D = depth[:, :, :, None, None] 64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) 65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] 66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's 67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1 68 | # from reference to targe tviewpoint 69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] 70 | # pts3D_2 = pts3D_1 71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w 72 | return pts3D_2[:, :, :, :3, 0][0] 73 | 74 | 75 | def create_triangles(h, w, mask=None): 76 | """ 77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 78 | Creates mesh triangle indices from a given pixel grid size. 79 | This function is not and need not be differentiable as triangle indices are 80 | fixed. 81 | Args: 82 | h: (int) denoting the height of the image. 83 | w: (int) denoting the width of the image. 84 | Returns: 85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) 86 | """ 87 | x, y = np.meshgrid(range(w - 1), range(h - 1)) 88 | tl = y * w + x 89 | tr = y * w + x + 1 90 | bl = (y + 1) * w + x 91 | br = (y + 1) * w + x + 1 92 | triangles = np.array([tl, bl, tr, br, tr, bl]) 93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape( 94 | ((w - 1) * (h - 1) * 2, 3)) 95 | if mask is not None: 96 | mask = mask.reshape(-1) 97 | triangles = triangles[mask[triangles].all(1)] 98 | return triangles 99 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/ui_left.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | from types import SimpleNamespace 18 | import gradio as gr 19 | from .defaults import get_gradio_html 20 | from .gradio_funcs import change_css, handle_change_functions 21 | from .args import DeforumArgs, DeforumAnimArgs, ParseqArgs, DeforumOutputArgs, RootArgs, LoopArgs 22 | from .deforum_controlnet import setup_controlnet_ui 23 | from .ui_elements import get_tab_run, get_tab_keyframes, get_tab_prompts, get_tab_init, get_tab_hybrid, get_tab_output 24 | 25 | def set_arg_lists(): 26 | # convert dicts to NameSpaces for easy working (args.param instead of args['param'] 27 | d = SimpleNamespace(**DeforumArgs()) # default args 28 | da = SimpleNamespace(**DeforumAnimArgs()) # default anim args 29 | dp = SimpleNamespace(**ParseqArgs()) # default parseq ars 30 | dv = SimpleNamespace(**DeforumOutputArgs()) # default video args 31 | dr = SimpleNamespace(**RootArgs()) # ROOT args 32 | dloopArgs = SimpleNamespace(**LoopArgs()) # Guided imgs args 33 | return d, da, dp, dv, dr, dloopArgs 34 | 35 | def setup_deforum_left_side_ui(): 36 | d, da, dp, dv, dr, dloopArgs = set_arg_lists() 37 | # set up main info accordion on top of the UI 38 | with gr.Accordion("Info, Links and Help", open=False, elem_id='main_top_info_accord'): 39 | gr.HTML(value=get_gradio_html('main')) 40 | # show button to hide/ show gradio's info texts for each element in the UI 41 | with gr.Row(variant='compact'): 42 | show_info_on_ui = gr.Checkbox(label="Show more info", value=d.show_info_on_ui, interactive=True) 43 | with gr.Blocks(): 44 | with gr.Tabs(): 45 | # Get main tab contents: 46 | tab_run_params = get_tab_run(d, da) # Run tab 47 | tab_keyframes_params = get_tab_keyframes(d, da, dloopArgs) # Keyframes tab 48 | tab_prompts_params = get_tab_prompts(da) # Prompts tab 49 | tab_init_params = get_tab_init(d, da, dp) # Init tab 50 | controlnet_dict = setup_controlnet_ui() # ControlNet tab 51 | tab_hybrid_params = get_tab_hybrid(da) # Hybrid tab 52 | tab_output_params = get_tab_output(da, dv) # Output tab 53 | # add returned gradio elements from main tabs to locals() 54 | for key, value in {**tab_run_params, **tab_keyframes_params, **tab_prompts_params, **tab_init_params, **controlnet_dict, **tab_hybrid_params, **tab_output_params}.items(): 55 | locals()[key] = value 56 | 57 | # Gradio's Change functions - hiding and renaming elements based on other elements 58 | show_info_on_ui.change(fn=change_css, inputs=show_info_on_ui, outputs=gr.outputs.HTML()) 59 | handle_change_functions(locals()) 60 | 61 | return locals() 62 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/ui_settings.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
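# A tiny illustration of the dict-to-SimpleNamespace pattern used by set_arg_lists above
# (the field name and value are arbitrary examples):
from types import SimpleNamespace
d = SimpleNamespace(**{"W": 512})
print(d.W)   # 512 (attribute access instead of d["W"])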
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import gradio as gr 18 | from modules import ui_components 19 | from modules.shared import opts, cmd_opts, OptionInfo 20 | from .video_audio_utilities import find_ffmpeg_binary 21 | from .subtitle_handler import get_user_values 22 | 23 | def on_ui_settings(): 24 | srt_ui_params = get_user_values() 25 | section = ('deforum', "Deforum") 26 | opts.add_option("deforum_keep_3d_models_in_vram", OptionInfo(False, "Keep 3D models in VRAM between runs", gr.Checkbox, {"interactive": True, "visible": True if not (cmd_opts.lowvram or cmd_opts.medvram) else False}, section=section)) 27 | opts.add_option("deforum_enable_persistent_settings", OptionInfo(False, "Keep settings persistent upon relaunch of webui", gr.Checkbox, {"interactive": True}, section=section)) 28 | opts.add_option("deforum_persistent_settings_path", OptionInfo("models/Deforum/deforum_persistent_settings.txt", "Path for saving your persistent settings file:", section=section)) 29 | opts.add_option("deforum_ffmpeg_location", OptionInfo(find_ffmpeg_binary(), "FFmpeg path/ location", section=section)) 30 | opts.add_option("deforum_ffmpeg_crf", OptionInfo(17, "FFmpeg CRF value", gr.Slider, {"interactive": True, "minimum": 0, "maximum": 51}, section=section)) 31 | opts.add_option("deforum_ffmpeg_preset", OptionInfo('slow', "FFmpeg Preset", gr.Dropdown, {"interactive": True, "choices": ['veryslow', 'slower', 'slow', 'medium', 'fast', 'faster', 'veryfast', 'superfast', 'ultrafast']}, section=section)) 32 | opts.add_option("deforum_debug_mode_enabled", OptionInfo(False, "Enable Dev mode - adds extra reporting in console", gr.Checkbox, {"interactive": True}, section=section)) 33 | opts.add_option("deforum_save_gen_info_as_srt", OptionInfo(False, "Save an .srt (subtitles) file with the generation info along with each animation", gr.Checkbox, {"interactive": True}, section=section)) 34 | opts.add_option("deforum_embed_srt", OptionInfo(False, "If .srt file is saved, soft-embed the subtitles into the rendered video file", gr.Checkbox, {"interactive": True}, section=section)) 35 | opts.add_option("deforum_save_gen_info_as_srt_params", OptionInfo(['Noise Schedule'], "Choose which animation params are to be saved to the .srt file (Frame # and Seed will always be saved):", ui_components.DropdownMulti, lambda: {"interactive": True, "choices": srt_ui_params}, section=section)) 36 | opts.add_option("deforum_preview", OptionInfo("Off", "Generate preview video during generation? (Preview does not include frame interpolation or upscaling.)", gr.Dropdown, {"interactive": True, "choices": ['Off', 'On', 'On, concurrent (don\'t pause generation)']}, section=section)) 37 | opts.add_option("deforum_preview_interval_frames", OptionInfo(100, "Generate preview every N frames", gr.Slider, {"interactive": True, "minimum": 10, "maximum": 500}, section=section)) 38 | -------------------------------------------------------------------------------- /scripts/deforum_helpers/webui_sd_pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 
6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | from modules.processing import StableDiffusionProcessingImg2Img 18 | from modules.shared import opts, sd_model 19 | import os 20 | 21 | def get_webui_sd_pipeline(args, root): 22 | # Set up the pipeline 23 | p = StableDiffusionProcessingImg2Img( 24 | sd_model=sd_model, 25 | outpath_samples = opts.outdir_samples or opts.outdir_img2img_samples, 26 | ) # we'll set up the rest later 27 | 28 | os.makedirs(args.outdir, exist_ok=True) 29 | p.width, p.height = map(lambda x: x - x % 8, (args.W, args.H)) 30 | p.steps = args.steps 31 | p.seed = args.seed 32 | p.sampler_name = args.sampler 33 | p.tiling = args.tiling 34 | p.restore_faces = args.restore_faces 35 | p.subseed = root.subseed 36 | p.subseed_strength = root.subseed_strength 37 | p.seed_resize_from_w = args.seed_resize_from_w 38 | p.seed_resize_from_h = args.seed_resize_from_h 39 | p.fill = args.fill 40 | p.batch_size = 1 # b.size 1 as this is DEFORUM :) 41 | p.seed = args.seed 42 | p.do_not_save_samples = True # Setting this to False will trigger webui's saving mechanism - and we will end up with duplicated files, and another folder within our destination folder - big no no. 43 | p.scheduler = args.scheduler 44 | p.mask_blur = args.mask_overlay_blur 45 | p.extra_generation_params["Mask blur"] = args.mask_overlay_blur 46 | p.n_iter = 1 47 | p.steps = args.steps 48 | p.denoising_strength = 1 - args.strength 49 | p.cfg_scale = args.scale 50 | p.image_cfg_scale = args.pix2pix_img_cfg_scale 51 | p.outpath_samples = args.outdir 52 | 53 | return p -------------------------------------------------------------------------------- /scripts/deforum_helpers/word_masking.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
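# A tiny worked example of the width/height rounding in get_webui_sd_pipeline above
# (the input size is an arbitrary example, not a Deforum default):
W, H = 836, 470
print(W - W % 8, H - H % 8)   # 832 464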
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import os 18 | import torch 19 | from PIL import Image 20 | from torchvision import transforms 21 | from torch.nn.functional import interpolate 22 | import cv2 23 | 24 | preclipseg_transform = transforms.Compose([ 25 | transforms.ToTensor(), 26 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 27 | transforms.Resize((512, 512)), #TODO: check if the size is hardcoded 28 | ]) 29 | 30 | def find_clipseg(): 31 | basedirs = [os.getcwd()] 32 | src_basedirs = [] 33 | for basedir in basedirs: 34 | src_basedirs.append(os.path.join(os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-2]), 'deforum_helpers', 'src')) 35 | 36 | for basedir in src_basedirs: 37 | pth = os.path.join(basedir, './clipseg/weights/rd64-uni.pth') 38 | if os.path.exists(pth): 39 | return pth 40 | raise Exception('CLIPseg weights not found!') 41 | 42 | def setup_clipseg(root): 43 | from clipseg.models.clipseg import CLIPDensePredT 44 | model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64) 45 | model.eval() 46 | model.load_state_dict(torch.load(find_clipseg(), map_location=root.device), strict=False) 47 | 48 | model.to(root.device) 49 | root.clipseg_model = model 50 | 51 | def get_word_mask(root, frame, word_mask): 52 | if root.clipseg_model is None: 53 | setup_clipseg(root) 54 | img = preclipseg_transform(frame).to(root.device, dtype=torch.float32) 55 | word_masks = [word_mask] 56 | with torch.no_grad(): 57 | preds = root.clipseg_model(img.repeat(len(word_masks),1,1,1), word_masks)[0] 58 | 59 | mask = torch.sigmoid(preds[0][0]).unsqueeze(0).unsqueeze(0) # add batch, channels dims 60 | resized_mask = interpolate(mask, size=(frame.size[1], frame.size[0]), mode='bicubic').squeeze() # rescale mask back to the target resolution 61 | numpy_array = resized_mask.multiply(255).to(dtype=torch.uint8,device='cpu').numpy() 62 | return Image.fromarray(cv2.threshold(numpy_array, 32, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]) 63 | -------------------------------------------------------------------------------- /style.css: -------------------------------------------------------------------------------- 1 | /* 2 | # Copyright (C) 2023 Deforum LLC 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU Affero General Public License as published by 6 | # the Free Software Foundation, version 3 of the License. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU Affero General Public License 14 | # along with this program. If not, see . 
15 | 16 | Contact the authors: https://deforum.github.io/ 17 | */ 18 | 19 | #vid_to_interpolate_chosen_file .w-full, #pics_to_interpolate_chosen_file .w-full, #vid_to_upscale_chosen_file .w-full, #controlnet_input_video_chosen_file .w-full, #controlnet_input_video_mask_chosen_file .w-full,#vid_to_depth_chosen_file .w-full { 20 | display: flex !important; 21 | align-items: flex-start !important; 22 | justify-content: center !important; 23 | } 24 | 25 | #tab_deforum_interface #hybrid_msg_html { 26 | color: Tomato !important; 27 | margin-top: 5px !important; 28 | text-align: center !important; 29 | font-size: 20px !important; 30 | font-weight: bold !important; 31 | } 32 | 33 | #tab_deforum_interface #leres_license_msg { 34 | color: GoldenRod; 35 | } 36 | 37 | #image_buttons_deforum #img2img_tab, 38 | #image_buttons_deforum #inpaint_tab, 39 | #image_buttons_deforum #extras_tab, 40 | #save_zip_deforum, #save_deforum { 41 | display: none !important; 42 | } 43 | 44 | #main_top_info_accord .label-wrap { 45 | gap:2px; 46 | padding: 0.5rem; 47 | } 48 | #tab_deforum_interface #controlnet_not_found_html_msg, #tab_deforum_interface #depth_warp_msg_html { 49 | color: Tomato; 50 | } 51 | 52 | #below_interpolate_butts_msg { 53 | text-align: center !important; 54 | } 55 | 56 | #tab_deforum_interface #settings_path_msg { 57 | margin: 0.6em; 58 | display: flex; 59 | align-items: flex-start; 60 | justify-content: center; 61 | } 62 | 63 | #tab_deforum_interface .tabs.gradio-tabs.svelte-1g805jl .svelte-vt1mxs.gap { 64 | gap:4px !important; 65 | } 66 | 67 | #tab_deforum_interface #main_top_info_accord { 68 | padding: 1px; 69 | } 70 | 71 | #add_soundtrack .svelte-1p9xokt { 72 | padding: 2.25px; 73 | } 74 | 75 | #tab_deforum_interface .wrap.svelte-xwlu1w, #custom_setting_file { 76 | height: 85px !important; 77 | min-height: 85px !important; 78 | } 79 | 80 | #tab_deforum_interface .file-preview-holder { 81 | overflow-y: auto; 82 | max-height: 60px; 83 | } -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 
14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | import pytest 18 | import subprocess 19 | import sys 20 | import os 21 | from subprocess import Popen, PIPE, STDOUT 22 | from pathlib import Path 23 | from tenacity import retry, stop_after_delay, wait_fixed 24 | import threading 25 | import requests 26 | 27 | def pytest_addoption(parser): 28 | parser.addoption("--start-server", action="store_true", help="start the server before the test run (if not specified, you must start the server manually)") 29 | 30 | @pytest.fixture 31 | def cmdopt(request): 32 | return request.config.getoption("--start-server") 33 | 34 | @retry(wait=wait_fixed(5), stop=stop_after_delay(60)) 35 | def wait_for_service(url): 36 | response = requests.get(url, timeout=(5, 5)) 37 | print(f"Waiting for server to respond 200 at {url} (response: {response.status_code})...") 38 | assert response.status_code == 200 39 | 40 | @pytest.fixture(scope="session", autouse=True) 41 | def start_server(request): 42 | if request.config.getoption("--start-server"): 43 | 44 | # Kick off server subprocess 45 | script_directory = os.path.dirname(__file__) 46 | a1111_directory = Path(script_directory).parent.parent.parent # sd-webui/extensions/deforum/tests/ -> sd-webui 47 | print(f"Starting server in {a1111_directory}...") 48 | proc = Popen(["python", "-m", "coverage", "run", "--data-file=.coverage.server", "launch.py", 49 | "--skip-prepare-environment", "--skip-torch-cuda-test", "--test-server", "--no-half", 50 | "--disable-opt-split-attention", "--use-cpu", "all", "--add-stop-route", "--api", "--deforum-api", "--listen"], 51 | cwd=a1111_directory, 52 | stdout=PIPE, 53 | stderr=STDOUT, 54 | universal_newlines=True) 55 | 56 | # ensure server is killed at the end of the test run 57 | request.addfinalizer(proc.kill) 58 | 59 | # Spin up separate thread to capture the server output to file and stdout 60 | def server_console_manager(): 61 | with proc.stdout, open('serverlog.txt', 'ab') as logfile: 62 | for line in proc.stdout: 63 | sys.stdout.write(f"[SERVER LOG] {line}") 64 | sys.stdout.flush() 65 | logfile.write(line.encode('utf-8')) 66 | logfile.flush() 67 | proc.wait() 68 | 69 | threading.Thread(target=server_console_manager).start() 70 | 71 | # Wait for deforum API to respond 72 | wait_for_service('http://localhost:7860/deforum_api/jobs/') 73 | 74 | else: 75 | print("Checking server is already running / waiting for it to come up...") 76 | wait_for_service('http://localhost:7860/deforum_api/jobs/') -------------------------------------------------------------------------------- /tests/testdata/example_init_vid.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/tests/testdata/example_init_vid.mp4 -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2023 Deforum LLC 2 | # 3 | # This program is free software: you can redistribute it and/or modify 4 | # it under the terms of the GNU Affero General Public License as published by 5 | # the Free Software Foundation, version 3 of the License. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 
11 | # 12 | # You should have received a copy of the GNU Affero General Public License 13 | # along with this program. If not, see . 14 | 15 | # Contact the authors: https://deforum.github.io/ 16 | 17 | from tenacity import retry, stop_after_delay, wait_fixed 18 | from pydantic_requests import PydanticSession 19 | import requests 20 | from scripts.deforum_api_models import DeforumJobStatus, DeforumJobStatusCategory, DeforumJobPhase 21 | 22 | SERVER_BASE_URL = "http://localhost:7860" 23 | API_ROOT = "/deforum_api" 24 | API_BASE_URL = SERVER_BASE_URL + API_ROOT 25 | 26 | @retry(wait=wait_fixed(2), stop=stop_after_delay(900)) 27 | def wait_for_job_to_complete(id : str): 28 | with PydanticSession( 29 | {200: DeforumJobStatus}, headers={"accept": "application/json"} 30 | ) as session: 31 | response = session.get(API_BASE_URL+"/jobs/"+id) 32 | response.raise_for_status() 33 | jobStatus : DeforumJobStatus = response.model 34 | print(f"Waiting for job {id}: status={jobStatus.status}; phase={jobStatus.phase}; execution_time:{jobStatus.execution_time}s") 35 | assert jobStatus.status != DeforumJobStatusCategory.ACCEPTED 36 | return jobStatus 37 | 38 | @retry(wait=wait_fixed(1), stop=stop_after_delay(120)) 39 | def wait_for_job_to_enter_phase(id : str, phase : DeforumJobPhase): 40 | with PydanticSession( 41 | {200: DeforumJobStatus}, headers={"accept": "application/json"} 42 | ) as session: 43 | response = session.get(API_BASE_URL+"/jobs/"+id) 44 | response.raise_for_status() 45 | jobStatus : DeforumJobStatus = response.model 46 | print(f"Waiting for job {id} to enter phase {phase}. Currently: status={jobStatus.status}; phase={jobStatus.phase}; execution_time:{jobStatus.execution_time}s") 47 | assert jobStatus.phase != phase 48 | return jobStatus 49 | 50 | @retry(wait=wait_fixed(1), stop=stop_after_delay(120)) 51 | def wait_for_job_to_enter_status(id : str, status : DeforumJobStatusCategory): 52 | with PydanticSession( 53 | {200: DeforumJobStatus}, headers={"accept": "application/json"} 54 | ) as session: 55 | response = session.get(API_BASE_URL+"/jobs/"+id) 56 | response.raise_for_status() 57 | jobStatus : DeforumJobStatus = response.model 58 | print(f"Waiting for job {id} to enter status {status}. Currently: status={jobStatus.status}; phase={jobStatus.phase}; execution_time:{jobStatus.execution_time}s") 59 | assert jobStatus.status == status 60 | return jobStatus 61 | 62 | 63 | def gpu_disabled(): 64 | response = requests.get(SERVER_BASE_URL+"/sdapi/v1/cmd-flags") 65 | response.raise_for_status() 66 | cmd_flags = response.json() 67 | return cmd_flags["use_cpu"] == ["all"] 68 | 69 | 70 | 71 | 72 | --------------------------------------------------------------------------------
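# A minimal sketch of how the polling helpers above are used from a test; the job id is
# hypothetical and would normally come from submitting a batch through the Deforum API.
job_status = wait_for_job_to_complete("hypothetical-job-id")
print(job_status.status, job_status.phase, job_status.execution_time)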