├── .github
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   └── feature_request.yml
│   ├── scripts
│   │   └── issue_checker.py
│   └── workflows
│       ├── issue_checker.yaml
│       └── run_tests.yaml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── install.py
├── javascript
│   ├── deforum-hints.js
│   └── deforum.js
├── preload.py
├── pytest.ini
├── requirements-dev.txt
├── requirements.txt
├── scripts
│   ├── default_settings.txt
│   ├── deforum.py
│   ├── deforum_api.py
│   ├── deforum_api_models.py
│   ├── deforum_extend_paths.py
│   └── deforum_helpers
│       ├── 114763196.jpg
│       ├── RAFT.py
│       ├── animation.py
│       ├── animation_key_frames.py
│       ├── args.py
│       ├── auto_navigation.py
│       ├── colors.py
│       ├── composable_masks.py
│       ├── consistency_check.py
│       ├── defaults.py
│       ├── deforum_controlnet.py
│       ├── deforum_controlnet_gradio.py
│       ├── deforum_tqdm.py
│       ├── deprecation_utils.py
│       ├── depth.py
│       ├── depth_adabins.py
│       ├── depth_leres.py
│       ├── depth_midas.py
│       ├── depth_zoe.py
│       ├── frame_interpolation.py
│       ├── general_utils.py
│       ├── generate.py
│       ├── gradio_funcs.py
│       ├── human_masking.py
│       ├── hybrid_video.py
│       ├── image_sharpening.py
│       ├── load_images.py
│       ├── masks.py
│       ├── noise.py
│       ├── opts_overrider.py
│       ├── parseq_adapter.py
│       ├── parseq_adapter_test.py
│       ├── prompt.py
│       ├── render.py
│       ├── render_modes.py
│       ├── resume.py
│       ├── rich.py
│       ├── run_deforum.py
│       ├── save_images.py
│       ├── seed.py
│       ├── settings.py
│       ├── src
│       │   ├── adabins
│       │   │   ├── __init__.py
│       │   │   ├── layers.py
│       │   │   ├── miniViT.py
│       │   │   └── unet_adaptive_bins.py
│       │   ├── clipseg
│       │   │   ├── LICENSE
│       │   │   ├── Quickstart.ipynb
│       │   │   ├── Readme.md
│       │   │   ├── Tables.ipynb
│       │   │   ├── Visual_Feature_Engineering.ipynb
│       │   │   ├── datasets
│       │   │   │   ├── coco_wrapper.py
│       │   │   │   ├── pascal_classes.json
│       │   │   │   ├── pascal_zeroshot.py
│       │   │   │   ├── pfe_dataset.py
│       │   │   │   ├── phrasecut.py
│       │   │   │   └── utils.py
│       │   │   ├── environment.yml
│       │   │   ├── evaluation_utils.py
│       │   │   ├── example_image.jpg
│       │   │   ├── experiments
│       │   │   │   ├── ablation.yaml
│       │   │   │   ├── coco.yaml
│       │   │   │   ├── pascal_1shot.yaml
│       │   │   │   └── phrasecut.yaml
│       │   │   ├── general_utils.py
│       │   │   ├── metrics.py
│       │   │   ├── models
│       │   │   │   ├── clipseg.py
│       │   │   │   └── vitseg.py
│       │   │   ├── overview.png
│       │   │   ├── score.py
│       │   │   ├── setup.py
│       │   │   ├── training.py
│       │   │   └── weights
│       │   │       └── rd64-uni.pth
│       │   ├── film_interpolation
│       │   │   ├── film_inference.py
│       │   │   └── film_util.py
│       │   ├── infer.py
│       │   ├── leres
│       │   │   └── lib
│       │   │       ├── Resnet.py
│       │   │       ├── Resnext_torch.py
│       │   │       ├── __init__.py
│       │   │       ├── multi_depth_model_woauxi.py
│       │   │       ├── net_tools.py
│       │   │       ├── network_auxi.py
│       │   │       ├── spvcnn_classsification.py
│       │   │       ├── spvcnn_utils.py
│       │   │       └── test_utils.py
│       │   ├── midas
│       │   │   ├── backbones
│       │   │   │   ├── beit.py
│       │   │   │   ├── levit.py
│       │   │   │   ├── next_vit.py
│       │   │   │   ├── swin.py
│       │   │   │   ├── swin2.py
│       │   │   │   ├── swin_common.py
│       │   │   │   ├── utils.py
│       │   │   │   └── vit.py
│       │   │   ├── base_model.py
│       │   │   ├── blocks.py
│       │   │   ├── dpt_depth.py
│       │   │   ├── midas_net.py
│       │   │   ├── midas_net_custom.py
│       │   │   ├── model_loader.py
│       │   │   ├── transforms.py
│       │   │   └── vit.py
│       │   ├── model_io.py
│       │   ├── py3d_tools.py
│       │   ├── rife
│       │   │   ├── inference_video.py
│       │   │   ├── model
│       │   │   │   ├── loss.py
│       │   │   │   ├── pytorch_msssim
│       │   │   │   │   └── __init__.py
│       │   │   │   └── warplayer.py
│       │   │   └── rife_new_gen
│       │   │       ├── IFNet_HDv3.py
│       │   │       ├── RIFE_HDv3.py
│       │   │       └── refine.py
│       │   ├── utils.py
│       │   └── zoedepth
│       │       ├── data
│       │       │   ├── __init__.py
│       │       │   ├── data_mono.py
│       │       │   ├── ddad.py
│       │       │   ├── diml_indoor_test.py
│       │       │   ├── diml_outdoor_test.py
│       │       │   ├── diode.py
│       │       │   ├── hypersim.py
│       │       │   ├── ibims.py
│       │       │   ├── preprocess.py
│       │       │   ├── sun_rgbd_loader.py
│       │       │   ├── transforms.py
│       │       │   ├── vkitti.py
│       │       │   └── vkitti2.py
│       │       ├── models
│       │       │   ├── __init__.py
│       │       │   ├── base_models
│       │       │   │   ├── __init__.py
│       │       │   │   └── midas.py
│       │       │   ├── builder.py
│       │       │   ├── depth_model.py
│       │       │   ├── layers
│       │       │   │   ├── attractor.py
│       │       │   │   ├── dist_layers.py
│       │       │   │   ├── localbins_layers.py
│       │       │   │   └── patch_transformer.py
│       │       │   ├── model_io.py
│       │       │   ├── zoedepth
│       │       │   │   ├── __init__.py
│       │       │   │   ├── config_zoedepth.json
│       │       │   │   ├── config_zoedepth_kitti.json
│       │       │   │   └── zoedepth_v1.py
│       │       │   └── zoedepth_nk
│       │       │       ├── __init__.py
│       │       │       ├── config_zoedepth_nk.json
│       │       │       └── zoedepth_nk_v1.py
│       │       └── utils
│       │           ├── __init__.py
│       │           ├── arg_utils.py
│       │           ├── config.py
│       │           ├── easydict
│       │           │   └── __init__.py
│       │           ├── geometry.py
│       │           └── misc.py
│       ├── subtitle_handler.py
│       ├── ui_elements.py
│       ├── ui_left.py
│       ├── ui_right.py
│       ├── ui_settings.py
│       ├── upscaling.py
│       ├── vid2depth.py
│       ├── video_audio_utilities.py
│       ├── webui_sd_pipeline.py
│       └── word_masking.py
├── style.css
└── tests
    ├── __snapshots__
    │   ├── deforum_postprocess_test.ambr
    │   └── deforum_test.ambr
    ├── conftest.py
    ├── deforum_postprocess_test.py
    ├── deforum_test.py
    ├── testdata
    │   ├── example_init_vid.mp4
    │   ├── parseq.json
    │   └── simple.input_settings.txt
    └── utils.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: deforum
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 | - name: Deforum Github discussions
4 | url: https://github.com/deforum-art/deforum-for-automatic1111-webui/discussions
5 | about: Please ask and answer questions here. If you want to complain about something, don't try to circumvent issue filing by starting a discussion here 🙃
6 | - name: Deforum Discord
7 | url: https://discord.gg/deforum
8 | about: Here is our main community where we chat, discuss development and share experiments and results
9 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
1 | name: Feature request
2 | description: Suggest an idea for the Deforum extension
3 | title: "[Feature Request]: "
4 | labels: ["enhancement"]
5 |
6 | body:
7 | - type: checkboxes
8 | attributes:
9 | label: Is there an existing issue for this?
10 | description: Please search to see if an issue already exists for the feature you want, and that it's not implemented in a recent build/commit.
11 | options:
12 | - label: I have searched the existing issues and checked the recent builds/commits
13 | required: true
14 | - type: markdown
15 | attributes:
16 | value: |
17 | *Please fill this form with as much information as possible, provide screenshots and/or illustrations of the feature if possible*
18 | - type: textarea
19 | id: feature
20 | attributes:
21 | label: What would your feature do?
22 | description: Tell us about your feature in a very clear and simple way, and what problem it would solve
23 | validations:
24 | required: true
25 | - type: textarea
26 | id: workflow
27 | attributes:
28 | label: Proposed workflow
29 | description: Please provide us with step by step information on how you'd like the feature to be accessed and used
30 | value: |
31 | 1. Go to ....
32 | 2. Press ....
33 | 3. ...
34 | validations:
35 | required: true
36 | - type: textarea
37 | id: misc
38 | attributes:
39 | label: Additional information
40 | description: Add any other context or screenshots about the feature request here.
41 | - type: textarea
42 | attributes:
43 | label: Are you going to help add it?
44 | description: Do you want to participate in Deforum development and bring the desired feature sooner? Let us know if you are willing to add it yourself; ideally, leave your Discord handle here so we can contact you for a less formal conversation. Our community is welcoming and ready to provide you with any information on the project structure or how the code works. If not, keep in mind that you will have to wait until the team picks up your issue.
45 | validations:
46 | required: true
47 |
--------------------------------------------------------------------------------
/.github/workflows/issue_checker.yaml:
--------------------------------------------------------------------------------
1 | name: Issue Checker
2 |
3 | on:
4 | issues:
5 | types: [opened, reopened, edited]
6 |
7 | jobs:
8 | check_issue:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - name: Checkout repository
12 | uses: actions/checkout@v3
13 | - name: Set up Python
14 | uses: actions/setup-python@v3
15 | with:
16 | python-version: '3.x'
17 | - name: Install dependencies
18 | run: pip install PyGithub
19 | - name: Check issue
20 | env:
21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22 | ISSUE_NUMBER: ${{ github.event.issue.number }}
23 | run: python .github/scripts/issue_checker.py
24 |
--------------------------------------------------------------------------------
/.github/workflows/run_tests.yaml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | - push
5 | - pull_request
6 |
7 | jobs:
8 | test:
9 | name: tests on CPU with empty model
10 | runs-on: ubuntu-latest
11 | if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
12 | steps:
13 | - name: Checkout a1111
14 | uses: actions/checkout@v3
15 | with:
16 | repository: AUTOMATIC1111/stable-diffusion-webui
17 | ref: v1.6.0
18 | - name: Checkout Controlnet extension
19 | uses: actions/checkout@v3
20 | with:
21 | repository: Mikubill/sd-webui-controlnet
22 | path: extensions/sd-webui-controlnet
23 | - name: Checkout Deforum
24 | uses: actions/checkout@v3
25 | with:
26 | path: extensions/deforum
27 | - name: Set up Python 3.10
28 | uses: actions/setup-python@v4
29 | with:
30 | python-version: 3.10.6
31 | cache: pip
32 | cache-dependency-path: |
33 | **/requirements*txt
34 | launch.py
35 | - name: Install test dependencies
36 | run: pip install wait-for-it -r extensions/deforum/requirements-dev.txt
37 | env:
38 | PIP_DISABLE_PIP_VERSION_CHECK: "1"
39 | PIP_PROGRESS_BAR: "off"
40 | - name: Setup environment
41 | run: python launch.py --skip-torch-cuda-test --exit
42 | env:
43 | PIP_DISABLE_PIP_VERSION_CHECK: "1"
44 | PIP_PROGRESS_BAR: "off"
45 | TORCH_INDEX_URL: https://download.pytorch.org/whl/cpu
46 | WEBUI_LAUNCH_LIVE_OUTPUT: "1"
47 | PYTHONUNBUFFERED: "1"
48 | - name: Start test server
49 | run: >
50 | python -m coverage run
51 | --data-file=.coverage.server
52 | launch.py
53 | --skip-prepare-environment
54 | --skip-torch-cuda-test
55 | --test-server
56 | --do-not-download-clip
57 | --no-half
58 | --disable-opt-split-attention
59 | --use-cpu all
60 | --api-server-stop
61 | --deforum-api
62 | --api
63 | 2>&1 | tee serverlog.txt &
64 | - name: Run tests (with continue-on-error due to mysterious non-zero return code on success)
65 | continue-on-error: true
66 | id: runtests
67 | run: |
68 | wait-for-it --service 127.0.0.1:7860 -t 600
69 | cd extensions/deforum
70 | python -m coverage run --data-file=.coverage.client -m pytest -vv --junitxml=tests/results.xml tests
71 | - name: Check for test failures (necessary because of continue-on-error above)
72 | id: testresults
73 | uses: mavrosxristoforos/get-xml-info@1.1.0
74 | with:
75 | xml-file: 'extensions/deforum/tests/results.xml'
76 | xpath: '//testsuite/@failures'
77 | - name: Fail if there were test failures
78 | run: |
79 | echo "Test failures: ${{ steps.testresults.outputs.info }}"
80 | [ ${{ steps.testresults.outputs.info }} -eq 0 ]
81 | - name: Kill test server
82 | if: always()
83 | run: curl -vv -XPOST http://127.0.0.1:7860/sdapi/v1/server-stop && sleep 10
84 | - name: Show coverage
85 | run: |
86 | python -m coverage combine .coverage* extensions/deforum/.coverage*
87 | python -m coverage report -i
88 | python -m coverage html -i
89 | - name: Upload main app output
90 | uses: actions/upload-artifact@v3
91 | if: always()
92 | with:
93 | name: serverlog
94 | path: serverlog.txt
95 | - name: Upload coverage HTML
96 | uses: actions/upload-artifact@v3
97 | if: always()
98 | with:
99 | name: htmlcov
100 | path: htmlcov
101 | - name: Surface failing tests
102 | if: always()
103 | uses: pmeier/pytest-results-action@main
104 | with:
105 | path: extensions/deforum/tests/results.xml
106 | summary: true
107 | display-options: fEX
108 | fail-on-empty: true
109 |
--------------------------------------------------------------------------------
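For orientation, here is a hedged sketch of reproducing the test run locally, mirroring the workflow above. It assumes a webui checkout with this extension under `extensions/deforum` and a test server already started with `--deforum-api`, as in the "Start test server" step; the commands themselves are lifted from the workflow, minus the coverage bookkeeping.

```sh
# install the test dependencies used by the workflow
pip install wait-for-it -r extensions/deforum/requirements-dev.txt

# wait for the locally running webui test server, then run the pytest suite
wait-for-it --service 127.0.0.1:7860 -t 600
cd extensions/deforum
python -m pytest -vv --junitxml=tests/results.xml tests
```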
/.gitignore:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | # Unnecessary compiled python files.
18 | __pycache__
19 | *.pyc
20 | *.pyo
21 |
22 | # Output Images
23 | outputs
24 |
25 | # Log files for colab-convert
26 | cc-outputs.log
27 | *.safetensors
28 | scripts/deforum_helpers/navigation.py
29 |
30 | #test output
31 | htmlcov
32 | tests/results.xml
33 | .coverage*
34 | serverlog.txt
35 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | When contributing, please ping the devs via Discord (https://discord.gg/deforum) to make sure your addition will fit well into such a large project and to get help if needed.
4 |
5 | *By contributing to this project you agree that your work will be granted copyright to Deforum LLC and licensed under the terms of the GNU Affero General Public License version 3.*
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Deforum Stable Diffusion — official extension for AUTOMATIC1111's webui
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | ## Need help? See our [FAQ](https://github.com/deforum-art/sd-webui-deforum/wiki/FAQ-&-Troubleshooting)
13 |
14 | ## Getting Started
15 |
16 | 1. Install [AUTOMATIC1111's webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui/).
17 |
18 | 2. There are now two ways to install it: either clone the repo into the `extensions` directory via the git command line, launched from within the `stable-diffusion-webui` folder
19 |
20 | ```sh
21 | git clone https://github.com/deforum-art/sd-webui-deforum extensions/deforum
22 | ```
23 |
24 | Or download this repository, locate the `extensions` folder within your WebUI installation, create a folder named `deforum` and put the contents of the downloaded directory inside of it. Then restart WebUI.
25 |
26 | Or launch A1111, navigate to the Extensions tab, choose Available, find deforum in the list of available extensions and install it. Restart A1111 once the extension has been installed.
27 | 3. Open the webui, find the Deforum tab at the top of the page.
28 |
29 | 4. Enter the animation settings. Refer to [this general guide](https://docs.google.com/document/d/1pEobUknMFMkn8F5TMsv8qRzamXX_75BShMMXV8IFslI/edit) and [this guide to math keyframing functions in Deforum](https://docs.google.com/document/d/1pfW1PwbDIuW0cv-dnuyYj1UzPqe23BlSLTJsqazffXM/edit?usp=sharing). However, **in this version prompt weights less than zero don't just work like in the original Deforum!** Split the positive and the negative prompt in the json section using the --neg argument, like this: "apple:\`where(cos(t)>=0, cos(t), 0)\`, snow --neg strawberry:\`where(cos(t)<0, -cos(t), 0)\`" (see the example prompt schedule after this list).
30 |
31 | 5. To view animation frames as they're being made, without waiting for the animation to complete, go to the 'Settings' tab and set the value of the setting shown below **above zero**. Warning: it may slow down the generation process.
32 |
33 | 
34 |
35 |
36 | 6. Run the script and see if it works. **In 3D mode a large delay is expected at first** as the script loads the depth models. Using the default settings, the whole thing should consume about 6.4 GB of VRAM at 3D mode peaks, and no more than 3.8 GB of VRAM in 3D mode if you launch the webui with the '--lowvram' command line argument.
37 |
38 | 7. After the generation process is completed, click the button with the self-describing name to show the video or gif result right in the GUI!
39 |
40 | 8. Join our Discord where you can post generated stuff, ask questions and more: https://discord.gg/deforum.
41 | * There's also the 'Issues' tab in the repo, for well... reporting issues ;)
42 |
43 | 9. Profit!
44 |
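For reference, a minimal prompt schedule using the `--neg` split from step 4 might look like this; the frame numbers and the second prompt are only illustrative:

```json
{
    "0": "apple:`where(cos(t)>=0, cos(t), 0)`, snow --neg strawberry:`where(cos(t)<0, -cos(t), 0)`",
    "60": "a snowy forest, highly detailed --neg blurry, low quality"
}
```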
45 | ## Known issues
46 |
47 | * This port is not fully backward-compatible with the notebook and the local version, both because of changes in how AUTOMATIC1111's webui handles Stable Diffusion models and because of changes made to this script to get it working in the new environment. *Expect* that you may not get exactly the same results, or that things may break when loading older settings.
48 |
49 | ## Screenshots
50 |
51 | Amazing raw Deforum animation by [Pxl.Pshr](https://www.instagram.com/pxl.pshr):
52 | * Turn Audio ON!
53 |
54 | (Audio credits: SKRILLEX, FRED AGAIN & FLOWDAN - RUMBLE (PHACE'S DNB FLIP))
55 |
56 | https://user-images.githubusercontent.com/121192995/224450647-39529b28-be04-4871-bb7a-faf7afda2ef2.mp4
57 |
58 | Setting file of that video: [here](https://github.com/deforum-art/sd-webui-deforum/files/11353167/PxlPshrWinningAnimationSettings.txt).
59 |
60 |
61 |
62 | Main extension tab:
63 |
64 | 
65 |
66 | Keyframes tab:
67 |
68 | 
69 |
70 | ## License
71 |
72 | This program is distributed under the terms of the GNU Affero General Public License v3.0, copyright (c) 2023 Deforum LLC.
73 |
74 | Some of its integrated third-party components are sublicensed and may carry other licenses; see LICENSE for usage terms.
75 |
--------------------------------------------------------------------------------
/install.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import launch
18 | import os
19 |
20 | req_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "requirements.txt")
21 |
22 | with open(req_file) as file:
23 | for lib in file:
24 | lib = lib.strip()
25 | if not launch.is_installed(lib):
26 | launch.run_pip(f"install {lib}", f"Deforum requirement: {lib}")
--------------------------------------------------------------------------------
/javascript/deforum.js:
--------------------------------------------------------------------------------
1 | /*
2 | # Copyright (C) 2023 Deforum LLC
3 | #
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Affero General Public License as published by
6 | # the Free Software Foundation, version 3 of the License.
7 | #
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU General Public License for more details.
12 | #
13 | # You should have received a copy of the GNU Affero General Public License
14 | # along with this program. If not, see .
15 |
16 | Contact the authors: https://deforum.github.io/
17 | */
18 |
19 | function submit_deforum(){
20 | rememberGallerySelection('deforum_gallery')
21 | showSubmitButtons('deforum', false)
22 |
23 | var id = randomId()
24 | requestProgress(id, gradioApp().getElementById('deforum_gallery_container'), gradioApp().getElementById('deforum_gallery'), function(){
25 | showSubmitButtons('deforum', true)
26 | })
27 |
28 | var res = create_submit_args(arguments)
29 |
30 | res[0] = id
31 |
32 | return res
33 | }
--------------------------------------------------------------------------------
/preload.py:
--------------------------------------------------------------------------------
1 | # 'Deforum' plugin for Automatic1111's Stable Diffusion WebUI.
2 | # Copyright (C) 2023 Deforum LLC
3 | #
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Affero General Public License as published by
6 | # the Free Software Foundation, version 3 of the License.
7 | #
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU General Public License for more details.
12 | #
13 | # You should have received a copy of the GNU Affero General Public License
14 | # along with this program. If not, see .
15 |
16 | # Contact the authors: https://deforum.github.io/
17 |
18 | def preload(parser):
19 | parser.add_argument(
20 | "--deforum-api",
21 | action="store_true",
22 | help="Enable the Deforum API",
23 | default=None,
24 | )
25 | parser.add_argument(
26 | "--deforum-simple-api",
27 | action="store_true",
28 | help="Enable the simplified version of Deforum API",
29 | default=None,
30 | )
31 | parser.add_argument(
32 | "--deforum-run-now",
33 | type=str,
34 | help="Comma-delimited list of deforum settings files to run immediately on startup",
35 | default=None,
36 | )
37 | parser.add_argument(
38 | "--deforum-terminate-after-run-now",
39 | action="store_true",
40 | help="Whether to shut down the a1111 process immediately after completing the generations passed in to '--deforum-run-now'.",
41 | default=None,
42 | )
--------------------------------------------------------------------------------
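Taken together, the flags registered above could be combined like this when launching the webui; the settings-file paths below are placeholders, not files shipped with the extension:

```sh
# headless batch render: enable the Deforum API, queue two settings files at
# startup, and shut the a1111 process down once they have finished
python launch.py --deforum-api \
    --deforum-run-now "/path/to/night_drive.txt,/path/to/day_drive.txt" \
    --deforum-terminate-after-run-now
```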
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | filterwarnings =
3 | ignore::DeprecationWarning
4 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | coverage
2 | syrupy
3 | pytest
4 | tenacity
5 | pydantic_requests
6 | moviepy
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numexpr
2 | matplotlib
3 | pandas
4 | av
5 | pims
6 | imageio_ffmpeg
7 | rich
8 | gdown
--------------------------------------------------------------------------------
/scripts/deforum.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os
18 |
19 | import modules.paths as ph
20 | from modules import script_callbacks
21 | from modules.shared import cmd_opts
22 | from scripts.deforum_extend_paths import deforum_sys_extend
23 |
24 |
25 | def init_deforum():
26 | # use sys.path.extend to make sure all of our files are available for import
27 | deforum_sys_extend()
28 |
29 | # create the Models/Deforum folder, where many of the Deforum-related models/packages will be downloaded
30 | os.makedirs(ph.models_path + '/Deforum', exist_ok=True)
31 |
32 | # import our on_ui_tabs and on_ui_settings functions from the respective files
33 | from deforum_helpers.ui_right import on_ui_tabs
34 | from deforum_helpers.ui_settings import on_ui_settings
35 |
36 | # trigger webui's extensions mechanism using our imported main functions -
37 | # first to create the actual deforum gui, then to make the deforum tab in webui's settings section
38 | script_callbacks.on_ui_tabs(on_ui_tabs)
39 | script_callbacks.on_ui_settings(on_ui_settings)
40 |
41 | init_deforum()
42 |
43 |
--------------------------------------------------------------------------------
/scripts/deforum_api_models.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | from pydantic import BaseModel
18 | from typing import Any, Dict, List, Optional, Union
19 | from dataclasses import dataclass
20 | from enum import Enum
21 |
22 | class Batch(BaseModel):
23 | deforum_settings : Optional[Union[Dict[str, Any],List[Dict[str, Any]]]]
24 | options_overrides : Optional[Dict[str, Any]]
25 |
26 | class DeforumJobStatusCategory(str, Enum):
27 | ACCEPTED = "ACCEPTED"
28 | SUCCEEDED = "SUCCEEDED"
29 | FAILED = "FAILED"
30 | CANCELLED = "CANCELLED"
31 |
32 | class DeforumJobPhase(str, Enum):
33 | QUEUED = "QUEUED"
34 | PREPARING = "PREPARING"
35 | GENERATING = "GENERATING"
36 | POST_PROCESSING = "POST_PROCESSING"
37 | DONE = "DONE"
38 |
39 | class DeforumJobErrorType(str, Enum):
40 | NONE = "NONE"
41 | RETRYABLE = "RETRYABLE"
42 | TERMINAL = "TERMINAL"
43 |
44 | @dataclass(frozen=True)
45 | class DeforumJobStatus(BaseModel):
46 | id: str
47 | status : DeforumJobStatusCategory
48 | phase : DeforumJobPhase
49 | error_type : DeforumJobErrorType
50 | phase_progress : float
51 | started_at: float
52 | last_updated: float
53 | execution_time: float # time between job start and the last status update
54 | update_interval_time: float # time between the last two status updates
55 | updates: int # number of status updates so far
56 | message: Optional[str]
57 | outdir: Optional[str]
58 | timestring: Optional[str]
59 | deforum_settings : Optional[List[Dict[str, Any]]]
60 | options_overrides : Optional[Dict[str, Any]]
--------------------------------------------------------------------------------
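As a sketch of what a client might feed into this model, here is a hypothetical `Batch` body built and serialized with pydantic; the settings keys and the overridden option are illustrative, not a complete list, and the import assumes the extension's `scripts` directory is on `sys.path`:

```python
from deforum_api_models import Batch  # resolvable once scripts/ is on sys.path

# hypothetical request body for a Deforum batch, validated by the Batch model
batch = Batch(
    deforum_settings={"W": 512, "H": 512, "max_frames": 120},  # one settings dict (a list of dicts is also accepted)
    options_overrides={"CLIP_stop_at_last_layers": 2},         # webui options applied only for this run
)
print(batch.json())  # pydantic v1-style JSON serialization
```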
/scripts/deforum_extend_paths.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os
18 | import sys
19 |
20 | def deforum_sys_extend():
21 | deforum_folder_name = os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-2])
22 |
23 | basedirs = [os.getcwd()]
24 | if 'google.colab' in sys.modules:
25 | basedirs.append('/content/gdrive/MyDrive/sd/stable-diffusion-webui') # for TheLastBen's colab
26 | for _ in basedirs:
27 | deforum_paths_to_ensure = [
28 | os.path.join(deforum_folder_name, 'scripts'),
29 | os.path.join(deforum_folder_name, 'scripts', 'deforum_helpers', 'src')
30 | ]
31 | for deforum_scripts_path_fix in deforum_paths_to_ensure:
32 | if deforum_scripts_path_fix not in sys.path:
33 | sys.path.extend([deforum_scripts_path_fix])
34 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/114763196.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/scripts/deforum_helpers/114763196.jpg
--------------------------------------------------------------------------------
/scripts/deforum_helpers/RAFT.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import torch
18 | import numpy as np
19 | import torchvision.transforms.functional as F
20 | from torchvision.models.optical_flow import Raft_Large_Weights, raft_large
21 |
22 | class RAFT:
23 | def __init__(self):
24 | weights = Raft_Large_Weights.DEFAULT
25 | self.transforms = weights.transforms()
26 | self.device = "cuda" if torch.cuda.is_available() else "cpu"
27 | self.model = raft_large(weights=weights, progress=False).to(self.device).eval()
28 |
29 | def predict(self, image1, image2, num_flow_updates:int = 50):
30 | img1 = F.to_tensor(image1)
31 | img2 = F.to_tensor(image2)
32 | img1_batch, img2_batch = img1.unsqueeze(0), img2.unsqueeze(0)
33 | img1_batch, img2_batch = self.transforms(img1_batch, img2_batch)
34 |
35 | with torch.no_grad():
36 | flow = self.model(image1=img1_batch.to(self.device), image2=img2_batch.to(self.device), num_flow_updates=num_flow_updates)[-1].cpu().numpy()[0]
37 |
38 | # align the flow array to have the shape (h, w, 2) so it's compatible with the rest of CV2's flow methods
39 | flow = np.transpose(flow, (1, 2, 0))
40 |
41 | return flow
42 |
43 | def delete_model(self):
44 | del self.model
--------------------------------------------------------------------------------
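A minimal usage sketch for the class above; the frame file names are placeholders:

```python
from PIL import Image

# estimate dense optical flow between two consecutive frames
raft = RAFT()
frame_a = Image.open("frame_000.png").convert("RGB")
frame_b = Image.open("frame_001.png").convert("RGB")
flow = raft.predict(frame_a, frame_b)  # numpy array of shape (H, W, 2), cv2-style flow
raft.delete_model()                    # free the model when done
```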
/scripts/deforum_helpers/auto_navigation.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import numpy as np
18 | import torch
19 |
20 | # reallybigname - auto-navigation functions in progress...
21 | # usage:
22 | # if auto_rotation:
23 | # rot_mat = rotate_camera_towards_depth(depth_tensor, auto_rotation_steps, w, h, fov_deg, auto_rotation_depth_target)
24 | def rotate_camera_towards_depth(depth_tensor, turn_weight, width, height, h_fov=60, target_depth=1):
25 | # Compute the depth at the target depth
26 | target_depth_index = int(target_depth * depth_tensor.shape[0])
27 | target_depth_values = depth_tensor[target_depth_index]
28 | max_depth_index = torch.argmax(target_depth_values).item()
29 | max_depth_index = (max_depth_index, target_depth_index)
30 | max_depth = target_depth_values[max_depth_index[0]].item()
31 |
32 | # Compute the normalized x and y coordinates
33 | x, y = max_depth_index
34 | x_normalized = (x / (width - 1)) * 2 - 1
35 | y_normalized = (y / (height - 1)) * 2 - 1
36 |
37 | # Calculate horizontal and vertical field of view (in radians)
38 | h_fov_rad = np.radians(h_fov)
39 | aspect_ratio = width / height
40 | v_fov_rad = h_fov_rad / aspect_ratio
41 |
42 | # Calculate the world coordinates (x, y) at the target depth
43 | x_world = np.tan(h_fov_rad / 2) * max_depth * x_normalized
44 | y_world = np.tan(v_fov_rad / 2) * max_depth * y_normalized
45 |
46 | # Compute the target position using the world coordinates and max_depth
47 | target_position = np.array([x_world, y_world, max_depth])
48 |
49 | # Assuming the camera is initially at the origin, and looking in the negative Z direction
50 | cam_position = np.array([0, 0, 0])
51 | current_direction = np.array([0, 0, -1])
52 |
53 | # Compute the direction vector and normalize it
54 | direction = target_position - cam_position
55 | direction = direction / np.linalg.norm(direction)
56 |
57 | # Compute the rotation angle based on the turn_weight (number of frames)
58 | axis = np.cross(current_direction, direction)
59 | axis = axis / np.linalg.norm(axis)
60 | angle = np.arcsin(np.linalg.norm(axis))
61 | max_angle = np.pi * (0.1 / turn_weight) # Limit the maximum rotation angle to half of the visible screen
62 | rotation_angle = np.clip(np.sign(np.cross(current_direction, direction)) * angle / turn_weight, -max_angle, max_angle)
63 |
64 | # Compute the rotation matrix
65 | rotation_matrix = np.eye(3) + np.sin(rotation_angle) * np.array([
66 | [0, -axis[2], axis[1]],
67 | [axis[2], 0, -axis[0]],
68 | [-axis[1], axis[0], 0]
69 | ]) + (1 - np.cos(rotation_angle)) * np.outer(axis, axis)
70 |
71 | # Convert the NumPy array to a PyTorch tensor
72 | rotation_matrix_tensor = torch.from_numpy(rotation_matrix).float()
73 |
74 | # Add an extra dimension to match the expected shape (1, 3, 3)
75 | rotation_matrix_tensor = rotation_matrix_tensor.unsqueeze(0)
76 |
77 | return rotation_matrix_tensor
78 |
79 | def rotation_matrix(axis, angle):
80 | axis = np.asarray(axis)
81 | axis = axis / np.linalg.norm(axis)
82 | a = np.cos(angle / 2.0)
83 | b, c, d = -axis * np.sin(angle / 2.0)
84 | aa, bb, cc, dd = a * a, b * b, c * c, d * d
85 | bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
86 | return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
87 | [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
88 | [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]])
89 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/colors.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import cv2
18 | import pkg_resources
19 | from skimage.exposure import match_histograms
20 |
21 | def maintain_colors(prev_img, color_match_sample, mode):
22 |
23 | match_histograms_kwargs = {'channel_axis': -1}
24 |
25 | if mode == 'RGB':
26 | return match_histograms(prev_img, color_match_sample, **match_histograms_kwargs)
27 | elif mode == 'HSV':
28 | prev_img_hsv = cv2.cvtColor(prev_img, cv2.COLOR_RGB2HSV)
29 | color_match_hsv = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2HSV)
30 | matched_hsv = match_histograms(prev_img_hsv, color_match_hsv, **match_histograms_kwargs)
31 | return cv2.cvtColor(matched_hsv, cv2.COLOR_HSV2RGB)
32 | else: # LAB
33 | prev_img_lab = cv2.cvtColor(prev_img, cv2.COLOR_RGB2LAB)
34 | color_match_lab = cv2.cvtColor(color_match_sample, cv2.COLOR_RGB2LAB)
35 | matched_lab = match_histograms(prev_img_lab, color_match_lab, **match_histograms_kwargs)
36 | return cv2.cvtColor(matched_lab, cv2.COLOR_LAB2RGB)
37 |
--------------------------------------------------------------------------------
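An illustrative call, with random dummy frames standing in for the previous generated frame and the color-match reference:

```python
import numpy as np

# keep the palette of the new frame consistent with a reference sample in LAB space
prev_img = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)      # placeholder frame
color_sample = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)  # placeholder reference
matched = maintain_colors(prev_img, color_sample, mode='LAB')
```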
/scripts/deforum_helpers/deforum_controlnet_gradio.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import gradio as gr
18 | # print (cnet_1.get_modules())
19 |
20 | # *** TODO: re-enable table printing! disabled only temp! 13-04-23 ***
21 | # table = Table(title="ControlNet params",padding=0, box=box.ROUNDED)
22 |
23 | # TODO: auto infer the names and the values for the table
24 | # field_names = []
25 | # field_names += ["module", "model", "weight", "inv", "guide_start", "guide_end", "guess", "resize", "rgb_bgr", "proc res", "thr a", "thr b"]
26 | # for field_name in field_names:
27 | # table.add_column(field_name, justify="center")
28 |
29 | # cn_model_name = str(controlnet_args.cn_1_model)
30 |
31 | # rows = []
32 | # rows += [controlnet_args.cn_1_module, cn_model_name[len('control_'):] if 'control_' in cn_model_name else cn_model_name, controlnet_args.cn_1_weight, controlnet_args.cn_1_invert_image, controlnet_args.cn_1_guidance_start, controlnet_args.cn_1_guidance_end, controlnet_args.cn_1_guess_mode, controlnet_args.cn_1_resize_mode, controlnet_args.cn_1_rgbbgr_mode, controlnet_args.cn_1_processor_res, controlnet_args.cn_1_threshold_a, controlnet_args.cn_1_threshold_b]
33 | # rows = [str(x) for x in rows]
34 |
35 | # table.add_row(*rows)
36 | # console.print(table)
37 |
38 | def hide_ui_by_cn_status(choice):
39 | return gr.update(visible=True) if choice else gr.update(visible=False)
40 |
41 | def hide_file_textboxes(choice):
42 | return gr.update(visible=False) if choice else gr.update(visible=True)
43 |
44 | class ToolButton(gr.Button, gr.components.FormComponent):
45 | """Small button with single emoji as text, fits inside gradio forms"""
46 | def __init__(self, **kwargs):
47 | super().__init__(variant="tool", **kwargs)
48 |
49 | def get_block_name(self):
50 | return "button"
--------------------------------------------------------------------------------
/scripts/deforum_helpers/deforum_tqdm.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os
18 | from math import ceil
19 | import tqdm
20 | from modules.shared import progress_print_out, opts, cmd_opts
21 |
22 | class DeforumTQDM:
23 | def __init__(self, args, anim_args, parseq_args, video_args):
24 | self._tqdm = None
25 | self._args = args
26 | self._anim_args = anim_args
27 | self._parseq_args = parseq_args
28 | self._video_args = video_args
29 |
30 | def reset(self):
31 | from .animation_key_frames import DeformAnimKeys
32 | from .parseq_adapter import ParseqAdapter
33 | deforum_total = 0
34 | # FIXME: get only amount of steps
35 | parseq_adapter = ParseqAdapter(self._parseq_args, self._anim_args, self._video_args, None, None, mute=True)
36 | keys = DeformAnimKeys(self._anim_args) if not parseq_adapter.use_parseq else parseq_adapter.anim_keys
37 |
38 | start_frame = 0
39 | if self._anim_args.resume_from_timestring:
40 | for tmp in os.listdir(self._args.outdir):
41 | filename = tmp.split("_")
42 | # don't use saved depth maps to count number of frames
43 | if self._anim_args.resume_timestring in filename and "depth" not in filename:
44 | start_frame += 1
45 | start_frame = start_frame - 1
46 | using_vid_init = self._anim_args.animation_mode == 'Video Input'
47 | turbo_steps = 1 if using_vid_init else int(self._anim_args.diffusion_cadence)
48 | if self._anim_args.resume_from_timestring:
49 | last_frame = start_frame - 1
50 | if turbo_steps > 1:
51 | last_frame -= last_frame % turbo_steps
52 | if turbo_steps > 1:
53 | turbo_next_frame_idx = last_frame
54 | turbo_prev_frame_idx = turbo_next_frame_idx
55 | start_frame = last_frame + turbo_steps
56 | frame_idx = start_frame
57 | had_first = False
58 | while frame_idx < self._anim_args.max_frames:
59 | strength = keys.strength_schedule_series[frame_idx]
60 | if not had_first and self._args.use_init and ((self._args.init_image is not None and self._args.init_image != '') or self._args.init_image_box is not None):
61 | deforum_total += int(ceil(self._args.steps * (1 - strength)))
62 | had_first = True
63 | elif not had_first:
64 | deforum_total += self._args.steps
65 | had_first = True
66 | else:
67 | deforum_total += int(ceil(self._args.steps * (1 - strength)))
68 |
69 | if turbo_steps > 1:
70 | frame_idx += turbo_steps
71 | else:
72 | frame_idx += 1
73 |
74 | self._tqdm = tqdm.tqdm(
75 | desc="Deforum progress",
76 | total=deforum_total,
77 | position=1,
78 | file=progress_print_out
79 | )
80 |
81 | def update(self):
82 | if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars:
83 | return
84 | if self._tqdm is None:
85 | self.reset()
86 | self._tqdm.update()
87 |
88 | def updateTotal(self, new_total):
89 | if not opts.multiple_tqdm or cmd_opts.disable_console_progressbars:
90 | return
91 | if self._tqdm is None:
92 | self.reset()
93 | self._tqdm.total = new_total
94 |
95 | def clear(self):
96 | if self._tqdm is not None:
97 | self._tqdm.close()
98 | self._tqdm = None
99 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/depth_adabins.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import torch
18 | import numpy as np
19 | from PIL import Image
20 | import torchvision.transforms.functional as TF
21 | from .general_utils import download_file_with_checksum
22 | from infer import InferenceHelper
23 |
24 | class AdaBinsModel:
25 | _instance = None
26 |
27 | def __new__(cls, *args, **kwargs):
28 | keep_in_vram = kwargs.get('keep_in_vram', False)
29 | if cls._instance is None:
30 | cls._instance = super().__new__(cls)
31 | cls._instance._initialize(*args, keep_in_vram=keep_in_vram)
32 | return cls._instance
33 |
34 | def _initialize(self, models_path, keep_in_vram=False):
35 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36 | self.keep_in_vram = keep_in_vram
37 | self.adabins_helper = None
38 |
39 | download_file_with_checksum(url='https://github.com/hithereai/deforum-for-automatic1111-webui/releases/download/AdaBins/AdaBins_nyu.pt', expected_checksum='643db9785c663aca72f66739427642726b03acc6c4c1d3755a4587aa2239962746410d63722d87b49fc73581dbc98ed8e3f7e996ff7b9c0d56d0fbc98e23e41a', dest_folder=models_path, dest_filename='AdaBins_nyu.pt')
40 |
41 | self.adabins_helper = InferenceHelper(models_path=models_path, dataset='nyu', device=self.device)
42 |
43 | def predict(self, img_pil, prev_img_cv2):
44 | w, h = prev_img_cv2.shape[1], prev_img_cv2.shape[0]
45 | adabins_depth = np.array([])
46 | use_adabins = True
47 | MAX_ADABINS_AREA, MIN_ADABINS_AREA = 500000, 448 * 448
48 |
49 | image_pil_area, resized = w * h, False
50 |
51 | if image_pil_area not in range(MIN_ADABINS_AREA, MAX_ADABINS_AREA + 1):
52 | scale = ((MAX_ADABINS_AREA if image_pil_area > MAX_ADABINS_AREA else MIN_ADABINS_AREA) / image_pil_area) ** 0.5
53 | depth_input = img_pil.resize((int(w * scale), int(h * scale)), Image.LANCZOS if image_pil_area > MAX_ADABINS_AREA else Image.BICUBIC)
54 | print(f"AdaBins depth resized to {depth_input.width}x{depth_input.height}")
55 | resized = True
56 | else:
57 | depth_input = img_pil
58 |
59 | try:
60 | with torch.no_grad():
61 | _, adabins_depth = self.adabins_helper.predict_pil(depth_input)
62 | if resized:
63 | adabins_depth = TF.resize(torch.from_numpy(adabins_depth), torch.Size([h, w]), interpolation=TF.InterpolationMode.BICUBIC).cpu().numpy()
64 | adabins_depth = adabins_depth.squeeze()
65 | except Exception as e:
66 | print("AdaBins exception encountered. Falling back to pure MiDaS/Zoe (only if running in Legacy Midas/Zoe+AdaBins mode)")
67 | use_adabins = False
68 | torch.cuda.empty_cache()
69 |
70 | return use_adabins, adabins_depth
71 |
72 | def to(self, device):
73 | self.device = device
74 | if self.adabins_helper is not None:
75 | self.adabins_helper.to(device)
76 |
77 | def delete_model(self):
78 | del self.adabins_helper
79 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/depth_leres.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import torch
18 | import cv2
19 | import os
20 | import numpy as np
21 | import torchvision.transforms as transforms
22 | from .general_utils import download_file_with_checksum
23 | from leres.lib.multi_depth_model_woauxi import RelDepthModel
24 | from leres.lib.net_tools import load_ckpt
25 |
26 | class LeReSDepth:
27 | def __init__(self, width=448, height=448, models_path=None, checkpoint_name='res101.pth', backbone='resnext101'):
28 | self.width = width
29 | self.height = height
30 | self.models_path = models_path
31 | self.checkpoint_name = checkpoint_name
32 | self.backbone = backbone
33 |
34 | download_file_with_checksum(url='https://cloudstor.aarnet.edu.au/plus/s/lTIJF4vrvHCAI31/download', expected_checksum='7fdc870ae6568cb28d56700d0be8fc45541e09cea7c4f84f01ab47de434cfb7463cacae699ad19fe40ee921849f9760dedf5e0dec04a62db94e169cf203f55b1', dest_folder=models_path, dest_filename=self.checkpoint_name)
35 |
36 | self.depth_model = RelDepthModel(backbone=self.backbone)
37 | self.depth_model.eval()
38 | self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
39 | self.depth_model.to(self.DEVICE)
40 | load_ckpt(os.path.join(self.models_path, self.checkpoint_name), self.depth_model, None, None)
41 |
42 | @staticmethod
43 | def scale_torch(img):
44 | if len(img.shape) == 2:
45 | img = img[np.newaxis, :, :]
46 | if img.shape[2] == 3:
47 | transform = transforms.Compose([transforms.ToTensor(),
48 | transforms.Normalize((0.485, 0.456, 0.406) , (0.229, 0.224, 0.225))])
49 | img = transform(img)
50 | else:
51 | img = img.astype(np.float32)
52 | img = torch.from_numpy(img)
53 | return img
54 |
55 | def predict(self, image):
56 | resized_image = cv2.resize(image, (self.width, self.height))
57 | img_torch = self.scale_torch(resized_image)[None, :, :, :]
58 | pred_depth = self.depth_model.inference(img_torch).cpu().numpy().squeeze()
59 | pred_depth_ori = cv2.resize(pred_depth, (image.shape[1], image.shape[0]))
60 | return torch.from_numpy(pred_depth_ori).unsqueeze(0).to(self.DEVICE)
61 |
62 | def save_raw_depth(self, depth, filepath):
63 | depth_normalized = (depth / depth.max() * 60000).astype(np.uint16)
64 | cv2.imwrite(filepath, depth_normalized)
65 |
66 | def to(self, device):
67 | self.DEVICE = device
68 | self.depth_model = self.depth_model.to(device)
69 |
70 | def delete(self):
71 | del self.depth_model
--------------------------------------------------------------------------------
/scripts/deforum_helpers/depth_midas.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see .
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os
18 | import cv2
19 | import torch
20 | import numpy as np
21 | from .general_utils import download_file_with_checksum
22 | from midas.dpt_depth import DPTDepthModel
23 | from midas.transforms import Resize, NormalizeImage, PrepareForNet
24 | import torchvision.transforms as T
25 |
26 | class MidasDepth:
27 | def __init__(self, models_path, device, half_precision=True, midas_model_type='Midas-3-Hybrid'):
28 | if midas_model_type.lower() == 'midas-3.1-beitlarge':
29 | self.midas_model_filename = 'dpt_beit_large_512.pt'
30 | self.midas_model_checksum='66cbb00ea7bccd6e43d3fd277bd21002d8d8c2c5c487e5fcd1e1d70c691688a19122418b3ddfa94e62ab9f086957aa67bbec39afe2b41c742aaaf0699ee50b33'
31 | self.midas_model_url = 'https://github.com/isl-org/MiDaS/releases/download/v3_1/dpt_beit_large_512.pt'
32 | self.resize_px = 512
33 | self.backbone = 'beitl16_512'
34 | else:
35 | self.midas_model_filename = 'dpt_large-midas-2f21e586.pt'
36 | self.midas_model_checksum = 'fcc4829e65d00eeed0a38e9001770676535d2e95c8a16965223aba094936e1316d569563552a852d471f310f83f597e8a238987a26a950d667815e08adaebc06'
37 | self.midas_model_url = 'https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt'
38 | self.resize_px = 384
39 | self.backbone = 'vitl16_384'
40 | self.device = device
41 | self.normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
42 | self.midas_transform = T.Compose([
43 | Resize(self.resize_px, self.resize_px, resize_target=None, keep_aspect_ratio=True, ensure_multiple_of=32,
44 | resize_method="minimal", image_interpolation_method=cv2.INTER_CUBIC),
45 | self.normalization,
46 | PrepareForNet()
47 | ])
48 |
49 | download_file_with_checksum(url=self.midas_model_url, expected_checksum=self.midas_model_checksum, dest_folder=models_path, dest_filename=self.midas_model_filename)
50 |
51 | self.load_midas_model(models_path, self.midas_model_filename)
52 | if half_precision:
53 | self.midas_model = self.midas_model.half()
54 |
55 | def load_midas_model(self, models_path, midas_model_filename):
56 | model_file = os.path.join(models_path, midas_model_filename)
57 | print(f"Loading MiDaS model from {midas_model_filename}...")
58 | self.midas_model = DPTDepthModel(
59 | path=model_file,
60 | backbone=self.backbone,
61 | non_negative=True,
62 | )
63 | self.midas_model.eval().to(self.device, memory_format=torch.channels_last if self.device == torch.device("cuda") else None)
64 |
65 | def predict(self, prev_img_cv2, half_precision):
66 | img_midas = prev_img_cv2.astype(np.float32) / 255.0
67 | img_midas_input = self.midas_transform({"image": img_midas})["image"]
68 | sample = torch.from_numpy(img_midas_input).float().to(self.device).unsqueeze(0)
69 |
70 | if self.device.type == "cuda" or self.device.type == "mps":
71 | sample = sample.to(memory_format=torch.channels_last)
72 | if half_precision:
73 | sample = sample.half()
74 |
75 | with torch.no_grad():
76 | midas_depth = self.midas_model.forward(sample)
77 | midas_depth = torch.nn.functional.interpolate(
78 | midas_depth.unsqueeze(1),
79 | size=img_midas.shape[:2],
80 | mode="bicubic",
81 | align_corners=False,
82 | ).squeeze().cpu().numpy()
83 |
84 | torch.cuda.empty_cache()
85 | depth_tensor = torch.from_numpy(np.expand_dims(midas_depth, axis=0)).squeeze().to(self.device)
86 |
87 | return depth_tensor
88 |
89 | def to(self, device):
90 | self.device = device
91 | self.midas_model = self.midas_model.to(device, memory_format=torch.channels_last if device == torch.device("cuda") else None)
--------------------------------------------------------------------------------
/scripts/deforum_helpers/depth_zoe.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import torch
18 | from zoedepth.models.builder import build_model
19 | from zoedepth.utils.config import get_config
20 |
21 | class ZoeDepth:
22 | def __init__(self, width=512, height=512):
23 | conf = get_config("zoedepth_nk", "infer")
24 | conf.img_size = [width, height]
25 | self.model_zoe = build_model(conf)
26 | self.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
27 | self.zoe = self.model_zoe.to(self.DEVICE)
28 | self.width = width
29 | self.height = height
30 |
31 | def predict(self, image):
32 | self.zoe.core.prep.resizer._Resize__width = self.width
33 | self.zoe.core.prep.resizer._Resize__height = self.height
34 | depth_tensor = self.zoe.infer_pil(image, output_type="tensor")
35 | return depth_tensor
36 |
37 | def to(self, device):
38 | self.DEVICE = device
39 | self.zoe = self.model_zoe.to(device)
40 |
41 | def save_raw_depth(self, depth, filepath):
42 | depth.save(filepath, format='PNG', mode='I;16')
43 |
44 | def delete(self):
45 | del self.model_zoe
46 | del self.zoe
--------------------------------------------------------------------------------
/scripts/deforum_helpers/human_masking.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os, cv2
18 | import torch
19 | from pathlib import Path
20 | from multiprocessing import freeze_support
21 |
22 | def extract_frames(input_video_path, output_imgs_path):
23 | # Open the video file
24 | vidcap = cv2.VideoCapture(input_video_path)
25 |
26 | # Get the total number of frames in the video
27 | frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
28 |
29 | # Create the output directory if it does not exist
30 | os.makedirs(output_imgs_path, exist_ok=True)
31 |
32 | # Extract the frames
33 | for i in range(frame_count):
34 | success, image = vidcap.read()
35 | if success:
36 | cv2.imwrite(os.path.join(output_imgs_path, f"frame{i}.png"), image)
37 | print(f"{frame_count} frames extracted and saved to {output_imgs_path}")
38 |
39 | def video2humanmasks(input_frames_path, output_folder_path, output_type, fps):
40 | # freeze support is needed for video outputting
41 | freeze_support()
42 |
43 | # check if input path exists and is a directory
44 | if not os.path.exists(input_frames_path) or not os.path.isdir(input_frames_path):
45 | raise ValueError("Invalid input path: {}".format(input_frames_path))
46 |
47 | # check if output path exists and is a directory
48 | if not os.path.exists(output_folder_path) or not os.path.isdir(output_folder_path):
49 | raise ValueError("Invalid output path: {}".format(output_folder_path))
50 |
51 | # check if output_type is valid
52 | valid_output_types = ["video", "pngs", "both"]
53 | if output_type.lower() not in valid_output_types:
54 | raise ValueError("Invalid output type: {}. Must be one of {}".format(output_type, valid_output_types))
55 |
56 | # try to predict where torch cache lives, so we can try and fetch models from cache in the next step
57 | predicted_torch_model_cache_path = os.path.join(Path.home(), ".cache", "torch", "hub", "hithereai_RobustVideoMatting_master")
58 | predicted_rvm_cache_testfile = os.path.join(predicted_torch_model_cache_path, "hubconf.py")
59 |
60 | # try to fetch the models from cache, and only if they can't be found, download from the internet (to enable offline usage)
61 | try:
62 | # Try to fetch the models from cache
63 | convert_video = torch.hub.load(predicted_torch_model_cache_path, "converter", source='local')
64 | model = torch.hub.load(predicted_torch_model_cache_path, "resnet50", source='local').cuda()
65 | except:
66 | # Download from the internet if not found in cache
67 | convert_video = torch.hub.load("hithereai/RobustVideoMatting", "converter")
68 | model = torch.hub.load("hithereai/RobustVideoMatting", "resnet50").cuda()
69 |
70 | output_alpha_vid_path = os.path.join(output_folder_path, "human_masked_video.mp4")
71 | # extract human masks from the input folder's imgs.
72 | # in this step PNGs are extracted only if output_type is set to 'pngs'. Otherwise a video is made, and in the case of 'both', the video is split into PNGs in the next step
73 | convert_video(
74 | model,
75 | input_source=input_frames_path, # full path of the folder that contains all of the extracted input imgs
76 | output_type='video' if output_type.upper() in ("VIDEO", "BOTH") else 'png_sequence',
77 | output_alpha=output_alpha_vid_path if output_type.upper() in ("VIDEO", "BOTH") else output_folder_path,
78 | output_video_mbps=4,
79 | output_video_fps=fps,
80 | downsample_ratio=None, # None for auto
81 | seq_chunk=12, # Process n frames at once for better parallelism
82 | progress=True # show extraction progress
83 | )
84 |
85 | if output_type.lower() == "both":
86 | extract_frames(output_alpha_vid_path, output_folder_path)
87 |
--------------------------------------------------------------------------------
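A minimal usage sketch for `video2humanmasks` above, assuming frames have already been extracted into an existing input folder and that a CUDA GPU is available (the RobustVideoMatting model is loaded with `.cuda()`); the paths, fps and import path are illustrative:

```python
from deforum_helpers.human_masking import video2humanmasks

# output_type "both" writes human_masked_video.mp4 into the output folder
# and then splits it back into per-frame PNGs.
video2humanmasks(
    input_frames_path="outputs/inputframes",   # folder of extracted frames
    output_folder_path="outputs/maskframes",   # must already exist
    output_type="both",
    fps=30,
)
```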
/scripts/deforum_helpers/image_sharpening.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import cv2
18 | import numpy as np
19 |
20 | def unsharp_mask(img, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0, mask=None):
21 | if amount == 0:
22 | return img
23 | # Return a sharpened version of the image, using an unsharp mask.
24 | # If mask is not None, only areas under mask are handled
25 | blurred = cv2.GaussianBlur(img, kernel_size, sigma)
26 | sharpened = float(amount + 1) * img - float(amount) * blurred
27 | sharpened = np.maximum(sharpened, np.zeros(sharpened.shape))
28 | sharpened = np.minimum(sharpened, 255 * np.ones(sharpened.shape))
29 | sharpened = sharpened.round().astype(np.uint8)
30 | if threshold > 0:
31 | low_contrast_mask = np.absolute(img - blurred) < threshold
32 | np.copyto(sharpened, img, where=low_contrast_mask)
33 | if mask is not None:
34 | mask = np.array(mask)
35 | masked_sharpened = cv2.bitwise_and(sharpened, sharpened, mask=mask)
36 | masked_img = cv2.bitwise_and(img, img, mask=255-mask)
37 | sharpened = cv2.add(masked_img, masked_sharpened)
38 | return sharpened
--------------------------------------------------------------------------------
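A short sketch of calling `unsharp_mask` above on a single frame; only OpenCV is needed on the calling side, and the file names and import path are illustrative:

```python
import cv2
from deforum_helpers.image_sharpening import unsharp_mask

img = cv2.imread("frame_000000001.png")  # uint8 BGR frame
sharpened = unsharp_mask(img, kernel_size=(5, 5), sigma=1.0, amount=0.6)
cv2.imwrite("frame_000000001_sharp.png", sharpened)
```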
/scripts/deforum_helpers/masks.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os
18 | import cv2
19 | import gc
20 | import numpy as np
21 | from PIL import Image, ImageOps
22 | from .video_audio_utilities import get_frame_name
23 | from .load_images import load_image
24 |
25 | def do_overlay_mask(args, anim_args, img, frame_idx, is_bgr_array=False):
26 | if is_bgr_array:
27 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
28 | img = Image.fromarray(img)
29 |
30 | if anim_args.use_mask_video:
31 | current_mask = Image.open(os.path.join(args.outdir, 'maskframes', get_frame_name(anim_args.video_mask_path) + f"{frame_idx:09}.jpg"))
32 | current_frame = Image.open(os.path.join(args.outdir, 'inputframes', get_frame_name(anim_args.video_init_path) + f"{frame_idx:09}.jpg"))
33 | elif args.use_mask:
34 | current_mask = args.mask_image if args.mask_image is not None else load_image(args.mask_file, None)
35 | if args.init_image is None and args.init_image_box is None:
36 | current_frame = img
37 | else:
38 | current_frame = load_image(args.init_image, args.init_image_box)
39 |
40 | current_mask = current_mask.resize((args.W, args.H), Image.LANCZOS)
41 | current_frame = current_frame.resize((args.W, args.H), Image.LANCZOS)
42 | current_mask = ImageOps.grayscale(current_mask)
43 |
44 | if args.invert_mask:
45 | current_mask = ImageOps.invert(current_mask)
46 |
47 | img = Image.composite(img, current_frame, current_mask)
48 |
49 | if is_bgr_array:
50 | img = np.array(img)
51 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
52 |
53 | del(current_mask, current_frame)
54 | gc.collect()
55 |
56 | return img
--------------------------------------------------------------------------------
/scripts/deforum_helpers/noise.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import torch
18 | from torch.nn.functional import interpolate
19 | import numpy as np
20 | from PIL import ImageOps
21 | import math
22 | from .animation import sample_to_cv2
23 | import cv2
24 | from modules.shared import opts
25 |
26 | DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
27 |
28 | deforum_noise_gen = torch.Generator(device='cpu')
29 |
30 | # 2D Perlin noise in PyTorch https://gist.github.com/vadimkantorov/ac1b097753f217c5c11bc2ff396e0a57
31 | def rand_perlin_2d(shape, res, fade = lambda t: 6*t**5 - 15*t**4 + 10*t**3):
32 | delta = (res[0] / shape[0], res[1] / shape[1])
33 | d = (shape[0] // res[0], shape[1] // res[1])
34 |
35 | grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1]), indexing='ij'), dim = -1) % 1
36 | angles = 2*math.pi*torch.rand(res[0]+1, res[1]+1, generator=deforum_noise_gen)
37 | gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim = -1)
38 |
39 | tile_grads = lambda slice1, slice2: gradients[slice1[0]:slice1[1], slice2[0]:slice2[1]].repeat_interleave(d[0], 0).repeat_interleave(d[1], 1)
40 | dot = lambda grad, shift: (torch.stack((grid[:shape[0],:shape[1],0] + shift[0], grid[:shape[0],:shape[1], 1] + shift[1] ), dim = -1) * grad[:shape[0], :shape[1]]).sum(dim = -1)
41 |
42 | n00 = dot(tile_grads([0, -1], [0, -1]), [0, 0])
43 | n10 = dot(tile_grads([1, None], [0, -1]), [-1, 0])
44 | n01 = dot(tile_grads([0, -1],[1, None]), [0, -1])
45 | n11 = dot(tile_grads([1, None], [1, None]), [-1,-1])
46 | t = fade(grid[:shape[0], :shape[1]])
47 | return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1])
48 |
49 | def rand_perlin_2d_octaves(shape, res, octaves=1, persistence=0.5):
50 | noise = torch.zeros(shape)
51 | frequency = 1
52 | amplitude = 1
53 | for _ in range(int(octaves)):
54 | noise += amplitude * rand_perlin_2d(shape, (frequency*res[0], frequency*res[1]))
55 | frequency *= 2
56 | amplitude *= persistence
57 | return noise
58 |
59 | def condition_noise_mask(noise_mask, invert_mask = False):
60 | if invert_mask:
61 | noise_mask = ImageOps.invert(noise_mask)
62 | noise_mask = np.array(noise_mask.convert("L"))
63 | noise_mask = noise_mask.astype(np.float32) / 255.0
64 | noise_mask = np.around(noise_mask, decimals=0)
65 | noise_mask = torch.from_numpy(noise_mask)
66 | #noise_mask = torch.round(noise_mask)
67 | return noise_mask
68 |
69 | def add_noise(sample, noise_amt: float, seed: int, noise_type: str, noise_args, noise_mask = None, invert_mask = False):
70 | deforum_noise_gen.manual_seed(seed) # Reproducibility
71 | perlin_w = sample.shape[0]
72 | perlin_h = sample.shape[1]
73 | perlin_w, perlin_h = map(lambda x: x - x % 64, (perlin_w, perlin_h)) # rescale perlin to multiples of 64
74 | sample2dshape = (perlin_w, perlin_h)
75 | noise = torch.randn((sample.shape[2], perlin_w, perlin_h), generator=deforum_noise_gen) # White noise
76 | if noise_type == 'perlin':
77 | # rand_perlin_2d_octaves is between -1 and 1, so we need to shift it to be between 0 and 1
78 | # print(sample.shape)
79 | noise = noise * ((rand_perlin_2d_octaves(sample2dshape, (int(noise_args[0]), int(noise_args[1])), octaves=noise_args[2], persistence=noise_args[3]) + torch.ones(sample2dshape)) / 2)
80 | noise = interpolate(noise.unsqueeze(1), size=(sample.shape[0], sample.shape[1])).squeeze(1) # rescale perlin back to the target resolution
81 | if noise_mask is not None:
82 | noise_mask = condition_noise_mask(noise_mask, invert_mask)
83 | noise_to_add = sample_to_cv2(noise * noise_mask)
84 | else:
85 | noise_to_add = sample_to_cv2(noise)
86 | sample = cv2.addWeighted(sample, 1-noise_amt, noise_to_add, noise_amt, 0)
87 |
88 | return sample
89 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/opts_overrider.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Any, Dict
3 | from modules.shared import opts
4 |
5 | log = logging.getLogger(__name__)
6 |
7 | class A1111OptionsOverrider(object):
8 | def __init__(self, opts_overrides: Dict[str, Any]):
9 | self.opts_overrides = opts_overrides
10 |
11 | def __enter__(self):
12 | if self.opts_overrides is not None and len(self.opts_overrides)>0:
13 | self.original_opts = {k: opts.data[k] for k in self.opts_overrides.keys() if k in opts.data}
14 | log.debug(f"Captured options to override: {self.original_opts}")
15 | log.info(f"Setting options: {self.opts_overrides}")
16 | for k, v in self.opts_overrides.items():
17 | setattr(opts, k, v)
18 | else:
19 | self.original_opts = None
20 | return self
21 |
22 | def __exit__(self, exception_type, exception_value, traceback):
23 | if (exception_type is not None):
24 | log.warning(f"Error during batch execution: {exception_type} - {exception_value}")
25 | log.debug(f"{traceback}")
26 | if (self.original_opts is not None):
27 | log.info(f"Restoring options: {self.original_opts}")
28 | for k, v in self.original_opts.items():
29 | setattr(opts, k, v)
30 |
--------------------------------------------------------------------------------
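A usage sketch for the `A1111OptionsOverrider` context manager above. It only makes sense inside the A1111 webui process (it mutates `modules.shared.opts`), and the option key/value and import path shown are illustrative:

```python
from deforum_helpers.opts_overrider import A1111OptionsOverrider

# Options are set on entry; the captured originals are restored on exit,
# even if the body raises.
with A1111OptionsOverrider({"CLIP_stop_at_last_layers": 2}):
    ...  # run the Deforum batch with the overridden options
```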
/scripts/deforum_helpers/resume.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os
18 | import cv2
19 | from modules.shared import opts
20 |
21 | # Resume requires at least two actual frames in order to work
22 | # 'Actual' frames are defined as frames that go through generation
23 | # - Can't resume from a single frame.
24 | # - If you have a cadence of 10, you need at least 10 frames in order to resume.
25 | # - Resume grabs the last actual frame and the 2nd to last actual frame
26 | # in order to work with cadence properly and feed it the prev_img/next_img
27 |
28 | def get_resume_vars(folder, timestring, cadence):
29 | DEBUG_MODE = opts.data.get("deforum_debug_mode_enabled", False)
30 | # count previous frames
31 | frame_count = 0
32 | for item in os.listdir(folder):
33 | # don't count txt files or mp4 files
34 | if ".txt" in item or ".mp4" in item:
35 | pass
36 | else:
37 | filename = item.split("_")
38 | # other image file types may be supported in the future,
39 | # so we just count files containing timestring
40 | # that don't contain the depth keyword (depth maps are saved in same folder)
41 | if timestring in filename and "depth" not in filename:
42 | frame_count += 1
43 | # add this to debugging var
44 | if DEBUG_MODE:
45 | print(f"\033[36mResuming:\033[0m File: {filename}")
46 |
47 | print(f"\033[36mResuming:\033[0m Current frame count: {frame_count}")
48 |
49 | # get last frame from frame count corrected for any trailing cadence frames
50 | last_frame = frame_count - (frame_count % cadence)
51 |
52 | # calculate previous actual frame
53 | prev_frame = last_frame - cadence
54 |
55 | # calculate next actual frame
56 | next_frame = last_frame - 1
57 |
58 | # get prev_img/next_img from prev/next frame index (files start at 0, so subtract 1 for index var)
59 | path = os.path.join(folder, f"{timestring}_{prev_frame:09}.png")
60 | prev_img = cv2.imread(path)
61 | path = os.path.join(folder, f"{timestring}_{next_frame:09}.png")
62 | next_img = cv2.imread(path)
63 |
64 | # report resume last/next in console
65 | print(f"\033[36mResuming:\033[0m Last frame: {prev_frame} - Next frame: {next_frame} ")
66 |
67 | # returns:
68 | # last frame count, accounting for cadence
69 | # next frame count, accounting for cadence
70 | # prev frame's image cv2 BGR
71 | # next frame's image cv2 BGR
72 | return prev_frame, next_frame, prev_img, next_img
73 |
--------------------------------------------------------------------------------
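A worked example of the frame arithmetic in `get_resume_vars` above (plain Python; the numbers are illustrative). With a cadence of 10 and 25 frames already on disk, resume picks up from frames 10 and 19:

```python
frame_count = 25
cadence = 10

last_frame = frame_count - (frame_count % cadence)  # 20 - trailing cadence frames are dropped
prev_frame = last_frame - cadence                    # 10 - previous 'actual' frame
next_frame = last_frame - 1                          # 19

print(prev_frame, next_frame)  # 10 19
```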
/scripts/deforum_helpers/rich.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | from rich.console import Console
18 | console = Console()
--------------------------------------------------------------------------------
/scripts/deforum_helpers/save_images.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os
18 | import cv2
19 | import gc
20 | import time
21 |
22 | def get_output_folder(output_path, batch_folder):
23 | out_path = os.path.join(output_path,time.strftime('%Y-%m'))
24 | if batch_folder != "":
25 | out_path = os.path.join(out_path, batch_folder)
26 | os.makedirs(out_path, exist_ok=True)
27 | return out_path
28 |
29 | def save_image(image, image_type, filename, args, video_args, root):
30 | if video_args.store_frames_in_ram:
31 | root.frames_cache.append({'path':os.path.join(args.outdir, filename), 'image':image, 'image_type':image_type})
32 | else:
33 | image.save(os.path.join(args.outdir, filename))
34 |
35 | def reset_frames_cache(root):
36 | root.frames_cache = []
37 | gc.collect()
38 |
39 | def dump_frames_cache(root):
40 | for image_cache in root.frames_cache:
41 | if image_cache['image_type'] == 'cv2':
42 | cv2.imwrite(image_cache['path'], image_cache['image'])
43 | elif image_cache['image_type'] == 'PIL':
44 | image_cache['image'].save(image_cache['path'])
45 | # do not reset the cache since we're going to add a frame-erasing function later #TODO
46 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/seed.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import random
18 |
19 | def next_seed(args, root):
20 | if args.seed_behavior == 'iter':
21 | args.seed += 1 if root.seed_internal % args.seed_iter_N == 0 else 0
22 | root.seed_internal += 1
23 | elif args.seed_behavior == 'ladder':
24 | args.seed += 2 if root.seed_internal == 0 else -1
25 | root.seed_internal = 1 if root.seed_internal == 0 else 0
26 | elif args.seed_behavior == 'alternate':
27 | args.seed += 1 if root.seed_internal == 0 else -1
28 | root.seed_internal = 1 if root.seed_internal == 0 else 0
29 | elif args.seed_behavior == 'fixed':
30 | pass # always keep seed the same
31 | else:
32 | args.seed = random.randint(0, 2**32 - 1)
33 | return args.seed
34 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/adabins/__init__.py:
--------------------------------------------------------------------------------
1 | from .unet_adaptive_bins import UnetAdaptiveBins
2 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/adabins/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class PatchTransformerEncoder(nn.Module):
6 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4):
7 | super(PatchTransformerEncoder, self).__init__()
8 | encoder_layers = nn.TransformerEncoderLayer(embedding_dim, num_heads, dim_feedforward=1024)
9 | self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=4) # takes shape S,N,E
10 |
11 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim,
12 | kernel_size=patch_size, stride=patch_size, padding=0)
13 |
14 | self.positional_encodings = nn.Parameter(torch.rand(500, embedding_dim), requires_grad=True)
15 |
16 | def forward(self, x):
17 | embeddings = self.embedding_convPxP(x).flatten(2) # .shape = n,c,s = n, embedding_dim, s
18 | # embeddings = nn.functional.pad(embeddings, (1,0)) # extra special token at start ?
19 | embeddings = embeddings + self.positional_encodings[:embeddings.shape[2], :].T.unsqueeze(0)
20 |
21 | # change to S,N,E format required by transformer
22 | embeddings = embeddings.permute(2, 0, 1)
23 | x = self.transformer_encoder(embeddings) # .shape = S, N, E
24 | return x
25 |
26 |
27 | class PixelWiseDotProduct(nn.Module):
28 | def __init__(self):
29 | super(PixelWiseDotProduct, self).__init__()
30 |
31 | def forward(self, x, K):
32 | n, c, h, w = x.size()
33 | _, cout, ck = K.size()
34 | assert c == ck, "Number of channels in x and Embedding dimension (at dim 2) of K matrix must match"
35 | y = torch.matmul(x.view(n, c, h * w).permute(0, 2, 1), K.permute(0, 2, 1)) # .shape = n, hw, cout
36 | return y.permute(0, 2, 1).view(n, cout, h, w)
37 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/adabins/miniViT.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from .layers import PatchTransformerEncoder, PixelWiseDotProduct
5 |
6 |
7 | class mViT(nn.Module):
8 | def __init__(self, in_channels, n_query_channels=128, patch_size=16, dim_out=256,
9 | embedding_dim=128, num_heads=4, norm='linear'):
10 | super(mViT, self).__init__()
11 | self.norm = norm
12 | self.n_query_channels = n_query_channels
13 | self.patch_transformer = PatchTransformerEncoder(in_channels, patch_size, embedding_dim, num_heads)
14 | self.dot_product_layer = PixelWiseDotProduct()
15 |
16 | self.conv3x3 = nn.Conv2d(in_channels, embedding_dim, kernel_size=3, stride=1, padding=1)
17 | self.regressor = nn.Sequential(nn.Linear(embedding_dim, 256),
18 | nn.LeakyReLU(),
19 | nn.Linear(256, 256),
20 | nn.LeakyReLU(),
21 | nn.Linear(256, dim_out))
22 |
23 | def forward(self, x):
24 | # n, c, h, w = x.size()
25 | tgt = self.patch_transformer(x.clone()) # .shape = S, N, E
26 |
27 | x = self.conv3x3(x)
28 |
29 | regression_head, queries = tgt[0, ...], tgt[1:self.n_query_channels + 1, ...]
30 |
31 | # Change from S, N, E to N, S, E
32 | queries = queries.permute(1, 0, 2)
33 | range_attention_maps = self.dot_product_layer(x, queries) # .shape = n, n_query_channels, h, w
34 |
35 | y = self.regressor(regression_head) # .shape = N, dim_out
36 | if self.norm == 'linear':
37 | y = torch.relu(y)
38 | eps = 0.1
39 | y = y + eps
40 | elif self.norm == 'softmax':
41 | return torch.softmax(y, dim=1), range_attention_maps
42 | else:
43 | y = torch.sigmoid(y)
44 | y = y / y.sum(dim=1, keepdim=True)
45 | return y, range_attention_maps
46 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | This license does not apply to the model weights.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/Quickstart.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import torch\n",
10 | "import requests\n",
11 | "\n",
12 | "! wget https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download -O weights.zip\n",
13 | "! unzip -d weights -j weights.zip\n",
14 | "from models.clipseg import CLIPDensePredT\n",
15 | "from PIL import Image\n",
16 | "from torchvision import transforms\n",
17 | "from matplotlib import pyplot as plt\n",
18 | "\n",
19 | "# load model\n",
20 | "model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64)\n",
21 | "model.eval();\n",
22 | "\n",
23 | "# non-strict, because we only stored decoder weights (not CLIP weights)\n",
24 | "model.load_state_dict(torch.load('weights/rd64-uni.pth', map_location=torch.device('cpu')), strict=False);"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {},
30 | "source": [
31 | "Load and normalize `example_image.jpg`. You can also load through an URL."
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "# load and normalize image\n",
41 | "input_image = Image.open('example_image.jpg')\n",
42 | "\n",
43 | "# or load from URL...\n",
44 | "# image_url = 'https://farm5.staticflickr.com/4141/4856248695_03475782dc_z.jpg'\n",
45 | "# input_image = Image.open(requests.get(image_url, stream=True).raw)\n",
46 | "\n",
47 | "transform = transforms.Compose([\n",
48 | " transforms.ToTensor(),\n",
49 | " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n",
50 | " transforms.Resize((352, 352)),\n",
51 | "])\n",
52 | "img = transform(input_image).unsqueeze(0)"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "Predict and visualize (this might take a few seconds if running without GPU support)"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "prompts = ['a glass', 'something to fill', 'wood', 'a jar']\n",
69 | "\n",
70 | "# predict\n",
71 | "with torch.no_grad():\n",
72 | " preds = model(img.repeat(4,1,1,1), prompts)[0]\n",
73 | "\n",
74 | "# visualize prediction\n",
75 | "_, ax = plt.subplots(1, 5, figsize=(15, 4))\n",
76 | "[a.axis('off') for a in ax.flatten()]\n",
77 | "ax[0].imshow(input_image)\n",
78 | "[ax[i+1].imshow(torch.sigmoid(preds[i][0])) for i in range(4)];\n",
79 | "[ax[i+1].text(0, -15, prompts[i]) for i in range(4)];"
80 | ]
81 | }
82 | ],
83 | "metadata": {
84 | "interpreter": {
85 | "hash": "800ed241f7db2bd3aa6942aa3be6809cdb30ee6b0a9e773dfecfa9fef1f4c586"
86 | },
87 | "kernelspec": {
88 | "display_name": "Python 3",
89 | "language": "python",
90 | "name": "python3"
91 | },
92 | "language_info": {
93 | "codemirror_mode": {
94 | "name": "ipython",
95 | "version": 3
96 | },
97 | "file_extension": ".py",
98 | "mimetype": "text/x-python",
99 | "name": "python",
100 | "nbconvert_exporter": "python",
101 | "pygments_lexer": "ipython3",
102 | "version": "3.8.10"
103 | }
104 | },
105 | "nbformat": 4,
106 | "nbformat_minor": 4
107 | }
108 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/Readme.md:
--------------------------------------------------------------------------------
1 | # Image Segmentation Using Text and Image Prompts
2 | This repository contains the code used in the paper ["Image Segmentation Using Text and Image Prompts"](https://arxiv.org/abs/2112.10003).
3 |
4 | **The Paper has been accepted to CVPR 2022!**
5 |
6 |
7 |
8 | The system allows you to create segmentation models without training, based on:
9 | - An arbitrary text query
10 | - Or an image with a mask highlighting stuff or an object.
11 |
12 | ### Quick Start
13 |
14 | In the `Quickstart.ipynb` notebook we provide the code for using a pre-trained CLIPSeg model. If you run the notebook locally, make sure you have downloaded the `rd64-uni.pth` weights, either manually or via the git lfs extension.
15 | It can also be used interactively using [MyBinder](https://mybinder.org/v2/gh/timojl/clipseg/HEAD?labpath=Quickstart.ipynb)
16 | (please note that the VM does not use a GPU, thus inference takes a few seconds).
17 |
18 |
19 | ### Dependencies
20 | This code base depends on pytorch, torchvision and clip (`pip install git+https://github.com/openai/CLIP.git`).
21 | Additional dependencies are hidden for double blind review.
22 |
23 |
24 | ### Datasets
25 |
26 | * `PhraseCut` and `PhraseCutPlus`: Referring expression dataset
27 | * `PFEPascalWrapper`: Wrapper class for PFENet's Pascal-5i implementation
28 | * `PascalZeroShot`: Wrapper class for PascalZeroShot
29 | * `COCOWrapper`: Wrapper class for COCO.
30 |
31 | ### Models
32 |
33 | * `CLIPDensePredT`: CLIPSeg model with transformer-based decoder.
34 | * `ViTDensePredT`: CLIPSeg model with transformer-based decoder.
35 |
36 | ### Third Party Dependencies
37 | For some of the datasets, third-party dependencies are required. Run the following commands in the `third_party` folder.
38 | ```bash
39 | git clone https://github.com/cvlab-yonsei/JoEm
40 | git clone https://github.com/Jia-Research-Lab/PFENet.git
41 | git clone https://github.com/ChenyunWu/PhraseCutDataset.git
42 | git clone https://github.com/juhongm999/hsnet.git
43 | ```
44 |
45 | ### Weights
46 |
47 | The MIT license does not apply to these weights.
48 |
49 | We provide two model weights, for D=64 (4.1MB) and D=16 (1.1MB).
50 | ```
51 | wget https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download -O weights.zip
52 | unzip -d weights -j weights.zip
53 | ```
54 |
55 |
56 | ### Training and Evaluation
57 |
58 | To train, use the `training.py` script with an experiment file and an experiment id as parameters. E.g. `python training.py phrasecut.yaml 0` will train the first phrasecut experiment, which is defined by the `configuration` and the first `individual_configurations` parameters. Model weights will be written to `logs/`.
59 |
60 | For evaluation use `score.py`. E.g. `python score.py phrasecut.yaml 0 0` will evaluate the first phrasecut experiment of `test_configuration` and the first configuration in `individual_configurations`.
61 |
62 |
63 | ### Usage of PFENet Wrappers
64 |
65 | In order to use the dataset and model wrappers for PFENet, the PFENet repository needs to be cloned to the root folder.
66 | `git clone https://github.com/Jia-Research-Lab/PFENet.git `
67 |
68 |
69 | ### License
70 |
71 | The source code files in this repository (excluding model weights) are released under MIT license.
72 |
73 | ### Citation
74 | ```
75 | @InProceedings{lueddecke22_cvpr,
76 | author = {L\"uddecke, Timo and Ecker, Alexander},
77 | title = {Image Segmentation Using Text and Image Prompts},
78 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
79 | month = {June},
80 | year = {2022},
81 | pages = {7086-7096}
82 | }
83 |
84 | ```
85 |
--------------------------------------------------------------------------------
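For reference, the Quick Start described in the Readme above boils down to the following sketch, condensed from `Quickstart.ipynb`; it assumes the `rd64-uni.pth` weights were downloaded into `weights/` and that the script is run from the clipseg folder:

```python
import torch
from PIL import Image
from torchvision import transforms
from models.clipseg import CLIPDensePredT

# load the model; non-strict because only decoder weights are stored
model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64)
model.eval()
model.load_state_dict(torch.load('weights/rd64-uni.pth', map_location='cpu'), strict=False)

# load and normalize an image
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.Resize((352, 352)),
])
img = transform(Image.open('example_image.jpg')).unsqueeze(0)

# one forward pass per prompt
prompts = ['a glass', 'something to fill', 'wood', 'a jar']
with torch.no_grad():
    preds = model(img.repeat(len(prompts), 1, 1, 1), prompts)[0]
```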
/scripts/deforum_helpers/src/clipseg/datasets/coco_wrapper.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | from types import new_class
3 | import torch
4 | import numpy as np
5 | import os
6 | import json
7 |
8 | from os.path import join, dirname, isdir, isfile, expanduser, realpath, basename
9 | from random import shuffle, seed as set_seed
10 | from PIL import Image
11 |
12 | from itertools import combinations
13 | from torchvision import transforms
14 | from torchvision.transforms.transforms import Resize
15 |
16 | from datasets.utils import blend_image_segmentation
17 | from general_utils import get_from_repository
18 |
19 | COCO_CLASSES = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
20 |
21 | class COCOWrapper(object):
22 |
23 | def __init__(self, split, fold=0, image_size=400, aug=None, mask='separate', negative_prob=0,
24 | with_class_label=False):
25 | super().__init__()
26 |
27 | self.mask = mask
28 | self.with_class_label = with_class_label
29 | self.negative_prob = negative_prob
30 |
31 | from third_party.hsnet.data.coco import DatasetCOCO
32 |
33 | get_from_repository('COCO-20i', ['COCO-20i.tar'])
34 |
35 | foldpath = join(dirname(__file__), '../third_party/hsnet/data/splits/coco/%s/fold%d.pkl')
36 |
37 | def build_img_metadata_classwise(self):
38 | with open(foldpath % (self.split, self.fold), 'rb') as f:
39 | img_metadata_classwise = pickle.load(f)
40 | return img_metadata_classwise
41 |
42 |
43 | DatasetCOCO.build_img_metadata_classwise = build_img_metadata_classwise
44 | # DatasetCOCO.read_mask = read_mask
45 |
46 | mean = [0.485, 0.456, 0.406]
47 | std = [0.229, 0.224, 0.225]
48 | transform = transforms.Compose([
49 | transforms.Resize((image_size, image_size)),
50 | transforms.ToTensor(),
51 | transforms.Normalize(mean, std)
52 | ])
53 |
54 | self.coco = DatasetCOCO(expanduser('~/datasets/COCO-20i/'), fold, transform, split, 1, False)
55 |
56 | self.all_classes = [self.coco.class_ids]
57 | self.coco.base_path = join(expanduser('~/datasets/COCO-20i'))
58 |
59 | def __len__(self):
60 | return len(self.coco)
61 |
62 | def __getitem__(self, i):
63 | sample = self.coco[i]
64 |
65 | label_name = COCO_CLASSES[int(sample['class_id'])]
66 |
67 | img_s, seg_s = sample['support_imgs'][0], sample['support_masks'][0]
68 |
69 | if self.negative_prob > 0 and torch.rand(1).item() < self.negative_prob:
70 | new_class_id = sample['class_id']
71 | while new_class_id == sample['class_id']:
72 | sample2 = self.coco[torch.randint(0, len(self), (1,)).item()]
73 | new_class_id = sample2['class_id']
74 | img_s = sample2['support_imgs'][0]
75 | seg_s = torch.zeros_like(seg_s)
76 |
77 | mask = self.mask
78 | if mask == 'separate':
79 | supp = (img_s, seg_s)
80 | elif mask == 'text_label':
81 | # DEPRECATED
82 | supp = [int(sample['class_id'])]
83 | elif mask == 'text':
84 | supp = [label_name]
85 | else:
86 | if mask.startswith('text_and_'):
87 | mask = mask[9:]
88 | label_add = [label_name]
89 | else:
90 | label_add = []
91 |
92 | supp = label_add + blend_image_segmentation(img_s, seg_s, mode=mask)
93 |
94 | if self.with_class_label:
95 | label = (torch.zeros(0), sample['class_id'],)
96 | else:
97 | label = (torch.zeros(0), )
98 |
99 | return (sample['query_img'],) + tuple(supp), (sample['query_mask'].unsqueeze(0),) + label
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/datasets/pascal_classes.json:
--------------------------------------------------------------------------------
1 | [{"id": 1, "synonyms": ["aeroplane"]}, {"id": 2, "synonyms": ["bicycle"]}, {"id": 3, "synonyms": ["bird"]}, {"id": 4, "synonyms": ["boat"]}, {"id": 5, "synonyms": ["bottle"]}, {"id": 6, "synonyms": ["bus"]}, {"id": 7, "synonyms": ["car"]}, {"id": 8, "synonyms": ["cat"]}, {"id": 9, "synonyms": ["chair"]}, {"id": 10, "synonyms": ["cow"]}, {"id": 11, "synonyms": ["diningtable"]}, {"id": 12, "synonyms": ["dog"]}, {"id": 13, "synonyms": ["horse"]}, {"id": 14, "synonyms": ["motorbike"]}, {"id": 15, "synonyms": ["person"]}, {"id": 16, "synonyms": ["pottedplant"]}, {"id": 17, "synonyms": ["sheep"]}, {"id": 18, "synonyms": ["sofa"]}, {"id": 19, "synonyms": ["train"]}, {"id": 20, "synonyms": ["tvmonitor"]}]
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/datasets/pascal_zeroshot.py:
--------------------------------------------------------------------------------
1 | from os.path import expanduser
2 | import torch
3 | import json
4 | import torchvision
5 | from general_utils import get_from_repository
6 | from general_utils import log
7 | from torchvision import transforms
8 |
9 | PASCAL_VOC_CLASSES_ZS = [['cattle.n.01', 'motorcycle.n.01'], ['aeroplane.n.01', 'sofa.n.01'],
10 | ['cat.n.01', 'television.n.03'], ['train.n.01', 'bottle.n.01'],
11 | ['chair.n.01', 'pot_plant.n.01']]
12 |
13 |
14 | class PascalZeroShot(object):
15 |
16 | def __init__(self, split, n_unseen, image_size=224) -> None:
17 | super().__init__()
18 |
19 | import sys
20 | sys.path.append('third_party/JoEm')
21 | from third_party.JoEm.data_loader.dataset import VOCSegmentation
22 | from third_party.JoEm.data_loader import get_seen_idx, get_unseen_idx, VOC
23 |
24 | self.pascal_classes = VOC
25 | self.image_size = image_size
26 |
27 | self.transform = transforms.Compose([
28 | transforms.Resize((image_size, image_size)),
29 | ])
30 |
31 | if split == 'train':
32 | self.voc = VOCSegmentation(get_unseen_idx(n_unseen), get_seen_idx(n_unseen),
33 | split=split, transform=True, transform_args=dict(base_size=312, crop_size=312),
34 | ignore_bg=False, ignore_unseen=False, remv_unseen_img=True)
35 | elif split == 'val':
36 | self.voc = VOCSegmentation(get_unseen_idx(n_unseen), get_seen_idx(n_unseen),
37 | split=split, transform=False,
38 | ignore_bg=False, ignore_unseen=False)
39 |
40 | self.unseen_idx = get_unseen_idx(n_unseen)
41 |
42 | def __len__(self):
43 | return len(self.voc)
44 |
45 | def __getitem__(self, i):
46 |
47 | sample = self.voc[i]
48 | label = sample['label'].long()
49 | all_labels = [l for l in torch.where(torch.bincount(label.flatten())>0)[0].numpy().tolist() if l != 255]
50 | class_indices = [l for l in all_labels]
51 | class_names = [self.pascal_classes[l] for l in all_labels]
52 |
53 | image = self.transform(sample['image'])
54 |
55 | label = transforms.Resize((self.image_size, self.image_size),
56 | interpolation=torchvision.transforms.InterpolationMode.NEAREST)(label.unsqueeze(0))[0]
57 |
58 | return (image,), (label, )
59 |
60 |
61 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/datasets/utils.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import torch
4 |
5 |
6 | def blend_image_segmentation(img, seg, mode, image_size=224):
7 |
8 |
9 | if mode in {'blur_highlight', 'blur3_highlight', 'blur3_highlight01', 'blur_highlight_random', 'crop'}:
10 | if isinstance(img, np.ndarray):
11 | img = torch.from_numpy(img)
12 |
13 | if isinstance(seg, np.ndarray):
14 | seg = torch.from_numpy(seg)
15 |
16 | if mode == 'overlay':
17 | out = img * seg
18 | out = [out.astype('float32')]
19 | elif mode == 'highlight':
20 | out = img * seg[None, :, :] * 0.85 + 0.15 * img
21 | out = [out.astype('float32')]
22 | elif mode == 'highlight2':
23 | img = img / 2
24 | out = (img+0.1) * seg[None, :, :] + 0.3 * img
25 | out = [out.astype('float32')]
26 | elif mode == 'blur_highlight':
27 | from evaluation_utils import img_preprocess
28 | out = [img_preprocess((None, [img], [seg]), blur=1, bg_fac=0.5).numpy()[0] - 0.01]
29 | elif mode == 'blur3_highlight':
30 | from evaluation_utils import img_preprocess
31 | out = [img_preprocess((None, [img], [seg]), blur=3, bg_fac=0.5).numpy()[0] - 0.01]
32 | elif mode == 'blur3_highlight01':
33 | from evaluation_utils import img_preprocess
34 | out = [img_preprocess((None, [img], [seg]), blur=3, bg_fac=0.1).numpy()[0] - 0.01]
35 | elif mode == 'blur_highlight_random':
36 | from evaluation_utils import img_preprocess
37 | out = [img_preprocess((None, [img], [seg]), blur=0 + torch.randint(0, 3, (1,)).item(), bg_fac=0.1 + 0.8*torch.rand(1).item()).numpy()[0] - 0.01]
38 | elif mode == 'crop':
39 | from evaluation_utils import img_preprocess
40 | out = [img_preprocess((None, [img], [seg]), blur=1, center_context=0.1, image_size=image_size)[0].numpy()]
41 | elif mode == 'crop_blur_highlight':
42 | from evaluation_utils import img_preprocess
43 | out = [img_preprocess((None, [img], [seg]), blur=3, center_context=0.1, bg_fac=0.1, image_size=image_size)[0].numpy()]
44 | elif mode == 'crop_blur_highlight352':
45 | from evaluation_utils import img_preprocess
46 | out = [img_preprocess((None, [img], [seg]), blur=3, center_context=0.1, bg_fac=0.1, image_size=352)[0].numpy()]
47 | elif mode == 'shape':
48 | out = [np.stack([seg[:, :]]*3).astype('float32')]
49 | elif mode == 'concat':
50 | out = [np.concatenate([img, seg[None, :, :]]).astype('float32')]
51 | elif mode == 'image_only':
52 | out = [img.astype('float32')]
53 | elif mode == 'image_black':
54 | out = [img.astype('float32')*0]
55 | elif mode is None:
56 | out = [img.astype('float32')]
57 | elif mode == 'separate':
58 | out = [img.astype('float32'), seg.astype('int64')]
59 | elif mode == 'separate_img_black':
60 | out = [img.astype('float32')*0, seg.astype('int64')]
61 | elif mode == 'separate_seg_ones':
62 | out = [img.astype('float32'), np.ones_like(seg).astype('int64')]
63 | elif mode == 'separate_both_black':
64 | out = [img.astype('float32')*0, seg.astype('int64')*0]
65 | else:
66 | raise ValueError(f'invalid mode: {mode}')
67 |
68 | return out
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/environment.yml:
--------------------------------------------------------------------------------
1 | name: clipseg-environment
2 | channels:
3 | - conda-forge
4 | - pytorch
5 | dependencies:
6 | - numpy
7 | - scipy
8 | - matplotlib-base
9 | - pip
10 | - pip:
11 | - --find-links https://download.pytorch.org/whl/torch_stable.html
12 | - torch==1.10.0+cpu
13 | - torchvision==0.11.1+cpu
14 | - opencv-python
15 | - git+https://github.com/openai/CLIP.git
16 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/example_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/scripts/deforum_helpers/src/clipseg/example_image.jpg
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/experiments/ablation.yaml:
--------------------------------------------------------------------------------
1 | configuration:
2 | batch_size: 64
3 | optimizer: torch.optim.AdamW
4 |
5 | lr: 0.001
6 |
7 | trainer: experiment_setup.train_loop
8 | scorer: experiment_setup.score
9 | model: models.clipseg.CLIPDensePredT
10 |
11 | lr_scheduler: cosine
12 | T_max: 20000
13 | eta_min: 0.0001
14 |
15 | max_iterations: 20000 # <-##########################################
16 | val_interval: null
17 |
18 | # dataset
19 | dataset: datasets.phrasecut.PhraseCut # <-----------------
20 | split_mode: pascal_test
21 | split: train
22 | mask: text_and_crop_blur_highlight352
23 | image_size: 352
24 | negative_prob: 0.2
25 | mix_text_max: 0.5
26 |
27 | # general
28 | mix: True # <-----------------
29 | prompt: shuffle+
30 | norm_cond: True
31 | mix_text_min: 0.0
32 | with_visual: True
33 |
34 | # model
35 | version: 'ViT-B/16'
36 | extract_layers: [3, 7, 9]
37 | reduce_dim: 64
38 | depth: 3
39 | fix_shift: False # <-##########################################
40 |
41 | loss: torch.nn.functional.binary_cross_entropy_with_logits
42 | amp: True
43 |
44 | test_configuration_common:
45 | normalize: True
46 | image_size: 352
47 | batch_size: 32
48 | sigmoid: True
49 | split: test
50 | label_support: True
51 |
52 | test_configuration:
53 |
54 | -
55 | name: pc
56 | metric: metrics.FixedIntervalMetrics
57 | test_dataset: phrasecut
58 | mask: text
59 |
60 | -
61 | name: pc-vis
62 | metric: metrics.FixedIntervalMetrics
63 | test_dataset: phrasecut
64 | mask: crop_blur_highlight352
65 | with_visual: True
66 | visual_only: True
67 |
68 |
69 | columns: [name,
70 | pc_fgiou_best, pc_miou_best, pc_fgiou_0.5,
71 | pc-vis_fgiou_best, pc-vis_miou_best, pc-vis_fgiou_0.5,
72 | duration]
73 |
74 |
75 | individual_configurations:
76 |
77 | - {name: rd64-uni}
78 | - {name: rd64-no-pretrain, not_pretrained: True, lr: 0.0003}
79 | - {name: rd64-no-negatives, negative_prob: 0.0}
80 | - {name: rd64-neg0.5, negative_prob: 0.5}
81 | - {name: rd64-no-visual, with_visual: False, mix: False}
82 | - {name: rd16-uni, reduce_dim: 16}
83 | - {name: rd64-layer3, extract_layers: [3], depth: 1}
84 | - {name: rd64-blur-highlight, mask: text_and_blur_highlight, test_configuration: {mask: blur_highlight}}
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/experiments/pascal_1shot.yaml:
--------------------------------------------------------------------------------
1 | configuration:
2 | batch_size: 64
3 | optimizer: torch.optim.AdamW
4 |
5 | lr: 0.001
6 |
7 | trainer: experiment_setup.train_loop
8 | scorer: experiment_setup.score
9 | model: models.clipseg.CLIPDensePredT
10 |
11 | lr_scheduler: cosine
12 | T_max: 20000
13 | eta_min: 0.0001
14 |
15 | max_iterations: 20000 # <-##########################################
16 | val_interval: null
17 |
18 | # dataset
19 | dataset: datasets.phrasecut.PhraseCut
20 | split_mode: pascal_test
21 | mode: train
22 | mask: text_and_crop_blur_highlight352
23 | image_size: 352
24 | normalize: True
25 | pre_crop_image_size: [sample, 1, 1.5]
26 | aug: 1new
27 | with_visual: True
28 | split: train
29 |
30 | # general
31 | mix: True
32 | prompt: shuffle+
33 | norm_cond: True
34 | mix_text_min: 0.0
35 |
36 | # model
37 | out: 1
38 | version: 'ViT-B/16'
39 | extract_layers: [3, 7, 9]
40 | reduce_dim: 64
41 | depth: 3
42 |
43 | loss: torch.nn.functional.binary_cross_entropy_with_logits
44 | amp: True
45 |
46 | test_configuration_common:
47 | normalize: True
48 | image_size: 352
49 | metric: metrics.FixedIntervalMetrics
50 | batch_size: 1
51 | test_dataset: pascal
52 | sigmoid: True
53 | # max_iterations: 250
54 |
55 | test_configuration:
56 |
57 | -
58 | name: pas_t
59 | mask: text
60 |
61 | -
62 | name: pas_h
63 | mask: blur3_highlight01
64 |
65 | -
66 | name: pas_h2
67 | mask: crop_blur_highlight352
68 |
69 |
70 | columns: [name,
71 | pas_t_fgiou_best, pas_t_miou_best, pas_t_fgiou_ct,
72 | pas_h_fgiou_best, pas_h_miou_best, pas_h_fgiou_ct,
73 | pas_h2_fgiou_best, pas_h2_miou_best, pas_h2_fgiou_ct, pas_h2_fgiou_best_t,
74 | train_loss, duration, date
75 | ]
76 |
77 | individual_configurations:
78 |
79 | - {name: rd64-uni-phrasepas5i-0, remove_classes: [pas5i, 0], negative_prob: 0.2, mix_text_max: 0.5, test_configuration: {splits: [0], custom_threshold: 0.24}}
80 | - {name: rd64-uni-phrasepas5i-1, remove_classes: [pas5i, 1], negative_prob: 0.2, mix_text_max: 0.5, test_configuration: {splits: [1], custom_threshold: 0.24}}
81 | - {name: rd64-uni-phrasepas5i-2, remove_classes: [pas5i, 2], negative_prob: 0.2, mix_text_max: 0.5, test_configuration: {splits: [2], custom_threshold: 0.24}}
82 | - {name: rd64-uni-phrasepas5i-3, remove_classes: [pas5i, 3], negative_prob: 0.2, mix_text_max: 0.5, test_configuration: {splits: [3], custom_threshold: 0.24}}
83 |
84 |
85 | - {name: rd64-phrasepas5i-0, remove_classes: [pas5i, 0], negative_prob: 0.0, test_configuration: {splits: [0], custom_threshold: 0.28}}
86 | - {name: rd64-phrasepas5i-1, remove_classes: [pas5i, 1], negative_prob: 0.0, test_configuration: {splits: [1], custom_threshold: 0.28}}
87 | - {name: rd64-phrasepas5i-2, remove_classes: [pas5i, 2], negative_prob: 0.0, test_configuration: {splits: [2], custom_threshold: 0.28}}
88 | - {name: rd64-phrasepas5i-3, remove_classes: [pas5i, 3], negative_prob: 0.0, test_configuration: {splits: [3], custom_threshold: 0.28}}
89 |
90 |
91 | # baseline
92 | - {name: bl64-phrasepas5i-0, model: models.clipseg.CLIPDenseBaseline, remove_classes: [pas5i, 0], reduce2_dim: 64, negative_prob: 0.0, test_configuration: {splits: [0], custom_threshold: 0.24}}
93 | - {name: bl64-phrasepas5i-1, model: models.clipseg.CLIPDenseBaseline, remove_classes: [pas5i, 1], reduce2_dim: 64, negative_prob: 0.0, test_configuration: {splits: [1], custom_threshold: 0.24}}
94 | - {name: bl64-phrasepas5i-2, model: models.clipseg.CLIPDenseBaseline, remove_classes: [pas5i, 2], reduce2_dim: 64, negative_prob: 0.0, test_configuration: {splits: [2], custom_threshold: 0.24}}
95 | - {name: bl64-phrasepas5i-3, model: models.clipseg.CLIPDenseBaseline, remove_classes: [pas5i, 3], reduce2_dim: 64, negative_prob: 0.0, test_configuration: {splits: [3], custom_threshold: 0.24}}
96 |
97 | # ViT
98 | - {name: vit64-uni-phrasepas5i-0, remove_classes: [pas5i, 0], model: models.vitseg.VITDensePredT, negative_prob: 0.2, mix_text_max: 0.5, lr: 0.0001, test_configuration: {splits: [0], custom_threshold: 0.02}}
99 | - {name: vit64-uni-phrasepas5i-1, remove_classes: [pas5i, 1], model: models.vitseg.VITDensePredT, negative_prob: 0.2, mix_text_max: 0.5, lr: 0.0001, test_configuration: {splits: [1], custom_threshold: 0.02}}
100 | - {name: vit64-uni-phrasepas5i-2, remove_classes: [pas5i, 2], model: models.vitseg.VITDensePredT, negative_prob: 0.2, mix_text_max: 0.5, lr: 0.0001, test_configuration: {splits: [2], custom_threshold: 0.02}}
101 | - {name: vit64-uni-phrasepas5i-3, remove_classes: [pas5i, 3], model: models.vitseg.VITDensePredT, negative_prob: 0.2, mix_text_max: 0.5, lr: 0.0001, test_configuration: {splits: [3], custom_threshold: 0.02}}
102 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/experiments/phrasecut.yaml:
--------------------------------------------------------------------------------
1 | configuration:
2 | batch_size: 64
3 | optimizer: torch.optim.AdamW
4 |
5 | lr: 0.001
6 |
7 | trainer: experiment_setup.train_loop
8 | scorer: experiment_setup.score
9 | model: models.clipseg.CLIPDensePredT
10 |
11 | lr_scheduler: cosine
12 | T_max: 20000
13 | eta_min: 0.0001
14 |
15 | max_iterations: 20000
16 | val_interval: null
17 |
18 | # dataset
19 | dataset: datasets.phrasecut.PhraseCut # <-----------------
20 | split_mode: pascal_test
21 | split: train
22 | mask: text_and_crop_blur_highlight352
23 | image_size: 352
24 | normalize: True
25 | pre_crop_image_size: [sample, 1, 1.5]
26 | aug: 1new
27 |
28 | # general
29 | mix: False # <-----------------
30 | prompt: shuffle+
31 | norm_cond: True
32 | mix_text_min: 0.0
33 |
34 | # model
35 | out: 1
36 | extract_layers: [3, 7, 9]
37 | reduce_dim: 64
38 | depth: 3
39 | fix_shift: False
40 |
41 | loss: torch.nn.functional.binary_cross_entropy_with_logits
42 | amp: True
43 |
44 | test_configuration_common:
45 | normalize: True
46 | image_size: 352
47 | batch_size: 32
48 | # max_iterations: 5
49 | # max_iterations: 150
50 |
51 | test_configuration:
52 |
53 | -
54 | name: pc # old: phrasecut
55 | metric: metrics.FixedIntervalMetrics
56 | test_dataset: phrasecut
57 | split: test
58 | mask: text
59 | label_support: True
60 | sigmoid: True
61 |
62 |
63 | columns: [i, name, pc_miou_0.3, pc_fgiou_0.3, pc_fgiou_0.5, pc_ap, duration, date]
64 |
65 |
66 | individual_configurations:
67 |
68 | # important ones
69 |
70 |
71 | - {name: rd64-uni, version: 'ViT-B/16', reduce_dim: 64, with_visual: True, negative_prob: 0.2, mix: True, mix_text_max: 0.5}
72 |
73 | # this was accidentally trained using the old mask
74 | - {name: rd128-vit16-phrasecut, version: 'ViT-B/16', reduce_dim: 128, mask: text_and_blur3_highlight01}
75 | - {name: rd64-uni-novis, version: 'ViT-B/16', reduce_dim: 64, with_visual: False, negative_prob: 0.2, mix: False}
76 | # this was accidentally trained using the old mask
77 | - {name: baseline3-vit16-phrasecut, model: models.clipseg.CLIPDenseBaseline, version: 'ViT-B/16', reduce_dim: 64, reduce2_dim: 64, mask: text_and_blur3_highlight01}
78 |
79 | - {name: vit64-uni, version: 'ViT-B/16', model: models.vitseg.VITDensePredT, reduce_dim: 64, with_visual: True, only_visual: True, negative_prob: 0.2, mask: crop_blur_highlight352, lr: 0.0003}
80 | - {name: vit64-uni-novis, version: 'ViT-B/16', model: models.vitseg.VITDensePredT, with_visual: False, reduce_dim: 64, lr: 0.0001}
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/scripts/deforum_helpers/src/clipseg/overview.png
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | with open("README.md", "r", encoding="utf-8") as readme_file:
4 | readme = readme_file.read()
5 |
6 | requirements = [
7 | "numpy",
8 | "scipy",
9 | "matplotlib",
10 | "torch",
11 | "torchvision",
12 | "opencv-python",
13 | "CLIP @ git+https://github.com/openai/CLIP.git"
14 | ]
15 |
16 | setup(
17 | name='clipseg',
18 | packages=['clipseg'],
19 | package_dir={'clipseg': 'models'},
20 | package_data={'clipseg': [
21 | "../weights/*.pth",
22 | ]},
23 | version='0.0.1',
24 | url='https://github.com/timojl/clipseg',
25 | python_requires='>=3.9',
26 | install_requires=requirements,
27 | description='This repository contains the code used in the paper "Image Segmentation Using Text and Image Prompts".',
28 | long_description=readme,
29 | long_description_content_type="text/markdown",
30 | )
31 |
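A hypothetical usage sketch for the package configured above: once installed, the models directory is exposed as the clipseg package, so the dense prediction transformer and the bundled rd64-uni.pth weights can be loaded roughly as follows (class name and parameters follow the upstream clipseg quickstart; the weights path is illustrative):

import torch
from clipseg.clipseg import CLIPDensePredT

# Build the reduce_dim=64 variant and load the packaged weights; strict=False
# because the checkpoint stores only the decoder, not the frozen CLIP backbone.
model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64)
model.load_state_dict(torch.load('weights/rd64-uni.pth', map_location='cpu'), strict=False)
model.eval()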
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/clipseg/weights/rd64-uni.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/scripts/deforum_helpers/src/clipseg/weights/rd64-uni.pth
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/leres/lib/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/leres/lib/multi_depth_model_woauxi.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from leres.lib import network_auxi as network
5 | from leres.lib.net_tools import get_func
6 |
7 | class RelDepthModel(nn.Module):
8 | def __init__(self, backbone='resnet50'):
9 | super(RelDepthModel, self).__init__()
10 | if backbone == 'resnet50':
11 | encoder = 'resnet50_stride32'
12 | elif backbone == 'resnext101':
13 | encoder = 'resnext101_stride32x8d'
14 | self.depth_model = DepthModel(encoder)
15 |
16 | def inference(self, rgb):
17 | with torch.no_grad():
18 | input = rgb.cuda()
19 | depth = self.depth_model(input)
20 | pred_depth_out = depth - depth.min() + 0.01
21 | return pred_depth_out
22 |
23 |
24 | class DepthModel(nn.Module):
25 | def __init__(self, encoder):
26 | super(DepthModel, self).__init__()
27 | backbone = network.__name__.split('.')[-1] + '.' + encoder
28 | self.encoder_modules = get_func(backbone)()
29 | self.decoder_modules = network.Decoder()
30 |
31 | def forward(self, x):
32 | lateral_out = self.encoder_modules(x)
33 | out_logit = self.decoder_modules(lateral_out)
34 | return out_logit
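A minimal hypothetical usage sketch for the wrapper above (assumes a CUDA device, since inference() moves its input to the GPU, and assumes the src directory is on sys.path so that leres.lib is importable):

import torch
from leres.lib.multi_depth_model_woauxi import RelDepthModel

# Build the ResNeXt101 variant and run relative-depth inference on a single
# placeholder RGB tensor; the output is shifted so all values are positive.
model = RelDepthModel(backbone='resnext101').cuda().eval()
rgb = torch.rand(1, 3, 448, 448)  # stands in for a normalized RGB image
depth = model.inference(rgb)
print(depth.shape)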
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/leres/lib/net_tools.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import torch
3 | import os
4 | from collections import OrderedDict
5 |
6 |
7 | def get_func(func_name):
8 | """Helper to return a function object by name. func_name must identify a
9 | function in this module or the path to a function relative to the base
10 | 'modeling' module.
11 | """
12 | if func_name == '':
13 | return None
14 | try:
15 | parts = func_name.split('.')
16 | # Refers to a function in this module
17 | if len(parts) == 1:
18 | return globals()[parts[0]]
19 | # Otherwise, assume we're referencing a module under modeling
20 | module_name = 'leres.lib.' + '.'.join(parts[:-1])
21 | module = importlib.import_module(module_name)
22 | return getattr(module, parts[-1])
23 | except Exception:
24 | print('Failed to find function: %s' % func_name)
25 | raise
26 |
27 | def load_ckpt(args, depth_model, shift_model, focal_model):
28 | """
29 | Load checkpoint.
30 | """
31 | if os.path.isfile(args):
32 | print("loading LeReS checkpoint from %s" % args)
33 | checkpoint = torch.load(args)
34 | if shift_model is not None:
35 | shift_model.load_state_dict(strip_prefix_if_present(checkpoint['shift_model'], 'module.'),
36 | strict=True)
37 | if focal_model is not None:
38 | focal_model.load_state_dict(strip_prefix_if_present(checkpoint['focal_model'], 'module.'),
39 | strict=True)
40 | depth_model.load_state_dict(strip_prefix_if_present(checkpoint['depth_model'], "module."),
41 | strict=True)
42 | del checkpoint
43 | torch.cuda.empty_cache()
44 |
45 |
46 | def strip_prefix_if_present(state_dict, prefix):
47 | keys = sorted(state_dict.keys())
48 | if not all(key.startswith(prefix) for key in keys):
49 | return state_dict
50 | stripped_state_dict = OrderedDict()
51 | for key, value in state_dict.items():
52 | stripped_state_dict[key.replace(prefix, "")] = value
53 | return stripped_state_dict
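A short hypothetical illustration of the two helpers above, assuming leres.lib is importable: get_func resolves a dotted name under leres.lib (the pattern DepthModel.__init__ relies on), and strip_prefix_if_present removes the 'module.' prefix that torch.nn.DataParallel checkpoints carry.

from collections import OrderedDict
from leres.lib.net_tools import get_func, strip_prefix_if_present

# Resolve the encoder constructor by its dotted name under leres.lib.
builder = get_func('network_auxi.resnext101_stride32x8d')

# Strip the DataParallel prefix so the weights fit a plain (non-parallel) module.
state = OrderedDict([('module.conv1.weight', 1), ('module.conv1.bias', 2)])
clean = strip_prefix_if_present(state, 'module.')
print(list(clean.keys()))  # ['conv1.weight', 'conv1.bias']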
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/leres/lib/spvcnn_utils.py:
--------------------------------------------------------------------------------
1 | import torchsparse.nn.functional as spf
2 | from torchsparse.point_tensor import PointTensor
3 | from torchsparse.utils.kernel_region import *
4 | from torchsparse.utils.helpers import *
5 |
6 |
7 | __all__ = ['initial_voxelize', 'point_to_voxel', 'voxel_to_point']
8 |
9 |
10 | # z: PointTensor
11 | # return: SparseTensor
12 | def initial_voxelize(z, init_res, after_res):
13 | new_float_coord = torch.cat(
14 | [(z.C[:, :3] * init_res) / after_res, z.C[:, -1].view(-1, 1)], 1)
15 |
16 | pc_hash = spf.sphash(torch.floor(new_float_coord).int())
17 | sparse_hash = torch.unique(pc_hash)
18 | idx_query = spf.sphashquery(pc_hash, sparse_hash)
19 | counts = spf.spcount(idx_query.int(), len(sparse_hash))
20 |
21 | inserted_coords = spf.spvoxelize(torch.floor(new_float_coord), idx_query,
22 | counts)
23 | inserted_coords = torch.round(inserted_coords).int()
24 | inserted_feat = spf.spvoxelize(z.F, idx_query, counts)
25 |
26 | new_tensor = SparseTensor(inserted_feat, inserted_coords, 1)
27 | new_tensor.check()
28 | z.additional_features['idx_query'][1] = idx_query
29 | z.additional_features['counts'][1] = counts
30 | z.C = new_float_coord
31 |
32 | return new_tensor
33 |
34 |
35 | # x: SparseTensor, z: PointTensor
36 | # return: SparseTensor
37 | def point_to_voxel(x, z):
38 | if z.additional_features is None or z.additional_features.get('idx_query') is None\
39 | or z.additional_features['idx_query'].get(x.s) is None:
40 | #pc_hash = hash_gpu(torch.floor(z.C).int())
41 | pc_hash = spf.sphash(
42 | torch.cat([
43 | torch.floor(z.C[:, :3] / x.s).int() * x.s,
44 | z.C[:, -1].int().view(-1, 1)
45 | ], 1))
46 | sparse_hash = spf.sphash(x.C)
47 | idx_query = spf.sphashquery(pc_hash, sparse_hash)
48 | counts = spf.spcount(idx_query.int(), x.C.shape[0])
49 | z.additional_features['idx_query'][x.s] = idx_query
50 | z.additional_features['counts'][x.s] = counts
51 | else:
52 | idx_query = z.additional_features['idx_query'][x.s]
53 | counts = z.additional_features['counts'][x.s]
54 |
55 | inserted_feat = spf.spvoxelize(z.F, idx_query, counts)
56 | new_tensor = SparseTensor(inserted_feat, x.C, x.s)
57 | new_tensor.coord_maps = x.coord_maps
58 | new_tensor.kernel_maps = x.kernel_maps
59 |
60 | return new_tensor
61 |
62 |
63 | # x: SparseTensor, z: PointTensor
64 | # return: PointTensor
65 | def voxel_to_point(x, z, nearest=False):
66 | if z.idx_query is None or z.weights is None or z.idx_query.get(
67 | x.s) is None or z.weights.get(x.s) is None:
68 | kr = KernelRegion(2, x.s, 1)
69 | off = kr.get_kernel_offset().to(z.F.device)
70 | #old_hash = kernel_hash_gpu(torch.floor(z.C).int(), off)
71 | old_hash = spf.sphash(
72 | torch.cat([
73 | torch.floor(z.C[:, :3] / x.s).int() * x.s,
74 | z.C[:, -1].int().view(-1, 1)
75 | ], 1), off)
76 | pc_hash = spf.sphash(x.C.to(z.F.device))
77 | idx_query = spf.sphashquery(old_hash, pc_hash)
78 | weights = spf.calc_ti_weights(z.C, idx_query,
79 | scale=x.s).transpose(0, 1).contiguous()
80 | idx_query = idx_query.transpose(0, 1).contiguous()
81 | if nearest:
82 | weights[:, 1:] = 0.
83 | idx_query[:, 1:] = -1
84 | new_feat = spf.spdevoxelize(x.F, idx_query, weights)
85 | new_tensor = PointTensor(new_feat,
86 | z.C,
87 | idx_query=z.idx_query,
88 | weights=z.weights)
89 | new_tensor.additional_features = z.additional_features
90 | new_tensor.idx_query[x.s] = idx_query
91 | new_tensor.weights[x.s] = weights
92 | z.idx_query[x.s] = idx_query
93 | z.weights[x.s] = weights
94 |
95 | else:
96 | new_feat = spf.spdevoxelize(x.F, z.idx_query.get(x.s), z.weights.get(x.s))
97 | new_tensor = PointTensor(new_feat,
98 | z.C,
99 | idx_query=z.idx_query,
100 | weights=z.weights)
101 | new_tensor.additional_features = z.additional_features
102 |
103 | return new_tensor
104 |
105 |
106 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/midas/backbones/levit.py:
--------------------------------------------------------------------------------
1 | import timm
2 | import torch
3 | import torch.nn as nn
4 | import numpy as np
5 |
6 | from .utils import activations, get_activation, Transpose
7 |
8 |
9 | def forward_levit(pretrained, x):
10 | pretrained.model.forward_features(x)
11 |
12 | layer_1 = pretrained.activations["1"]
13 | layer_2 = pretrained.activations["2"]
14 | layer_3 = pretrained.activations["3"]
15 |
16 | layer_1 = pretrained.act_postprocess1(layer_1)
17 | layer_2 = pretrained.act_postprocess2(layer_2)
18 | layer_3 = pretrained.act_postprocess3(layer_3)
19 |
20 | return layer_1, layer_2, layer_3
21 |
22 |
23 | def _make_levit_backbone(
24 | model,
25 | hooks=[3, 11, 21],
26 | patch_grid=[14, 14]
27 | ):
28 | pretrained = nn.Module()
29 |
30 | pretrained.model = model
31 | pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1"))
32 | pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2"))
33 | pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3"))
34 |
35 | pretrained.activations = activations
36 |
37 | patch_grid_size = np.array(patch_grid, dtype=int)
38 |
39 | pretrained.act_postprocess1 = nn.Sequential(
40 | Transpose(1, 2),
41 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist()))
42 | )
43 | pretrained.act_postprocess2 = nn.Sequential(
44 | Transpose(1, 2),
45 | nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 2).astype(int)).tolist()))
46 | )
47 | pretrained.act_postprocess3 = nn.Sequential(
48 | Transpose(1, 2),
49 | nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 4).astype(int)).tolist()))
50 | )
51 |
52 | return pretrained
53 |
54 |
55 | class ConvTransposeNorm(nn.Sequential):
56 | """
57 | Modification of
58 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: ConvNorm
59 | such that ConvTranspose2d is used instead of Conv2d.
60 | """
61 |
62 | def __init__(
63 | self, in_chs, out_chs, kernel_size=1, stride=1, pad=0, dilation=1,
64 | groups=1, bn_weight_init=1):
65 | super().__init__()
66 | self.add_module('c',
67 | nn.ConvTranspose2d(in_chs, out_chs, kernel_size, stride, pad, dilation, groups, bias=False))
68 | self.add_module('bn', nn.BatchNorm2d(out_chs))
69 |
70 | nn.init.constant_(self.bn.weight, bn_weight_init)
71 |
72 | @torch.no_grad()
73 | def fuse(self):
74 | c, bn = self._modules.values()
75 | w = bn.weight / (bn.running_var + bn.eps) ** 0.5
76 | w = c.weight * w[:, None, None, None]
77 | b = bn.bias - bn.running_mean * bn.weight / (bn.running_var + bn.eps) ** 0.5
78 | m = nn.ConvTranspose2d(
79 | w.size(1), w.size(0), w.shape[2:], stride=self.c.stride,
80 | padding=self.c.padding, dilation=self.c.dilation, groups=self.c.groups)
81 | m.weight.data.copy_(w)
82 | m.bias.data.copy_(b)
83 | return m
84 |
85 |
86 | def stem_b4_transpose(in_chs, out_chs, activation):
87 | """
88 | Modification of
89 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: stem_b16
90 | such that ConvTranspose2d is used instead of Conv2d and stem is also reduced to the half.
91 | """
92 | return nn.Sequential(
93 | ConvTransposeNorm(in_chs, out_chs, 3, 2, 1),
94 | activation(),
95 | ConvTransposeNorm(out_chs, out_chs // 2, 3, 2, 1),
96 | activation())
97 |
98 |
99 | def _make_pretrained_levit_384(pretrained, hooks=None):
100 | model = timm.create_model("levit_384", pretrained=pretrained)
101 |
102 | hooks = [3, 11, 21] if hooks is None else hooks
103 | return _make_levit_backbone(
104 | model,
105 | hooks=hooks
106 | )
107 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/midas/backbones/next_vit.py:
--------------------------------------------------------------------------------
1 | import timm
2 |
3 | import torch.nn as nn
4 |
5 | from pathlib import Path
6 | from .utils import activations, forward_default, get_activation
7 |
8 | from ..external.next_vit.classification.nextvit import *
9 |
10 |
11 | def forward_next_vit(pretrained, x):
12 | return forward_default(pretrained, x, "forward")
13 |
14 |
15 | def _make_next_vit_backbone(
16 | model,
17 | hooks=[2, 6, 36, 39],
18 | ):
19 | pretrained = nn.Module()
20 |
21 | pretrained.model = model
22 | pretrained.model.features[hooks[0]].register_forward_hook(get_activation("1"))
23 | pretrained.model.features[hooks[1]].register_forward_hook(get_activation("2"))
24 | pretrained.model.features[hooks[2]].register_forward_hook(get_activation("3"))
25 | pretrained.model.features[hooks[3]].register_forward_hook(get_activation("4"))
26 |
27 | pretrained.activations = activations
28 |
29 | return pretrained
30 |
31 |
32 | def _make_pretrained_next_vit_large_6m(hooks=None):
33 | model = timm.create_model("nextvit_large")
34 |
35 | hooks = [2, 6, 36, 39] if hooks is None else hooks
36 | return _make_next_vit_backbone(
37 | model,
38 | hooks=hooks,
39 | )
40 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/midas/backbones/swin.py:
--------------------------------------------------------------------------------
1 | import timm
2 |
3 | from .swin_common import _make_swin_backbone
4 |
5 |
6 | def _make_pretrained_swinl12_384(pretrained, hooks=None):
7 | model = timm.create_model("swin_large_patch4_window12_384", pretrained=pretrained)
8 |
9 | hooks = [1, 1, 17, 1] if hooks is None else hooks
10 | return _make_swin_backbone(
11 | model,
12 | hooks=hooks
13 | )
14 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/midas/backbones/swin2.py:
--------------------------------------------------------------------------------
1 | import timm
2 |
3 | from .swin_common import _make_swin_backbone
4 |
5 |
6 | def _make_pretrained_swin2l24_384(pretrained, hooks=None):
7 | model = timm.create_model("swinv2_large_window12to24_192to384_22kft1k", pretrained=pretrained)
8 |
9 | hooks = [1, 1, 17, 1] if hooks is None else hooks
10 | return _make_swin_backbone(
11 | model,
12 | hooks=hooks
13 | )
14 |
15 |
16 | def _make_pretrained_swin2b24_384(pretrained, hooks=None):
17 | model = timm.create_model("swinv2_base_window12to24_192to384_22kft1k", pretrained=pretrained)
18 |
19 | hooks = [1, 1, 17, 1] if hooks is None else hooks
20 | return _make_swin_backbone(
21 | model,
22 | hooks=hooks
23 | )
24 |
25 |
26 | def _make_pretrained_swin2t16_256(pretrained, hooks=None):
27 | model = timm.create_model("swinv2_tiny_window16_256", pretrained=pretrained)
28 |
29 | hooks = [1, 1, 5, 1] if hooks is None else hooks
30 | return _make_swin_backbone(
31 | model,
32 | hooks=hooks,
33 | patch_grid=[64, 64]
34 | )
35 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/midas/backbones/swin_common.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import torch.nn as nn
4 | import numpy as np
5 |
6 | from .utils import activations, forward_default, get_activation, Transpose
7 |
8 |
9 | def forward_swin(pretrained, x):
10 | return forward_default(pretrained, x)
11 |
12 |
13 | def _make_swin_backbone(
14 | model,
15 | hooks=[1, 1, 17, 1],
16 | patch_grid=[96, 96]
17 | ):
18 | pretrained = nn.Module()
19 |
20 | pretrained.model = model
21 | pretrained.model.layers[0].blocks[hooks[0]].register_forward_hook(get_activation("1"))
22 | pretrained.model.layers[1].blocks[hooks[1]].register_forward_hook(get_activation("2"))
23 | pretrained.model.layers[2].blocks[hooks[2]].register_forward_hook(get_activation("3"))
24 | pretrained.model.layers[3].blocks[hooks[3]].register_forward_hook(get_activation("4"))
25 |
26 | pretrained.activations = activations
27 |
28 | if hasattr(model, "patch_grid"):
29 | used_patch_grid = model.patch_grid
30 | else:
31 | used_patch_grid = patch_grid
32 |
33 | patch_grid_size = np.array(used_patch_grid, dtype=int)
34 |
35 | pretrained.act_postprocess1 = nn.Sequential(
36 | Transpose(1, 2),
37 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist()))
38 | )
39 | pretrained.act_postprocess2 = nn.Sequential(
40 | Transpose(1, 2),
41 | nn.Unflatten(2, torch.Size((patch_grid_size // 2).tolist()))
42 | )
43 | pretrained.act_postprocess3 = nn.Sequential(
44 | Transpose(1, 2),
45 | nn.Unflatten(2, torch.Size((patch_grid_size // 4).tolist()))
46 | )
47 | pretrained.act_postprocess4 = nn.Sequential(
48 | Transpose(1, 2),
49 | nn.Unflatten(2, torch.Size((patch_grid_size // 8).tolist()))
50 | )
51 |
52 | return pretrained
53 |
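A hypothetical sketch of how this backbone factory is driven (mirroring _make_pretrained_swinl12_384 in swin.py; assumes the timm model name exists in the installed timm version and that the midas package directory is importable):

import timm
from midas.backbones.swin_common import _make_swin_backbone, forward_swin

# Build the Swin-L/12 384 model and register hooks on the four blocks whose
# activations MiDaS consumes; the act_postprocess heads reshape them later.
model = timm.create_model("swin_large_patch4_window12_384", pretrained=False)
backbone = _make_swin_backbone(model, hooks=[1, 1, 17, 1])
# forward_swin(backbone, x) then runs the model on a batch x and collects the
# hooked intermediate activations.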
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/midas/base_model.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class BaseModel(torch.nn.Module):
5 | def load(self, path):
6 | """Load model from file.
7 |
8 | Args:
9 | path (str): file path
10 | """
11 | parameters = torch.load(path, map_location=torch.device('cpu'))
12 |
13 | if "optimizer" in parameters:
14 | parameters = parameters["model"]
15 |
16 | self.load_state_dict(parameters)
17 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/midas/midas_net.py:
--------------------------------------------------------------------------------
1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets.
2 | This file contains code that is adapted from
3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
4 | """
5 | import torch
6 | import torch.nn as nn
7 |
8 | from .base_model import BaseModel
9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder
10 |
11 |
12 | class MidasNet(BaseModel):
13 | """Network for monocular depth estimation.
14 | """
15 |
16 | def __init__(self, path=None, features=256, non_negative=True):
17 | """Init.
18 |
19 | Args:
20 | path (str, optional): Path to saved model. Defaults to None.
21 | features (int, optional): Number of features. Defaults to 256.
22 | non_negative (bool, optional): If True, clamp the output to non-negative values with a final ReLU. Defaults to True.
23 | """
24 | print("Loading weights: ", path)
25 |
26 | super(MidasNet, self).__init__()
27 |
28 | use_pretrained = False if path is None else True
29 |
30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained)
31 |
32 | self.scratch.refinenet4 = FeatureFusionBlock(features)
33 | self.scratch.refinenet3 = FeatureFusionBlock(features)
34 | self.scratch.refinenet2 = FeatureFusionBlock(features)
35 | self.scratch.refinenet1 = FeatureFusionBlock(features)
36 |
37 | self.scratch.output_conv = nn.Sequential(
38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
39 | Interpolate(scale_factor=2, mode="bilinear"),
40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1),
41 | nn.ReLU(True),
42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
43 | nn.ReLU(True) if non_negative else nn.Identity(),
44 | )
45 |
46 | if path:
47 | self.load(path)
48 |
49 | def forward(self, x):
50 | """Forward pass.
51 |
52 | Args:
53 | x (tensor): input data (image)
54 |
55 | Returns:
56 | tensor: depth
57 | """
58 |
59 | layer_1 = self.pretrained.layer1(x)
60 | layer_2 = self.pretrained.layer2(layer_1)
61 | layer_3 = self.pretrained.layer3(layer_2)
62 | layer_4 = self.pretrained.layer4(layer_3)
63 |
64 | layer_1_rn = self.scratch.layer1_rn(layer_1)
65 | layer_2_rn = self.scratch.layer2_rn(layer_2)
66 | layer_3_rn = self.scratch.layer3_rn(layer_3)
67 | layer_4_rn = self.scratch.layer4_rn(layer_4)
68 |
69 | path_4 = self.scratch.refinenet4(layer_4_rn)
70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
73 |
74 | out = self.scratch.output_conv(path_1)
75 |
76 | return torch.squeeze(out, dim=1)
77 |
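A minimal hypothetical usage sketch for MidasNet (assumes torch.hub can fetch the ResNeXt101-WSL encoder that _make_encoder requests; no checkpoint path is given, so only the encoder carries pretrained weights):

import torch
from midas.midas_net import MidasNet

# Construct the network and run one 384x384 RGB batch; the result is a
# per-pixel depth map with the channel dimension squeezed out.
net = MidasNet(path=None, features=256, non_negative=True).eval()
with torch.no_grad():
    prediction = net(torch.rand(1, 3, 384, 384))
print(prediction.shape)  # (N, H, W)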
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/model_io.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 |
5 |
6 | def save_weights(model, filename, path="./saved_models"):
7 | os.makedirs(path, exist_ok=True)
8 |
9 | fpath = os.path.join(path, filename)
10 | torch.save(model.state_dict(), fpath)
11 | return
12 |
13 | def save_checkpoint(model, optimizer, epoch, filename, root="./checkpoints"):
14 | if not os.path.isdir(root):
15 | os.makedirs(root)
16 |
17 | fpath = os.path.join(root, filename)
18 | torch.save(
19 | {
20 | "model": model.state_dict(),
21 | "optimizer": optimizer.state_dict(),
22 | "epoch": epoch
23 | }
24 | , fpath)
25 |
26 | def load_weights(model, filename, path="./saved_models"):
27 | fpath = os.path.join(path, filename)
28 | state_dict = torch.load(fpath)
29 | model.load_state_dict(state_dict)
30 | return model
31 |
32 | def load_checkpoint(fpath, model, optimizer=None):
33 | ckpt = torch.load(fpath, map_location='cpu')
34 | if ckpt is None:
35 | raise Exception(f"\nERROR Loading AdaBins_nyu.pt. Read this for a fix:\nhttps://github.com/deforum-art/deforum-for-automatic1111-webui/wiki/FAQ-&-Troubleshooting#3d-animation-mode-is-not-working-only-2d-works")
36 | if optimizer is None:
37 | optimizer = ckpt.get('optimizer', None)
38 | else:
39 | optimizer.load_state_dict(ckpt['optimizer'])
40 | epoch = ckpt['epoch']
41 |
42 | if 'model' in ckpt:
43 | ckpt = ckpt['model']
44 | load_dict = {}
45 | for k, v in ckpt.items():
46 | if k.startswith('module.'):
47 | k_ = k.replace('module.', '')
48 | load_dict[k_] = v
49 | else:
50 | load_dict[k] = v
51 |
52 | modified = {} # backward compatibility to older naming of architecture blocks
53 | for k, v in load_dict.items():
54 | if k.startswith('adaptive_bins_layer.embedding_conv.'):
55 | k_ = k.replace('adaptive_bins_layer.embedding_conv.',
56 | 'adaptive_bins_layer.conv3x3.')
57 | modified[k_] = v
58 | # del load_dict[k]
59 |
60 | elif k.startswith('adaptive_bins_layer.patch_transformer.embedding_encoder'):
61 |
62 | k_ = k.replace('adaptive_bins_layer.patch_transformer.embedding_encoder',
63 | 'adaptive_bins_layer.patch_transformer.embedding_convPxP')
64 | modified[k_] = v
65 | # del load_dict[k]
66 | else:
67 | modified[k] = v # else keep the original
68 |
69 | model.load_state_dict(modified)
70 | return model, optimizer, epoch
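A hypothetical sketch of the intended call pattern for load_checkpoint above (class name, build parameters, and checkpoint path are illustrative; assumes the adabins sources in src/ are importable and that the AdaBins NYU checkpoint has already been downloaded):

from model_io import load_checkpoint
from adabins.unet_adaptive_bins import UnetAdaptiveBins

# Build the AdaBins architecture, then restore weights; load_checkpoint strips
# any 'module.' prefixes and remaps older layer names before load_state_dict.
model = UnetAdaptiveBins.build(n_bins=256, min_val=0.001, max_val=10)
model, _, _ = load_checkpoint("models/AdaBins_nyu.pt", model)
model = model.eval()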
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/rife/model/warplayer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
5 | backwarp_tenGrid = {}
6 |
7 |
8 | def warp(tenInput, tenFlow):
9 | k = (str(tenFlow.device), str(tenFlow.size()))
10 | if k not in backwarp_tenGrid:
11 | tenHorizontal = torch.linspace(-1.0, 1.0, tenFlow.shape[3], device=device).view(
12 | 1, 1, 1, tenFlow.shape[3]).expand(tenFlow.shape[0], -1, tenFlow.shape[2], -1)
13 | tenVertical = torch.linspace(-1.0, 1.0, tenFlow.shape[2], device=device).view(
14 | 1, 1, tenFlow.shape[2], 1).expand(tenFlow.shape[0], -1, -1, tenFlow.shape[3])
15 | backwarp_tenGrid[k] = torch.cat(
16 | [tenHorizontal, tenVertical], 1).to(device)
17 |
18 | tenFlow = torch.cat([tenFlow[:, 0:1, :, :] / ((tenInput.shape[3] - 1.0) / 2.0),
19 | tenFlow[:, 1:2, :, :] / ((tenInput.shape[2] - 1.0) / 2.0)], 1)
20 |
21 | g = (backwarp_tenGrid[k] + tenFlow).permute(0, 2, 3, 1)
22 | return torch.nn.functional.grid_sample(input=tenInput, grid=g, mode='bilinear', padding_mode='border', align_corners=True)
23 |
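A small hypothetical example of the backward-warping helper above (assumes the rife source directory is on sys.path). warp() builds its sampling grid on the module-level device, so inputs should live on that device too; a zero flow field reproduces the input image up to interpolation error.

import torch
from model.warplayer import warp, device

img = torch.rand(1, 3, 64, 64, device=device)
flow = torch.zeros(1, 2, 64, 64, device=device)  # (x, y) pixel offsets per location
out = warp(img, flow)
print(torch.allclose(out, img, atol=1e-5))  # expect True for zero flow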
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/rife/rife_new_gen/refine.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from torch.optim import AdamW
5 | import torch.optim as optim
6 | import itertools
7 | from model.warplayer import warp
8 | from torch.nn.parallel import DistributedDataParallel as DDP
9 | import torch.nn.functional as F
10 |
11 | device = torch.device("cuda")
12 |
13 | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
14 | return nn.Sequential(
15 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
16 | padding=padding, dilation=dilation, bias=True),
17 | nn.PReLU(out_planes)
18 | )
19 |
20 | def conv_woact(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1):
21 | return nn.Sequential(
22 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
23 | padding=padding, dilation=dilation, bias=True),
24 | )
25 |
26 | def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
27 | return nn.Sequential(
28 | torch.nn.ConvTranspose2d(in_channels=in_planes, out_channels=out_planes, kernel_size=4, stride=2, padding=1, bias=True),
29 | nn.PReLU(out_planes)
30 | )
31 |
32 | class Conv2(nn.Module):
33 | def __init__(self, in_planes, out_planes, stride=2):
34 | super(Conv2, self).__init__()
35 | self.conv1 = conv(in_planes, out_planes, 3, stride, 1)
36 | self.conv2 = conv(out_planes, out_planes, 3, 1, 1)
37 |
38 | def forward(self, x):
39 | x = self.conv1(x)
40 | x = self.conv2(x)
41 | return x
42 |
43 | c = 16
44 | class Contextnet(nn.Module):
45 | def __init__(self):
46 | super(Contextnet, self).__init__()
47 | self.conv1 = Conv2(3, c)
48 | self.conv2 = Conv2(c, 2*c)
49 | self.conv3 = Conv2(2*c, 4*c)
50 | self.conv4 = Conv2(4*c, 8*c)
51 |
52 | def forward(self, x, flow):
53 | x = self.conv1(x)
54 | flow = F.interpolate(flow, scale_factor=0.5, mode="bilinear", align_corners=False) * 0.5
55 | f1 = warp(x, flow)
56 | x = self.conv2(x)
57 | flow = F.interpolate(flow, scale_factor=0.5, mode="bilinear", align_corners=False) * 0.5
58 | f2 = warp(x, flow)
59 | x = self.conv3(x)
60 | flow = F.interpolate(flow, scale_factor=0.5, mode="bilinear", align_corners=False) * 0.5
61 | f3 = warp(x, flow)
62 | x = self.conv4(x)
63 | flow = F.interpolate(flow, scale_factor=0.5, mode="bilinear", align_corners=False) * 0.5
64 | f4 = warp(x, flow)
65 | return [f1, f2, f3, f4]
66 |
67 | class Unet(nn.Module):
68 | def __init__(self):
69 | super(Unet, self).__init__()
70 | self.down0 = Conv2(17, 2*c)
71 | self.down1 = Conv2(4*c, 4*c)
72 | self.down2 = Conv2(8*c, 8*c)
73 | self.down3 = Conv2(16*c, 16*c)
74 | self.up0 = deconv(32*c, 8*c)
75 | self.up1 = deconv(16*c, 4*c)
76 | self.up2 = deconv(8*c, 2*c)
77 | self.up3 = deconv(4*c, c)
78 | self.conv = nn.Conv2d(c, 3, 3, 1, 1)
79 |
80 | def forward(self, img0, img1, warped_img0, warped_img1, mask, flow, c0, c1):
81 | s0 = self.down0(torch.cat((img0, img1, warped_img0, warped_img1, mask, flow), 1))
82 | s1 = self.down1(torch.cat((s0, c0[0], c1[0]), 1))
83 | s2 = self.down2(torch.cat((s1, c0[1], c1[1]), 1))
84 | s3 = self.down3(torch.cat((s2, c0[2], c1[2]), 1))
85 | x = self.up0(torch.cat((s3, c0[3], c1[3]), 1))
86 | x = self.up1(torch.cat((x, s2), 1))
87 | x = self.up2(torch.cat((x, s1), 1))
88 | x = self.up3(torch.cat((x, s0), 1))
89 | x = self.conv(x)
90 | return torch.sigmoid(x)
91 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/utils.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import math
3 | import re
4 | from io import BytesIO
5 |
6 | import matplotlib.cm
7 | import numpy as np
8 | import torch
9 | import torch.nn
10 | from PIL import Image
11 |
12 |
13 | class RunningAverage:
14 | def __init__(self):
15 | self.avg = 0
16 | self.count = 0
17 |
18 | def append(self, value):
19 | self.avg = (value + self.count * self.avg) / (self.count + 1)
20 | self.count += 1
21 |
22 | def get_value(self):
23 | return self.avg
24 |
25 |
26 | def denormalize(x, device='cpu'):
27 | mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
28 | std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
29 | return x * std + mean
30 |
31 |
32 | class RunningAverageDict:
33 | def __init__(self):
34 | self._dict = None
35 |
36 | def update(self, new_dict):
37 | if self._dict is None:
38 | self._dict = dict()
39 | for key, value in new_dict.items():
40 | self._dict[key] = RunningAverage()
41 |
42 | for key, value in new_dict.items():
43 | self._dict[key].append(value)
44 |
45 | def get_value(self):
46 | return {key: value.get_value() for key, value in self._dict.items()}
47 |
48 |
49 | def colorize(value, vmin=10, vmax=1000, cmap='magma_r'):
50 | value = value.cpu().numpy()[0, :, :]
51 | invalid_mask = value == -1
52 |
53 | # normalize
54 | vmin = value.min() if vmin is None else vmin
55 | vmax = value.max() if vmax is None else vmax
56 | if vmin != vmax:
57 | value = (value - vmin) / (vmax - vmin) # vmin..vmax
58 | else:
59 | # Avoid 0-division
60 | value = value * 0.
61 | # squeeze last dim if it exists
62 | # value = value.squeeze(axis=0)
63 | cmapper = matplotlib.cm.get_cmap(cmap)
64 | value = cmapper(value, bytes=True) # (nxmx4)
65 | value[invalid_mask] = 255
66 | img = value[:, :, :3]
67 |
68 | # return img.transpose((2, 0, 1))
69 | return img
70 |
71 |
72 | def count_parameters(model):
73 | return sum(p.numel() for p in model.parameters() if p.requires_grad)
74 |
75 |
76 | def compute_errors(gt, pred):
77 | thresh = np.maximum((gt / pred), (pred / gt))
78 | a1 = (thresh < 1.25).mean()
79 | a2 = (thresh < 1.25 ** 2).mean()
80 | a3 = (thresh < 1.25 ** 3).mean()
81 |
82 | abs_rel = np.mean(np.abs(gt - pred) / gt)
83 | sq_rel = np.mean(((gt - pred) ** 2) / gt)
84 |
85 | rmse = (gt - pred) ** 2
86 | rmse = np.sqrt(rmse.mean())
87 |
88 | rmse_log = (np.log(gt) - np.log(pred)) ** 2
89 | rmse_log = np.sqrt(rmse_log.mean())
90 |
91 | err = np.log(pred) - np.log(gt)
92 | silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100
93 |
94 | log_10 = (np.abs(np.log10(gt) - np.log10(pred))).mean()
95 | return dict(a1=a1, a2=a2, a3=a3, abs_rel=abs_rel, rmse=rmse, log_10=log_10, rmse_log=rmse_log,
96 | silog=silog, sq_rel=sq_rel)
97 |
98 |
99 | ##################################### Demo Utilities ############################################
100 | def b64_to_pil(b64string):
101 | image_data = re.sub('^data:image/.+;base64,', '', b64string)
102 | # image = Image.open(cStringIO.StringIO(image_data))
103 | return Image.open(BytesIO(base64.b64decode(image_data)))
104 |
105 |
106 | # Compute edge magnitudes
107 | from scipy import ndimage
108 |
109 |
110 | def edges(d):
111 | dx = ndimage.sobel(d, 0) # horizontal derivative
112 | dy = ndimage.sobel(d, 1) # vertical derivative
113 | return np.abs(dx) + np.abs(dy)
114 |
115 |
116 | class PointCloudHelper():
117 | def __init__(self, width=640, height=480):
118 | self.xx, self.yy = self.worldCoords(width, height)
119 |
120 | def worldCoords(self, width=640, height=480):
121 | hfov_degrees, vfov_degrees = 57, 43
122 | hFov = math.radians(hfov_degrees)
123 | vFov = math.radians(vfov_degrees)
124 | cx, cy = width / 2, height / 2
125 | fx = width / (2 * math.tan(hFov / 2))
126 | fy = height / (2 * math.tan(vFov / 2))
127 | xx, yy = np.tile(range(width), height), np.repeat(range(height), width)
128 | xx = (xx - cx) / fx
129 | yy = (yy - cy) / fy
130 | return xx, yy
131 |
132 | def depth_to_points(self, depth):
133 | depth[edges(depth) > 0.3] = np.nan # Hide depth edges
134 | length = depth.shape[0] * depth.shape[1]
135 | # depth[edges(depth) > 0.3] = 1e6 # Hide depth edges
136 | z = depth.reshape(length)
137 |
138 | return np.dstack((self.xx * z, self.yy * z, z)).reshape((length, 3))
139 |
140 | #####################################################################################################
141 |
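A hypothetical sketch combining compute_errors and RunningAverageDict from this file (the module is imported by its file name here, assuming the src directory is on sys.path): accumulate per-batch depth metrics and read back their running means.

import numpy as np
from utils import RunningAverageDict, compute_errors

metrics = RunningAverageDict()
for _ in range(3):
    gt = np.random.uniform(0.5, 10.0, size=(64, 64))        # fake ground-truth depth
    pred = gt * np.random.uniform(0.9, 1.1, size=gt.shape)  # fake prediction
    metrics.update(compute_errors(gt, pred))
print(metrics.get_value())  # running means of a1, a2, a3, abs_rel, rmse, ...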
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/data/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/data/ddad.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import os
26 |
27 | import numpy as np
28 | import torch
29 | from PIL import Image
30 | from torch.utils.data import DataLoader, Dataset
31 | from torchvision import transforms
32 |
33 |
34 | class ToTensor(object):
35 | def __init__(self, resize_shape):
36 | # self.normalize = transforms.Normalize(
37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38 | self.normalize = lambda x : x
39 | self.resize = transforms.Resize(resize_shape)
40 |
41 | def __call__(self, sample):
42 | image, depth = sample['image'], sample['depth']
43 | image = self.to_tensor(image)
44 | image = self.normalize(image)
45 | depth = self.to_tensor(depth)
46 |
47 | image = self.resize(image)
48 |
49 | return {'image': image, 'depth': depth, 'dataset': "ddad"}
50 |
51 | def to_tensor(self, pic):
52 |
53 | if isinstance(pic, np.ndarray):
54 | img = torch.from_numpy(pic.transpose((2, 0, 1)))
55 | return img
56 |
57 | # # handle PIL Image
58 | if pic.mode == 'I':
59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False))
60 | elif pic.mode == 'I;16':
61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False))
62 | else:
63 | img = torch.ByteTensor(
64 | torch.ByteStorage.from_buffer(pic.tobytes()))
65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
66 | if pic.mode == 'YCbCr':
67 | nchannel = 3
68 | elif pic.mode == 'I;16':
69 | nchannel = 1
70 | else:
71 | nchannel = len(pic.mode)
72 | img = img.view(pic.size[1], pic.size[0], nchannel)
73 |
74 | img = img.transpose(0, 1).transpose(0, 2).contiguous()
75 |
76 | if isinstance(img, torch.ByteTensor):
77 | return img.float()
78 | else:
79 | return img
80 |
81 |
82 | class DDAD(Dataset):
83 | def __init__(self, data_dir_root, resize_shape):
84 | import glob
85 |
86 | # image paths are of the form <data_dir_root>/*_rgb.png, with depth maps stored as matching *_depth.npy files
87 | self.image_files = glob.glob(os.path.join(data_dir_root, '*.png'))
88 | self.depth_files = [r.replace("_rgb.png", "_depth.npy")
89 | for r in self.image_files]
90 | self.transform = ToTensor(resize_shape)
91 |
92 | def __getitem__(self, idx):
93 |
94 | image_path = self.image_files[idx]
95 | depth_path = self.depth_files[idx]
96 |
97 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
98 | depth = np.load(depth_path) # meters
99 |
100 | # depth[depth > 8] = -1
101 | depth = depth[..., None]
102 |
103 | sample = dict(image=image, depth=depth)
104 | sample = self.transform(sample)
105 |
106 | if idx == 0:
107 | print(sample["image"].shape)
108 |
109 | return sample
110 |
111 | def __len__(self):
112 | return len(self.image_files)
113 |
114 |
115 | def get_ddad_loader(data_dir_root, resize_shape, batch_size=1, **kwargs):
116 | dataset = DDAD(data_dir_root, resize_shape)
117 | return DataLoader(dataset, batch_size, **kwargs)
118 |
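A minimal hypothetical way to drive the loader factory above (directory and resize shape are placeholders; the folder is expected to contain *_rgb.png images with matching *_depth.npy files, and the zoedepth package directory is assumed importable):

from zoedepth.data.ddad import get_ddad_loader

loader = get_ddad_loader("datasets/ddad/val", resize_shape=(352, 1216), batch_size=1)
for sample in loader:
    # image is resized to resize_shape; depth keeps its native resolution
    print(sample["image"].shape, sample["depth"].shape)
    break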
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/data/diml_outdoor_test.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import os
26 |
27 | import numpy as np
28 | import torch
29 | from PIL import Image
30 | from torch.utils.data import DataLoader, Dataset
31 | from torchvision import transforms
32 |
33 |
34 | class ToTensor(object):
35 | def __init__(self):
36 | # self.normalize = transforms.Normalize(
37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38 | self.normalize = lambda x : x
39 |
40 | def __call__(self, sample):
41 | image, depth = sample['image'], sample['depth']
42 | image = self.to_tensor(image)
43 | image = self.normalize(image)
44 | depth = self.to_tensor(depth)
45 |
46 | return {'image': image, 'depth': depth, 'dataset': "diml_outdoor"}
47 |
48 | def to_tensor(self, pic):
49 |
50 | if isinstance(pic, np.ndarray):
51 | img = torch.from_numpy(pic.transpose((2, 0, 1)))
52 | return img
53 |
54 | # # handle PIL Image
55 | if pic.mode == 'I':
56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False))
57 | elif pic.mode == 'I;16':
58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False))
59 | else:
60 | img = torch.ByteTensor(
61 | torch.ByteStorage.from_buffer(pic.tobytes()))
62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
63 | if pic.mode == 'YCbCr':
64 | nchannel = 3
65 | elif pic.mode == 'I;16':
66 | nchannel = 1
67 | else:
68 | nchannel = len(pic.mode)
69 | img = img.view(pic.size[1], pic.size[0], nchannel)
70 |
71 | img = img.transpose(0, 1).transpose(0, 2).contiguous()
72 | if isinstance(img, torch.ByteTensor):
73 | return img.float()
74 | else:
75 | return img
76 |
77 |
78 | class DIML_Outdoor(Dataset):
79 | def __init__(self, data_dir_root):
80 | import glob
81 |
82 | # image paths are of the form /{outleft, depthmap}/*.png
83 | self.image_files = glob.glob(os.path.join(
84 | data_dir_root, "*", 'outleft', '*.png'))
85 | self.depth_files = [r.replace("outleft", "depthmap")
86 | for r in self.image_files]
87 | self.transform = ToTensor()
88 |
89 | def __getitem__(self, idx):
90 | image_path = self.image_files[idx]
91 | depth_path = self.depth_files[idx]
92 |
93 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
94 | depth = np.asarray(Image.open(depth_path),
95 | dtype='uint16') / 1000.0 # mm to meters
96 |
97 | # depth[depth > 8] = -1
98 | depth = depth[..., None]
99 |
100 | sample = dict(image=image, depth=depth, dataset="diml_outdoor")
101 |
102 | # return sample
103 | return self.transform(sample)
104 |
105 | def __len__(self):
106 | return len(self.image_files)
107 |
108 |
109 | def get_diml_outdoor_loader(data_dir_root, batch_size=1, **kwargs):
110 | dataset = DIML_Outdoor(data_dir_root)
111 | return DataLoader(dataset, batch_size, **kwargs)
112 |
113 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/HR")
114 | # get_diml_outdoor_loader(data_dir_root="datasets/diml/outdoor/test/LR")
115 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/data/diode.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import os
26 |
27 | import numpy as np
28 | import torch
29 | from PIL import Image
30 | from torch.utils.data import DataLoader, Dataset
31 | from torchvision import transforms
32 |
33 |
34 | class ToTensor(object):
35 | def __init__(self):
36 | # self.normalize = transforms.Normalize(
37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38 | self.normalize = lambda x : x
39 | self.resize = transforms.Resize(480)
40 |
41 | def __call__(self, sample):
42 | image, depth = sample['image'], sample['depth']
43 | image = self.to_tensor(image)
44 | image = self.normalize(image)
45 | depth = self.to_tensor(depth)
46 |
47 | image = self.resize(image)
48 |
49 | return {'image': image, 'depth': depth, 'dataset': "diode"}
50 |
51 | def to_tensor(self, pic):
52 |
53 | if isinstance(pic, np.ndarray):
54 | img = torch.from_numpy(pic.transpose((2, 0, 1)))
55 | return img
56 |
57 | # # handle PIL Image
58 | if pic.mode == 'I':
59 | img = torch.from_numpy(np.array(pic, np.int32, copy=False))
60 | elif pic.mode == 'I;16':
61 | img = torch.from_numpy(np.array(pic, np.int16, copy=False))
62 | else:
63 | img = torch.ByteTensor(
64 | torch.ByteStorage.from_buffer(pic.tobytes()))
65 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
66 | if pic.mode == 'YCbCr':
67 | nchannel = 3
68 | elif pic.mode == 'I;16':
69 | nchannel = 1
70 | else:
71 | nchannel = len(pic.mode)
72 | img = img.view(pic.size[1], pic.size[0], nchannel)
73 |
74 | img = img.transpose(0, 1).transpose(0, 2).contiguous()
75 |
76 | if isinstance(img, torch.ByteTensor):
77 | return img.float()
78 | else:
79 | return img
80 |
81 |
82 | class DIODE(Dataset):
83 | def __init__(self, data_dir_root):
84 | import glob
85 |
86 | # image paths are of the form /scene_#/scan_#/*.png
87 | self.image_files = glob.glob(
88 | os.path.join(data_dir_root, '*', '*', '*.png'))
89 | self.depth_files = [r.replace(".png", "_depth.npy")
90 | for r in self.image_files]
91 | self.depth_mask_files = [
92 | r.replace(".png", "_depth_mask.npy") for r in self.image_files]
93 | self.transform = ToTensor()
94 |
95 | def __getitem__(self, idx):
96 | image_path = self.image_files[idx]
97 | depth_path = self.depth_files[idx]
98 | depth_mask_path = self.depth_mask_files[idx]
99 |
100 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
101 | depth = np.load(depth_path) # in meters
102 | valid = np.load(depth_mask_path) # binary
103 |
104 | # depth[depth > 8] = -1
105 | # depth = depth[..., None]
106 |
107 | sample = dict(image=image, depth=depth, valid=valid)
108 |
109 | # return sample
110 | sample = self.transform(sample)
111 |
112 | if idx == 0:
113 | print(sample["image"].shape)
114 |
115 | return sample
116 |
117 | def __len__(self):
118 | return len(self.image_files)
119 |
120 |
121 | def get_diode_loader(data_dir_root, batch_size=1, **kwargs):
122 | dataset = DIODE(data_dir_root)
123 | return DataLoader(dataset, batch_size, **kwargs)
124 |
125 | # get_diode_loader(data_dir_root="datasets/diode/val/outdoor")
126 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/data/ibims.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import os
26 |
27 | import numpy as np
28 | import torch
29 | from PIL import Image
30 | from torch.utils.data import DataLoader, Dataset
31 | from torchvision import transforms as T
32 |
33 |
34 | class iBims(Dataset):
35 | def __init__(self, config):
36 | root_folder = config.ibims_root
37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f:
38 | imglist = f.read().split()
39 |
40 | samples = []
41 | for basename in imglist:
42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png")
43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png")
44 | valid_mask_path = os.path.join(
45 | root_folder, 'mask_invalid', basename+".png")
46 | transp_mask_path = os.path.join(
47 | root_folder, 'mask_transp', basename+".png")
48 |
49 | samples.append(
50 | (img_path, depth_path, valid_mask_path, transp_mask_path))
51 |
52 | self.samples = samples
53 | # self.normalize = T.Normalize(
54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
55 | self.normalize = lambda x : x
56 |
57 | def __getitem__(self, idx):
58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx]
59 |
60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0
61 | depth = np.asarray(Image.open(depth_path),
62 | dtype=np.uint16).astype('float')*50.0/65535
63 |
64 | mask_valid = np.asarray(Image.open(valid_mask_path))
65 | mask_transp = np.asarray(Image.open(transp_mask_path))
66 |
67 | # depth = depth * mask_valid * mask_transp
68 | depth = np.where(mask_valid * mask_transp, depth, -1)
69 |
70 | img = torch.from_numpy(img).permute(2, 0, 1)
71 | img = self.normalize(img)
72 | depth = torch.from_numpy(depth).unsqueeze(0)
73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims')
74 |
75 | def __len__(self):
76 | return len(self.samples)
77 |
78 |
79 | def get_ibims_loader(config, batch_size=1, **kwargs):
80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs)
81 | return dataloader
82 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/data/sun_rgbd_loader.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import os
26 |
27 | import numpy as np
28 | import torch
29 | from PIL import Image
30 | from torch.utils.data import DataLoader, Dataset
31 | from torchvision import transforms
32 |
33 |
34 | class ToTensor(object):
35 | def __init__(self):
36 | # self.normalize = transforms.Normalize(
37 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
38 | self.normalize = lambda x : x
39 |
40 | def __call__(self, sample):
41 | image, depth = sample['image'], sample['depth']
42 | image = self.to_tensor(image)
43 | image = self.normalize(image)
44 | depth = self.to_tensor(depth)
45 |
46 | return {'image': image, 'depth': depth, 'dataset': "sunrgbd"}
47 |
48 | def to_tensor(self, pic):
49 |
50 | if isinstance(pic, np.ndarray):
51 | img = torch.from_numpy(pic.transpose((2, 0, 1)))
52 | return img
53 |
54 | # # handle PIL Image
55 | if pic.mode == 'I':
56 | img = torch.from_numpy(np.array(pic, np.int32, copy=False))
57 | elif pic.mode == 'I;16':
58 | img = torch.from_numpy(np.array(pic, np.int16, copy=False))
59 | else:
60 | img = torch.ByteTensor(
61 | torch.ByteStorage.from_buffer(pic.tobytes()))
62 | # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
63 | if pic.mode == 'YCbCr':
64 | nchannel = 3
65 | elif pic.mode == 'I;16':
66 | nchannel = 1
67 | else:
68 | nchannel = len(pic.mode)
69 | img = img.view(pic.size[1], pic.size[0], nchannel)
70 |
71 | img = img.transpose(0, 1).transpose(0, 2).contiguous()
72 | if isinstance(img, torch.ByteTensor):
73 | return img.float()
74 | else:
75 | return img
76 |
77 |
78 | class SunRGBD(Dataset):
79 | def __init__(self, data_dir_root):
80 | # test_file_dirs = loadmat(train_test_file)['alltest'].squeeze()
81 | # all_test = [t[0].replace("/n/fs/sun3d/data/", "") for t in test_file_dirs]
82 | # self.all_test = [os.path.join(data_dir_root, t) for t in all_test]
83 | import glob
84 | self.image_files = glob.glob(
85 | os.path.join(data_dir_root, 'rgb', 'rgb', '*'))
86 | self.depth_files = [
87 | r.replace("rgb/rgb", "gt/gt").replace("jpg", "png") for r in self.image_files]
88 | self.transform = ToTensor()
89 |
90 | def __getitem__(self, idx):
91 | image_path = self.image_files[idx]
92 | depth_path = self.depth_files[idx]
93 |
94 | image = np.asarray(Image.open(image_path), dtype=np.float32) / 255.0
95 | depth = np.asarray(Image.open(depth_path), dtype='uint16') / 1000.0
96 | depth[depth > 8] = -1
97 | depth = depth[..., None]
98 | return self.transform(dict(image=image, depth=depth))
99 |
100 | def __len__(self):
101 | return len(self.image_files)
102 |
103 |
104 | def get_sunrgbd_loader(data_dir_root, batch_size=1, **kwargs):
105 | dataset = SunRGBD(data_dir_root)
106 | return DataLoader(dataset, batch_size, **kwargs)
107 |
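The loader above pairs each RGB image with its depth map by simple path substitution ("rgb/rgb" -> "gt/gt", ".jpg" -> ".png") and flags depths beyond 8 m as invalid (-1). A minimal driving sketch, assuming a local SUN RGB-D style tree with rgb/rgb/*.jpg and matching gt/gt/*.png (the path below is a placeholder):

loader = get_sunrgbd_loader("/data/sunrgbd_val", batch_size=1, shuffle=False)  # placeholder path
for sample in loader:
    image, depth = sample["image"], sample["depth"]  # NCHW float tensors
    valid = depth > 0                                # pixels beyond 8 m were set to -1 above
    print(image.shape, depth.shape, valid.float().mean().item())
    break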
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/base_models/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/builder.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | from importlib import import_module
26 | from zoedepth.models.depth_model import DepthModel
27 |
28 | def build_model(config) -> DepthModel:
29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface.
30 | This function should be used to construct models for training and evaluation.
31 |
32 | Args:
33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder.
34 |
35 | Returns:
36 | torch.nn.Module: Model corresponding to name and version as specified in config
37 | """
38 | module_name = f"zoedepth.models.{config.model}"
39 | try:
40 | module = import_module(module_name)
41 | except ModuleNotFoundError as e:
42 | # print the original error message
43 | print(e)
44 | raise ValueError(
45 | f"Model {config.model} not found. Refer above error for details.") from e
46 | try:
47 | get_version = getattr(module, "get_version")
48 | except AttributeError as e:
49 | raise ValueError(
50 | f"Model {config.model} has no get_version function.") from e
51 | return get_version(config.version_name).build_from_config(config)
52 |
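build_model reads only config.model (to locate the sub-package) and config.version_name (to pick the class via that package's get_version); everything else is consumed by the chosen model's build_from_config. A sketch of the config shape, using field names seen in config_zoedepth.json further down; a full config from utils/config.py would be needed for the commented-out call to actually build:

from zoedepth.utils.easydict import EasyDict

cfg = EasyDict(model="zoedepth", version_name="v1")  # minimal illustration, not a complete config
# model = build_model(cfg)  # would import zoedepth.models.zoedepth and call get_version("v1")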
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/layers/patch_transformer.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import torch
26 | import torch.nn as nn
27 |
28 |
29 | class PatchTransformerEncoder(nn.Module):
30 | def __init__(self, in_channels, patch_size=10, embedding_dim=128, num_heads=4, use_class_token=False):
31 | """ViT-like transformer block
32 |
33 | Args:
34 | in_channels (int): Input channels
35 | patch_size (int, optional): patch size. Defaults to 10.
36 | embedding_dim (int, optional): Embedding dimension in transformer model. Defaults to 128.
37 | num_heads (int, optional): number of attention heads. Defaults to 4.
38 | use_class_token (bool, optional): Whether to use an extra token at the start for global accumulation (the "class token"). Defaults to False.
39 | """
40 | super(PatchTransformerEncoder, self).__init__()
41 | self.use_class_token = use_class_token
42 | encoder_layers = nn.TransformerEncoderLayer(
43 | embedding_dim, num_heads, dim_feedforward=1024)
44 | self.transformer_encoder = nn.TransformerEncoder(
45 | encoder_layers, num_layers=4) # takes shape S,N,E
46 |
47 | self.embedding_convPxP = nn.Conv2d(in_channels, embedding_dim,
48 | kernel_size=patch_size, stride=patch_size, padding=0)
49 |
50 | def positional_encoding_1d(self, sequence_length, batch_size, embedding_dim, device='cpu'):
51 | """Generate positional encodings
52 |
53 | Args:
54 | sequence_length (int): Sequence length
55 | batch_size (int): Batch size
56 | embedding_dim (int): Embedding dimension
57 | Returns:
58 | torch.Tensor SBE: Positional encodings
59 | """
60 | position = torch.arange(
61 | 0, sequence_length, dtype=torch.float32, device=device).unsqueeze(1)
62 | index = torch.arange(
63 | 0, embedding_dim, 2, dtype=torch.float32, device=device).unsqueeze(0)
64 | div_term = torch.exp(index * (-torch.log(torch.tensor(10000.0, device=device)) / embedding_dim))
65 | pos_encoding = position * div_term
66 | pos_encoding = torch.cat([torch.sin(pos_encoding), torch.cos(pos_encoding)], dim=1)
67 | pos_encoding = pos_encoding.unsqueeze(1).repeat(1, batch_size, 1)
68 | return pos_encoding
69 |
70 |
71 | def forward(self, x):
72 | """Forward pass
73 |
74 | Args:
75 | x (torch.Tensor - NCHW): Input feature tensor
76 |
77 | Returns:
78 | torch.Tensor - SNE: Transformer output embeddings. S - sequence length (=HW/patch_size^2), N - batch size, E - embedding dim
79 | """
80 | embeddings = self.embedding_convPxP(x).flatten(
81 | 2) # .shape = n,c,s = n, embedding_dim, s
82 | if self.use_class_token:
83 | # extra special token at start ?
84 | embeddings = nn.functional.pad(embeddings, (1, 0))
85 |
86 | # change to S,N,E format required by transformer
87 | embeddings = embeddings.permute(2, 0, 1)
88 | S, N, E = embeddings.shape
89 | embeddings = embeddings + self.positional_encoding_1d(S, N, E, device=embeddings.device)
90 | x = self.transformer_encoder(embeddings) # .shape = S, N, E
91 | return x
92 |
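A shape walkthrough with assumed sizes (the numbers below are illustrative, not from the source): a 60x60 feature map with patch_size 10 gives S = 36 patches, so the encoder returns an (S, N, E) tensor.

import torch

enc = PatchTransformerEncoder(in_channels=32, patch_size=10, embedding_dim=128, num_heads=4)
x = torch.randn(2, 32, 60, 60)   # N=2, C=32, H=W=60 (assumed sizes)
out = enc(x)                     # S = (60 // 10) * (60 // 10) = 36
print(out.shape)                 # torch.Size([36, 2, 128]) -> (S, N, E)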
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/model_io.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import torch
26 |
27 | def load_state_dict(model, state_dict):
28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict.
29 |
30 | DataParallel prefixes state_dict keys with 'module.' when saving.
31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed.
32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added.
33 | """
34 | state_dict = state_dict.get('model', state_dict)
35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.'
36 |
37 | do_prefix = isinstance(
38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel))
39 | state = {}
40 | for k, v in state_dict.items():
41 | if k.startswith('module.') and not do_prefix:
42 | k = k[7:]
43 |
44 | if not k.startswith('module.') and do_prefix:
45 | k = 'module.' + k
46 |
47 | state[k] = v
48 |
49 | model.load_state_dict(state)
50 | print("Loaded successfully")
51 | return model
52 |
53 |
54 | def load_wts(model, checkpoint_path):
55 | ckpt = torch.load(checkpoint_path, map_location='cpu')
56 | return load_state_dict(model, ckpt)
57 |
58 |
59 | def load_state_dict_from_url(model, url, **kwargs):
60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
61 | return load_state_dict(model, state_dict)
62 |
63 |
64 | def load_state_from_resource(model, resource: str):
65 | """Loads weights to the model from a given resource. A resource can be of following types:
66 | 1. URL. Prefixed with "url::"
67 | e.g. url::http(s)://url.resource.com/ckpt.pt
68 |
69 | 2. Local path. Prefixed with "local::"
70 | e.g. local::/path/to/ckpt.pt
71 |
72 |
73 | Args:
74 | model (torch.nn.Module): Model
75 | resource (str): resource string
76 |
77 | Returns:
78 | torch.nn.Module: Model with loaded weights
79 | """
80 | print(f"Using pretrained resource {resource}")
81 |
82 | if resource.startswith('url::'):
83 | url = resource.split('url::')[1]
84 | return load_state_dict_from_url(model, url, progress=True)
85 |
86 | elif resource.startswith('local::'):
87 | path = resource.split('local::')[1]
88 | return load_wts(model, path)
89 |
90 | else:
91 | raise ValueError("Invalid resource type, only url:: and local:: are supported")
92 |
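A small self-contained check of the 'module.' prefix handling above, using a throwaway linear layer (the checkpoint dict is fabricated for illustration; real checkpoints come from the pretrained_resource URLs in the configs below):

import torch.nn as nn

net = nn.Linear(4, 2)
ckpt = {"model": {"module." + k: v for k, v in net.state_dict().items()}}  # as if saved under DataParallel
net = load_state_dict(net, ckpt)  # prefixes are stripped because net is not wrapped in DataParallel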
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/zoedepth/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | from .zoedepth_v1 import ZoeDepth
26 |
27 | all_versions = {
28 | "v1": ZoeDepth,
29 | }
30 |
31 | get_version = lambda v : all_versions[v]
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/zoedepth/config_zoedepth.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": {
3 | "name": "ZoeDepth",
4 | "version_name": "v1",
5 | "n_bins": 64,
6 | "bin_embedding_dim": 128,
7 | "bin_centers_type": "softplus",
8 | "n_attractors":[16, 8, 4, 1],
9 | "attractor_alpha": 1000,
10 | "attractor_gamma": 2,
11 | "attractor_kind" : "mean",
12 | "attractor_type" : "inv",
13 | "midas_model_type" : "DPT_BEiT_L_384",
14 | "min_temp": 0.0212,
15 | "max_temp": 50.0,
16 | "output_distribution": "logbinomial",
17 | "memory_efficient": true,
18 | "inverse_midas": false,
19 | "img_size": [384, 512]
20 | },
21 |
22 | "train": {
23 | "train_midas": true,
24 | "use_pretrained_midas": true,
25 | "trainer": "zoedepth",
26 | "epochs": 5,
27 | "bs": 16,
28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01},
29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
30 | "same_lr": false,
31 | "w_si": 1,
32 | "w_domain": 0.2,
33 | "w_reg": 0,
34 | "w_grad": 0,
35 | "avoid_boundary": false,
36 | "random_crop": false,
37 | "input_width": 640,
38 | "input_height": 480,
39 | "midas_lr_factor": 1,
40 | "encoder_lr_factor":10,
41 | "pos_enc_lr_factor":10,
42 | "freeze_midas_bn": true
43 |
44 | },
45 |
46 | "infer":{
47 | "train_midas": false,
48 | "use_pretrained_midas": false,
49 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt",
50 | "force_keep_ar": true
51 | },
52 |
53 | "eval":{
54 | "train_midas": false,
55 | "use_pretrained_midas": false,
56 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_N.pt"
57 | }
58 | }
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/zoedepth/config_zoedepth_kitti.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": {
3 | "bin_centers_type": "normed",
4 | "img_size": [384, 768]
5 | },
6 |
7 | "train": {
8 | },
9 |
10 | "infer":{
11 | "train_midas": false,
12 | "use_pretrained_midas": false,
13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt",
14 | "force_keep_ar": true
15 | },
16 |
17 | "eval":{
18 | "train_midas": false,
19 | "use_pretrained_midas": false,
20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
21 | }
22 | }
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/zoedepth_nk/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | from .zoedepth_nk_v1 import ZoeDepthNK
26 |
27 | all_versions = {
28 | "v1": ZoeDepthNK,
29 | }
30 |
31 | get_version = lambda v : all_versions[v]
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": {
3 | "name": "ZoeDepthNK",
4 | "version_name": "v1",
5 | "bin_conf" : [
6 | {
7 | "name": "nyu",
8 | "n_bins": 64,
9 | "min_depth": 1e-3,
10 | "max_depth": 10.0
11 | },
12 | {
13 | "name": "kitti",
14 | "n_bins": 64,
15 | "min_depth": 1e-3,
16 | "max_depth": 80.0
17 | }
18 | ],
19 | "bin_embedding_dim": 128,
20 | "bin_centers_type": "softplus",
21 | "n_attractors":[16, 8, 4, 1],
22 | "attractor_alpha": 1000,
23 | "attractor_gamma": 2,
24 | "attractor_kind" : "mean",
25 | "attractor_type" : "inv",
26 | "min_temp": 0.0212,
27 | "max_temp": 50.0,
28 | "memory_efficient": true,
29 | "midas_model_type" : "DPT_BEiT_L_384",
30 | "img_size": [384, 512]
31 | },
32 |
33 | "train": {
34 | "train_midas": true,
35 | "use_pretrained_midas": true,
36 | "trainer": "zoedepth_nk",
37 | "epochs": 5,
38 | "bs": 16,
39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01},
40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
41 | "same_lr": false,
42 | "w_si": 1,
43 | "w_domain": 100,
44 | "avoid_boundary": false,
45 | "random_crop": false,
46 | "input_width": 640,
47 | "input_height": 480,
48 | "w_grad": 0,
49 | "w_reg": 0,
50 | "midas_lr_factor": 10,
51 | "encoder_lr_factor":10,
52 | "pos_enc_lr_factor":10
53 | },
54 |
55 | "infer": {
56 | "train_midas": false,
57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
58 | "use_pretrained_midas": false,
59 | "force_keep_ar": true
60 | },
61 |
62 | "eval": {
63 | "train_midas": false,
64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
65 | "use_pretrained_midas": false
66 | }
67 | }
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/utils/arg_utils.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | def infer_type(x): # hacky way to infer type from string args
4 | if not isinstance(x, str):
5 | return x
6 |
7 | try:
8 | x = int(x)
9 | return x
10 | except ValueError:
11 | pass
12 |
13 | try:
14 | x = float(x)
15 | return x
16 | except ValueError:
17 | pass
18 |
19 | return x
20 |
21 |
22 | def parse_unknown(unknown_args):
23 | clean = []
24 | for a in unknown_args:
25 | if "=" in a:
26 | k, v = a.split("=")
27 | clean.extend([k, v])
28 | else:
29 | clean.append(a)
30 |
31 | keys = clean[::2]
32 | values = clean[1::2]
33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)}
34 |
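The two helpers above turn leftover argparse tokens into a typed dict: "=" pairs are split, "--" prefixes dropped, and values coerced to int or float where possible. For illustration (the option names are made up):

print(infer_type("42"), infer_type("3e-4"), infer_type("softplus"))   # 42 0.0003 softplus
print(parse_unknown(["--bs=16", "--lr", "3e-4", "--attractor_kind", "mean"]))
# {'bs': 16, 'lr': 0.0003, 'attractor_kind': 'mean'}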
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/utils/easydict/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | EasyDict
3 | Copy/pasted from https://github.com/makinacorpus/easydict
4 | Original author: Mathieu Leplatre
5 | """
6 |
7 | class EasyDict(dict):
8 | """
9 | Get attributes
10 |
11 | >>> d = EasyDict({'foo':3})
12 | >>> d['foo']
13 | 3
14 | >>> d.foo
15 | 3
16 | >>> d.bar
17 | Traceback (most recent call last):
18 | ...
19 | AttributeError: 'EasyDict' object has no attribute 'bar'
20 |
21 | Works recursively
22 |
23 | >>> d = EasyDict({'foo':3, 'bar':{'x':1, 'y':2}})
24 | >>> isinstance(d.bar, dict)
25 | True
26 | >>> d.bar.x
27 | 1
28 |
29 | Bullet-proof
30 |
31 | >>> EasyDict({})
32 | {}
33 | >>> EasyDict(d={})
34 | {}
35 | >>> EasyDict(None)
36 | {}
37 | >>> d = {'a': 1}
38 | >>> EasyDict(**d)
39 | {'a': 1}
40 | >>> EasyDict((('a', 1), ('b', 2)))
41 | {'a': 1, 'b': 2}
42 |
43 | Set attributes
44 |
45 | >>> d = EasyDict()
46 | >>> d.foo = 3
47 | >>> d.foo
48 | 3
49 | >>> d.bar = {'prop': 'value'}
50 | >>> d.bar.prop
51 | 'value'
52 | >>> d
53 | {'foo': 3, 'bar': {'prop': 'value'}}
54 | >>> d.bar.prop = 'newer'
55 | >>> d.bar.prop
56 | 'newer'
57 |
58 |
59 | Values extraction
60 |
61 | >>> d = EasyDict({'foo':0, 'bar':[{'x':1, 'y':2}, {'x':3, 'y':4}]})
62 | >>> isinstance(d.bar, list)
63 | True
64 | >>> from operator import attrgetter
65 | >>> list(map(attrgetter('x'), d.bar))
66 | [1, 3]
67 | >>> list(map(attrgetter('y'), d.bar))
68 | [2, 4]
69 | >>> d = EasyDict()
70 | >>> list(d.keys())
71 | []
72 | >>> d = EasyDict(foo=3, bar=dict(x=1, y=2))
73 | >>> d.foo
74 | 3
75 | >>> d.bar.x
76 | 1
77 |
78 | Still like a dict though
79 |
80 | >>> o = EasyDict({'clean':True})
81 | >>> list(o.items())
82 | [('clean', True)]
83 |
84 | And like a class
85 |
86 | >>> class Flower(EasyDict):
87 | ... power = 1
88 | ...
89 | >>> f = Flower()
90 | >>> f.power
91 | 1
92 | >>> f = Flower({'height': 12})
93 | >>> f.height
94 | 12
95 | >>> f['power']
96 | 1
97 | >>> sorted(f.keys())
98 | ['height', 'power']
99 |
100 | update and pop items
101 | >>> d = EasyDict(a=1, b='2')
102 | >>> e = EasyDict(c=3.0, a=9.0)
103 | >>> d.update(e)
104 | >>> d.c
105 | 3.0
106 | >>> d['c']
107 | 3.0
108 | >>> d.get('c')
109 | 3.0
110 | >>> d.update(a=4, b=4)
111 | >>> d.b
112 | 4
113 | >>> d.pop('a')
114 | 4
115 | >>> d.a
116 | Traceback (most recent call last):
117 | ...
118 | AttributeError: 'EasyDict' object has no attribute 'a'
119 | """
120 | def __init__(self, d=None, **kwargs):
121 | if d is None:
122 | d = {}
123 | else:
124 | d = dict(d)
125 | if kwargs:
126 | d.update(**kwargs)
127 | for k, v in d.items():
128 | setattr(self, k, v)
129 | # Class attributes
130 | for k in self.__class__.__dict__.keys():
131 | if not (k.startswith('__') and k.endswith('__')) and k not in ('update', 'pop'):
132 | setattr(self, k, getattr(self, k))
133 |
134 | def __setattr__(self, name, value):
135 | if isinstance(value, (list, tuple)):
136 | value = [self.__class__(x)
137 | if isinstance(x, dict) else x for x in value]
138 | elif isinstance(value, dict) and not isinstance(value, self.__class__):
139 | value = self.__class__(value)
140 | super(EasyDict, self).__setattr__(name, value)
141 | super(EasyDict, self).__setitem__(name, value)
142 |
143 | __setitem__ = __setattr__
144 |
145 | def update(self, e=None, **f):
146 | d = e or dict()
147 | d.update(f)
148 | for k in d:
149 | setattr(self, k, d[k])
150 |
151 | def pop(self, k, d=None):
152 | delattr(self, k)
153 | return super(EasyDict, self).pop(k, d)
154 |
155 |
156 | if __name__ == "__main__":
157 | import doctest
158 | doctest.testmod()
--------------------------------------------------------------------------------
/scripts/deforum_helpers/src/zoedepth/utils/geometry.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import numpy as np
26 |
27 | def get_intrinsics(H,W):
28 | """
29 | Intrinsics for a pinhole camera model.
30 | Assume fov of 55 degrees and central principal point.
31 | """
32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0)
33 | cx = 0.5 * W
34 | cy = 0.5 * H
35 | return np.array([[f, 0, cx],
36 | [0, f, cy],
37 | [0, 0, 1]])
38 |
39 | def depth_to_points(depth, R=None, t=None):
40 |
41 | K = get_intrinsics(depth.shape[1], depth.shape[2])
42 | Kinv = np.linalg.inv(K)
43 | if R is None:
44 | R = np.eye(3)
45 | if t is None:
46 | t = np.zeros(3)
47 |
48 | # M converts from your coordinate to PyTorch3D's coordinate system
49 | M = np.eye(3)
50 | M[0, 0] = -1.0
51 | M[1, 1] = -1.0
52 |
53 | height, width = depth.shape[1:3]
54 |
55 | x = np.arange(width)
56 | y = np.arange(height)
57 | coord = np.stack(np.meshgrid(x, y), -1)
58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1
59 | coord = coord.astype(np.float32)
60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device)
61 | coord = coord[None] # bs, h, w, 3
62 |
63 | D = depth[:, :, :, None, None]
64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape )
65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None]
66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's
67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1
68 | # from reference to target viewpoint
69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None]
70 | # pts3D_2 = pts3D_1
71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w
72 | return pts3D_2[:, :, :, :3, 0][0]
73 |
74 |
75 | def create_triangles(h, w, mask=None):
76 | """
77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68
78 | Creates mesh triangle indices from a given pixel grid size.
79 | This function is not and need not be differentiable as triangle indices are
80 | fixed.
81 | Args:
82 | h: (int) denoting the height of the image.
83 | w: (int) denoting the width of the image.
84 | Returns:
85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3)
86 | """
87 | x, y = np.meshgrid(range(w - 1), range(h - 1))
88 | tl = y * w + x
89 | tr = y * w + x + 1
90 | bl = (y + 1) * w + x
91 | br = (y + 1) * w + x + 1
92 | triangles = np.array([tl, bl, tr, br, tr, bl])
93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape(
94 | ((w - 1) * (h - 1) * 2, 3))
95 | if mask is not None:
96 | mask = mask.reshape(-1)
97 | triangles = triangles[mask[triangles].all(1)]
98 | return triangles
99 |
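A quick numeric check of the helpers above, with toy sizes chosen for illustration: create_triangles(h, w) emits 2*(h-1)*(w-1) faces, and get_intrinsics assumes the 55-degree field of view stated in its docstring.

K = get_intrinsics(480, 640)       # toy 640x480 image
print(round(float(K[0, 0]), 1))    # focal length in pixels, roughly 614.7
tri = create_triangles(3, 4)       # (3-1) * (4-1) * 2 = 12 triangles
print(tri.shape)                   # (12, 3)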
--------------------------------------------------------------------------------
/scripts/deforum_helpers/ui_left.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | from types import SimpleNamespace
18 | import gradio as gr
19 | from .defaults import get_gradio_html
20 | from .gradio_funcs import change_css, handle_change_functions
21 | from .args import DeforumArgs, DeforumAnimArgs, ParseqArgs, DeforumOutputArgs, RootArgs, LoopArgs
22 | from .deforum_controlnet import setup_controlnet_ui
23 | from .ui_elements import get_tab_run, get_tab_keyframes, get_tab_prompts, get_tab_init, get_tab_hybrid, get_tab_output
24 |
25 | def set_arg_lists():
26 | # convert dicts to SimpleNamespaces for easier access (args.param instead of args['param'])
27 | d = SimpleNamespace(**DeforumArgs()) # default args
28 | da = SimpleNamespace(**DeforumAnimArgs()) # default anim args
29 | dp = SimpleNamespace(**ParseqArgs()) # default parseq args
30 | dv = SimpleNamespace(**DeforumOutputArgs()) # default video args
31 | dr = SimpleNamespace(**RootArgs()) # ROOT args
32 | dloopArgs = SimpleNamespace(**LoopArgs()) # Guided imgs args
33 | return d, da, dp, dv, dr, dloopArgs
34 |
35 | def setup_deforum_left_side_ui():
36 | d, da, dp, dv, dr, dloopArgs = set_arg_lists()
37 | # set up main info accordion on top of the UI
38 | with gr.Accordion("Info, Links and Help", open=False, elem_id='main_top_info_accord'):
39 | gr.HTML(value=get_gradio_html('main'))
40 | # show button to hide/show gradio's info texts for each element in the UI
41 | with gr.Row(variant='compact'):
42 | show_info_on_ui = gr.Checkbox(label="Show more info", value=d.show_info_on_ui, interactive=True)
43 | with gr.Blocks():
44 | with gr.Tabs():
45 | # Get main tab contents:
46 | tab_run_params = get_tab_run(d, da) # Run tab
47 | tab_keyframes_params = get_tab_keyframes(d, da, dloopArgs) # Keyframes tab
48 | tab_prompts_params = get_tab_prompts(da) # Prompts tab
49 | tab_init_params = get_tab_init(d, da, dp) # Init tab
50 | controlnet_dict = setup_controlnet_ui() # ControlNet tab
51 | tab_hybrid_params = get_tab_hybrid(da) # Hybrid tab
52 | tab_output_params = get_tab_output(da, dv) # Output tab
53 | # add returned gradio elements from main tabs to locals()
54 | for key, value in {**tab_run_params, **tab_keyframes_params, **tab_prompts_params, **tab_init_params, **controlnet_dict, **tab_hybrid_params, **tab_output_params}.items():
55 | locals()[key] = value
56 |
57 | # Gradio's Change functions - hiding and renaming elements based on other elements
58 | show_info_on_ui.change(fn=change_css, inputs=show_info_on_ui, outputs=gr.outputs.HTML())
59 | handle_change_functions(locals())
60 |
61 | return locals()
62 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/ui_settings.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import gradio as gr
18 | from modules import ui_components
19 | from modules.shared import opts, cmd_opts, OptionInfo
20 | from .video_audio_utilities import find_ffmpeg_binary
21 | from .subtitle_handler import get_user_values
22 |
23 | def on_ui_settings():
24 | srt_ui_params = get_user_values()
25 | section = ('deforum', "Deforum")
26 | opts.add_option("deforum_keep_3d_models_in_vram", OptionInfo(False, "Keep 3D models in VRAM between runs", gr.Checkbox, {"interactive": True, "visible": True if not (cmd_opts.lowvram or cmd_opts.medvram) else False}, section=section))
27 | opts.add_option("deforum_enable_persistent_settings", OptionInfo(False, "Keep settings persistent upon relaunch of webui", gr.Checkbox, {"interactive": True}, section=section))
28 | opts.add_option("deforum_persistent_settings_path", OptionInfo("models/Deforum/deforum_persistent_settings.txt", "Path for saving your persistent settings file:", section=section))
29 | opts.add_option("deforum_ffmpeg_location", OptionInfo(find_ffmpeg_binary(), "FFmpeg path/ location", section=section))
30 | opts.add_option("deforum_ffmpeg_crf", OptionInfo(17, "FFmpeg CRF value", gr.Slider, {"interactive": True, "minimum": 0, "maximum": 51}, section=section))
31 | opts.add_option("deforum_ffmpeg_preset", OptionInfo('slow', "FFmpeg Preset", gr.Dropdown, {"interactive": True, "choices": ['veryslow', 'slower', 'slow', 'medium', 'fast', 'faster', 'veryfast', 'superfast', 'ultrafast']}, section=section))
32 | opts.add_option("deforum_debug_mode_enabled", OptionInfo(False, "Enable Dev mode - adds extra reporting in console", gr.Checkbox, {"interactive": True}, section=section))
33 | opts.add_option("deforum_save_gen_info_as_srt", OptionInfo(False, "Save an .srt (subtitles) file with the generation info along with each animation", gr.Checkbox, {"interactive": True}, section=section))
34 | opts.add_option("deforum_embed_srt", OptionInfo(False, "If .srt file is saved, soft-embed the subtitles into the rendered video file", gr.Checkbox, {"interactive": True}, section=section))
35 | opts.add_option("deforum_save_gen_info_as_srt_params", OptionInfo(['Noise Schedule'], "Choose which animation params are to be saved to the .srt file (Frame # and Seed will always be saved):", ui_components.DropdownMulti, lambda: {"interactive": True, "choices": srt_ui_params}, section=section))
36 | opts.add_option("deforum_preview", OptionInfo("Off", "Generate preview video during generation? (Preview does not include frame interpolation or upscaling.)", gr.Dropdown, {"interactive": True, "choices": ['Off', 'On', 'On, concurrent (don\'t pause generation)']}, section=section))
37 | opts.add_option("deforum_preview_interval_frames", OptionInfo(100, "Generate preview every N frames", gr.Slider, {"interactive": True, "minimum": 10, "maximum": 500}, section=section))
38 |
--------------------------------------------------------------------------------
/scripts/deforum_helpers/webui_sd_pipeline.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | from modules.processing import StableDiffusionProcessingImg2Img
18 | from modules.shared import opts, sd_model
19 | import os
20 |
21 | def get_webui_sd_pipeline(args, root):
22 | # Set up the pipeline
23 | p = StableDiffusionProcessingImg2Img(
24 | sd_model=sd_model,
25 | outpath_samples = opts.outdir_samples or opts.outdir_img2img_samples,
26 | ) # we'll set up the rest later
27 |
28 | os.makedirs(args.outdir, exist_ok=True)
29 | p.width, p.height = map(lambda x: x - x % 8, (args.W, args.H))
30 | p.steps = args.steps
31 | p.seed = args.seed
32 | p.sampler_name = args.sampler
33 | p.tiling = args.tiling
34 | p.restore_faces = args.restore_faces
35 | p.subseed = root.subseed
36 | p.subseed_strength = root.subseed_strength
37 | p.seed_resize_from_w = args.seed_resize_from_w
38 | p.seed_resize_from_h = args.seed_resize_from_h
39 | p.fill = args.fill
40 | p.batch_size = 1 # b.size 1 as this is DEFORUM :)
41 | p.seed = args.seed
42 | p.do_not_save_samples = True # Setting this to False will trigger webui's saving mechanism - and we will end up with duplicated files, and another folder within our destination folder - big no no.
43 | p.scheduler = args.scheduler
44 | p.mask_blur = args.mask_overlay_blur
45 | p.extra_generation_params["Mask blur"] = args.mask_overlay_blur
46 | p.n_iter = 1
47 | p.steps = args.steps
48 | p.denoising_strength = 1 - args.strength
49 | p.cfg_scale = args.scale
50 | p.image_cfg_scale = args.pix2pix_img_cfg_scale
51 | p.outpath_samples = args.outdir
52 |
53 | return p
--------------------------------------------------------------------------------
/scripts/deforum_helpers/word_masking.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import os
18 | import torch
19 | from PIL import Image
20 | from torchvision import transforms
21 | from torch.nn.functional import interpolate
22 | import cv2
23 |
24 | preclipseg_transform = transforms.Compose([
25 | transforms.ToTensor(),
26 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
27 | transforms.Resize((512, 512)), #TODO: check if the size is hardcoded
28 | ])
29 |
30 | def find_clipseg():
31 | basedirs = [os.getcwd()]
32 | src_basedirs = []
33 | for basedir in basedirs:
34 | src_basedirs.append(os.path.join(os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-2]), 'deforum_helpers', 'src'))
35 |
36 | for basedir in src_basedirs:
37 | pth = os.path.join(basedir, './clipseg/weights/rd64-uni.pth')
38 | if os.path.exists(pth):
39 | return pth
40 | raise Exception('CLIPseg weights not found!')
41 |
42 | def setup_clipseg(root):
43 | from clipseg.models.clipseg import CLIPDensePredT
44 | model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64)
45 | model.eval()
46 | model.load_state_dict(torch.load(find_clipseg(), map_location=root.device), strict=False)
47 |
48 | model.to(root.device)
49 | root.clipseg_model = model
50 |
51 | def get_word_mask(root, frame, word_mask):
52 | if root.clipseg_model is None:
53 | setup_clipseg(root)
54 | img = preclipseg_transform(frame).to(root.device, dtype=torch.float32)
55 | word_masks = [word_mask]
56 | with torch.no_grad():
57 | preds = root.clipseg_model(img.repeat(len(word_masks),1,1,1), word_masks)[0]
58 |
59 | mask = torch.sigmoid(preds[0][0]).unsqueeze(0).unsqueeze(0) # add batch, channels dims
60 | resized_mask = interpolate(mask, size=(frame.size[1], frame.size[0]), mode='bicubic').squeeze() # rescale mask back to the target resolution
61 | numpy_array = resized_mask.multiply(255).to(dtype=torch.uint8,device='cpu').numpy()
62 | return Image.fromarray(cv2.threshold(numpy_array, 32, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1])
63 |
--------------------------------------------------------------------------------
/style.css:
--------------------------------------------------------------------------------
1 | /*
2 | # Copyright (C) 2023 Deforum LLC
3 | #
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Affero General Public License as published by
6 | # the Free Software Foundation, version 3 of the License.
7 | #
8 | # This program is distributed in the hope that it will be useful,
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | # GNU General Public License for more details.
12 | #
13 | # You should have received a copy of the GNU Affero General Public License
14 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | Contact the authors: https://deforum.github.io/
17 | */
18 |
19 | #vid_to_interpolate_chosen_file .w-full, #pics_to_interpolate_chosen_file .w-full, #vid_to_upscale_chosen_file .w-full, #controlnet_input_video_chosen_file .w-full, #controlnet_input_video_mask_chosen_file .w-full,#vid_to_depth_chosen_file .w-full {
20 | display: flex !important;
21 | align-items: flex-start !important;
22 | justify-content: center !important;
23 | }
24 |
25 | #tab_deforum_interface #hybrid_msg_html {
26 | color: Tomato !important;
27 | margin-top: 5px !important;
28 | text-align: center !important;
29 | font-size: 20px !important;
30 | font-weight: bold !important;
31 | }
32 |
33 | #tab_deforum_interface #leres_license_msg {
34 | color: GoldenRod;
35 | }
36 |
37 | #image_buttons_deforum #img2img_tab,
38 | #image_buttons_deforum #inpaint_tab,
39 | #image_buttons_deforum #extras_tab,
40 | #save_zip_deforum, #save_deforum {
41 | display: none !important;
42 | }
43 |
44 | #main_top_info_accord .label-wrap {
45 | gap:2px;
46 | padding: 0.5rem;
47 | }
48 | #tab_deforum_interface #controlnet_not_found_html_msg, #tab_deforum_interface #depth_warp_msg_html {
49 | color: Tomato;
50 | }
51 |
52 | #below_interpolate_butts_msg {
53 | text-align: center !important;
54 | }
55 |
56 | #tab_deforum_interface #settings_path_msg {
57 | margin: 0.6em;
58 | display: flex;
59 | align-items: flex-start;
60 | justify-content: center;
61 | }
62 |
63 | #tab_deforum_interface .tabs.gradio-tabs.svelte-1g805jl .svelte-vt1mxs.gap {
64 | gap:4px !important;
65 | }
66 |
67 | #tab_deforum_interface #main_top_info_accord {
68 | padding: 1px;
69 | }
70 |
71 | #add_soundtrack .svelte-1p9xokt {
72 | padding: 2.25px;
73 | }
74 |
75 | #tab_deforum_interface .wrap.svelte-xwlu1w, #custom_setting_file {
76 | height: 85px !important;
77 | min-height: 85px !important;
78 | }
79 |
80 | #tab_deforum_interface .file-preview-holder {
81 | overflow-y: auto;
82 | max-height: 60px;
83 | }
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | import pytest
18 | import subprocess
19 | import sys
20 | import os
21 | from subprocess import Popen, PIPE, STDOUT
22 | from pathlib import Path
23 | from tenacity import retry, stop_after_delay, wait_fixed
24 | import threading
25 | import requests
26 |
27 | def pytest_addoption(parser):
28 | parser.addoption("--start-server", action="store_true", help="start the server before the test run (if not specified, you must start the server manually)")
29 |
30 | @pytest.fixture
31 | def cmdopt(request):
32 | return request.config.getoption("--start-server")
33 |
34 | @retry(wait=wait_fixed(5), stop=stop_after_delay(60))
35 | def wait_for_service(url):
36 | response = requests.get(url, timeout=(5, 5))
37 | print(f"Waiting for server to respond 200 at {url} (response: {response.status_code})...")
38 | assert response.status_code == 200
39 |
40 | @pytest.fixture(scope="session", autouse=True)
41 | def start_server(request):
42 | if request.config.getoption("--start-server"):
43 |
44 | # Kick off server subprocess
45 | script_directory = os.path.dirname(__file__)
46 | a1111_directory = Path(script_directory).parent.parent.parent # sd-webui/extensions/deforum/tests/ -> sd-webui
47 | print(f"Starting server in {a1111_directory}...")
48 | proc = Popen(["python", "-m", "coverage", "run", "--data-file=.coverage.server", "launch.py",
49 | "--skip-prepare-environment", "--skip-torch-cuda-test", "--test-server", "--no-half",
50 | "--disable-opt-split-attention", "--use-cpu", "all", "--add-stop-route", "--api", "--deforum-api", "--listen"],
51 | cwd=a1111_directory,
52 | stdout=PIPE,
53 | stderr=STDOUT,
54 | universal_newlines=True)
55 |
56 | # ensure server is killed at the end of the test run
57 | request.addfinalizer(proc.kill)
58 |
59 | # Spin up separate thread to capture the server output to file and stdout
60 | def server_console_manager():
61 | with proc.stdout, open('serverlog.txt', 'ab') as logfile:
62 | for line in proc.stdout:
63 | sys.stdout.write(f"[SERVER LOG] {line}")
64 | sys.stdout.flush()
65 | logfile.write(line.encode('utf-8'))
66 | logfile.flush()
67 | proc.wait()
68 |
69 | threading.Thread(target=server_console_manager).start()
70 |
71 | # Wait for deforum API to respond
72 | wait_for_service('http://localhost:7860/deforum_api/jobs/')
73 |
74 | else:
75 | print("Checking server is already running / waiting for it to come up...")
76 | wait_for_service('http://localhost:7860/deforum_api/jobs/')
--------------------------------------------------------------------------------
/tests/testdata/example_init_vid.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deforum-art/sd-webui-deforum/5d63a339dbec8d476657a1f672a4eeb6dc79ed37/tests/testdata/example_init_vid.mp4
--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2023 Deforum LLC
2 | #
3 | # This program is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Affero General Public License as published by
5 | # the Free Software Foundation, version 3 of the License.
6 | #
7 | # This program is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Affero General Public License
13 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | # Contact the authors: https://deforum.github.io/
16 |
17 | from tenacity import retry, stop_after_delay, wait_fixed
18 | from pydantic_requests import PydanticSession
19 | import requests
20 | from scripts.deforum_api_models import DeforumJobStatus, DeforumJobStatusCategory, DeforumJobPhase
21 |
22 | SERVER_BASE_URL = "http://localhost:7860"
23 | API_ROOT = "/deforum_api"
24 | API_BASE_URL = SERVER_BASE_URL + API_ROOT
25 |
26 | @retry(wait=wait_fixed(2), stop=stop_after_delay(900))
27 | def wait_for_job_to_complete(id : str):
28 | with PydanticSession(
29 | {200: DeforumJobStatus}, headers={"accept": "application/json"}
30 | ) as session:
31 | response = session.get(API_BASE_URL+"/jobs/"+id)
32 | response.raise_for_status()
33 | jobStatus : DeforumJobStatus = response.model
34 | print(f"Waiting for job {id}: status={jobStatus.status}; phase={jobStatus.phase}; execution_time:{jobStatus.execution_time}s")
35 | assert jobStatus.status != DeforumJobStatusCategory.ACCEPTED
36 | return jobStatus
37 |
38 | @retry(wait=wait_fixed(1), stop=stop_after_delay(120))
39 | def wait_for_job_to_enter_phase(id : str, phase : DeforumJobPhase):
40 | with PydanticSession(
41 | {200: DeforumJobStatus}, headers={"accept": "application/json"}
42 | ) as session:
43 | response = session.get(API_BASE_URL+"/jobs/"+id)
44 | response.raise_for_status()
45 | jobStatus : DeforumJobStatus = response.model
46 | print(f"Waiting for job {id} to enter phase {phase}. Currently: status={jobStatus.status}; phase={jobStatus.phase}; execution_time:{jobStatus.execution_time}s")
47 | assert jobStatus.phase == phase
48 | return jobStatus
49 |
50 | @retry(wait=wait_fixed(1), stop=stop_after_delay(120))
51 | def wait_for_job_to_enter_status(id : str, status : DeforumJobStatusCategory):
52 | with PydanticSession(
53 | {200: DeforumJobStatus}, headers={"accept": "application/json"}
54 | ) as session:
55 | response = session.get(API_BASE_URL+"/jobs/"+id)
56 | response.raise_for_status()
57 | jobStatus : DeforumJobStatus = response.model
58 | print(f"Waiting for job {id} to enter status {status}. Currently: status={jobStatus.status}; phase={jobStatus.phase}; execution_time:{jobStatus.execution_time}s")
59 | assert jobStatus.status == status
60 | return jobStatus
61 |
62 |
63 | def gpu_disabled():
64 | response = requests.get(SERVER_BASE_URL+"/sdapi/v1/cmd-flags")
65 | response.raise_for_status()
66 | cmd_flags = response.json()
67 | return cmd_flags["use_cpu"] == ["all"]
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------