├── .gitignore
├── LICENSE
├── README.md
├── assets
├── example_images
│ └── statue.jpg
└── readme
│ ├── dark
│ ├── follow-cta-rev2.png
│ └── header-rev1.png
│ ├── examples
│ ├── canny_example_1.png
│ ├── canny_result1.png
│ ├── canny_result2.png
│ ├── depth_example_1.png
│ ├── depth_example_2.png
│ ├── depth_example_3.png
│ ├── depth_result1.png
│ ├── depth_result2.png
│ ├── furry1.png
│ ├── furry2.png
│ ├── furry3.png
│ ├── furry4.png
│ ├── hed_example_1.png
│ ├── hed_example_2.png
│ ├── hed_result1.png
│ ├── picture-0-rev1.png
│ ├── picture-1-rev1.png
│ ├── picture-2-rev1.png
│ ├── picture-3-rev1.png
│ ├── picture-4-rev1.png
│ ├── picture-5-rev1.png
│ ├── picture-6-rev1.png
│ ├── picture-7-rev1.png
│ ├── result_12.png
│ ├── result_13.png
│ ├── result_14.png
│ ├── result_15.png
│ ├── result_18.png
│ ├── result_19.png
│ ├── result_21.png
│ ├── result_22.png
│ ├── result_23.png
│ └── result_24.png
│ └── light
│ ├── controlnet-canny-header-rev1.png
│ ├── flux-controlnet-collections.png
│ ├── flux-lora-collection-rev1.png
│ ├── follow-cta-rev2.png
│ ├── header-rev1.png
│ ├── join-our-discord-rev1.png
│ └── lora-photorealism-header-rev1.png
├── cog.yaml
├── gradio_demo.py
├── image_datasets
├── canny_dataset.py
└── dataset.py
├── main.py
├── models_licence
└── LICENSE-FLUX1-dev
├── predict.py
├── requirements.txt
├── src
└── flux
│ ├── __init__.py
│ ├── __main__.py
│ ├── annotator
│ ├── canny
│ │ └── __init__.py
│ ├── ckpts
│ │ └── ckpts.txt
│ ├── dwpose
│ │ ├── __init__.py
│ │ ├── onnxdet.py
│ │ ├── onnxpose.py
│ │ ├── util.py
│ │ └── wholebody.py
│ ├── hed
│ │ └── __init__.py
│ ├── midas
│ │ ├── LICENSE
│ │ ├── __init__.py
│ │ ├── api.py
│ │ ├── midas
│ │ │ ├── __init__.py
│ │ │ ├── base_model.py
│ │ │ ├── blocks.py
│ │ │ ├── dpt_depth.py
│ │ │ ├── midas_net.py
│ │ │ ├── midas_net_custom.py
│ │ │ ├── transforms.py
│ │ │ └── vit.py
│ │ └── utils.py
│ ├── mlsd
│ │ ├── LICENSE
│ │ ├── __init__.py
│ │ ├── models
│ │ │ ├── mbv2_mlsd_large.py
│ │ │ └── mbv2_mlsd_tiny.py
│ │ └── utils.py
│ ├── tile
│ │ ├── __init__.py
│ │ └── guided_filter.py
│ ├── util.py
│ └── zoe
│ │ ├── LICENSE
│ │ ├── __init__.py
│ │ └── zoedepth
│ │ ├── data
│ │ ├── __init__.py
│ │ ├── data_mono.py
│ │ ├── ddad.py
│ │ ├── diml_indoor_test.py
│ │ ├── diml_outdoor_test.py
│ │ ├── diode.py
│ │ ├── hypersim.py
│ │ ├── ibims.py
│ │ ├── preprocess.py
│ │ ├── sun_rgbd_loader.py
│ │ ├── transforms.py
│ │ ├── vkitti.py
│ │ └── vkitti2.py
│ │ ├── models
│ │ ├── __init__.py
│ │ ├── base_models
│ │ │ ├── __init__.py
│ │ │ ├── midas.py
│ │ │ └── midas_repo
│ │ │ │ ├── .gitignore
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── LICENSE
│ │ │ │ ├── README.md
│ │ │ │ ├── environment.yaml
│ │ │ │ ├── hubconf.py
│ │ │ │ ├── input
│ │ │ │ └── .placeholder
│ │ │ │ ├── midas
│ │ │ │ ├── backbones
│ │ │ │ │ ├── beit.py
│ │ │ │ │ ├── levit.py
│ │ │ │ │ ├── next_vit.py
│ │ │ │ │ ├── swin.py
│ │ │ │ │ ├── swin2.py
│ │ │ │ │ ├── swin_common.py
│ │ │ │ │ ├── utils.py
│ │ │ │ │ └── vit.py
│ │ │ │ ├── base_model.py
│ │ │ │ ├── blocks.py
│ │ │ │ ├── dpt_depth.py
│ │ │ │ ├── midas_net.py
│ │ │ │ ├── midas_net_custom.py
│ │ │ │ ├── model_loader.py
│ │ │ │ └── transforms.py
│ │ │ │ ├── mobile
│ │ │ │ ├── README.md
│ │ │ │ ├── android
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── EXPLORE_THE_CODE.md
│ │ │ │ │ ├── LICENSE
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── app
│ │ │ │ │ │ ├── .gitignore
│ │ │ │ │ │ ├── build.gradle
│ │ │ │ │ │ ├── proguard-rules.pro
│ │ │ │ │ │ └── src
│ │ │ │ │ │ │ ├── androidTest
│ │ │ │ │ │ │ ├── assets
│ │ │ │ │ │ │ │ ├── fox-mobilenet_v1_1.0_224_support.txt
│ │ │ │ │ │ │ │ └── fox-mobilenet_v1_1.0_224_task_api.txt
│ │ │ │ │ │ │ └── java
│ │ │ │ │ │ │ │ ├── AndroidManifest.xml
│ │ │ │ │ │ │ │ └── org
│ │ │ │ │ │ │ │ └── tensorflow
│ │ │ │ │ │ │ │ └── lite
│ │ │ │ │ │ │ │ └── examples
│ │ │ │ │ │ │ │ └── classification
│ │ │ │ │ │ │ │ └── ClassifierTest.java
│ │ │ │ │ │ │ └── main
│ │ │ │ │ │ │ ├── AndroidManifest.xml
│ │ │ │ │ │ │ ├── java
│ │ │ │ │ │ │ └── org
│ │ │ │ │ │ │ │ └── tensorflow
│ │ │ │ │ │ │ │ └── lite
│ │ │ │ │ │ │ │ └── examples
│ │ │ │ │ │ │ │ └── classification
│ │ │ │ │ │ │ │ ├── CameraActivity.java
│ │ │ │ │ │ │ │ ├── CameraConnectionFragment.java
│ │ │ │ │ │ │ │ ├── ClassifierActivity.java
│ │ │ │ │ │ │ │ ├── LegacyCameraConnectionFragment.java
│ │ │ │ │ │ │ │ └── customview
│ │ │ │ │ │ │ │ ├── AutoFitTextureView.java
│ │ │ │ │ │ │ │ ├── OverlayView.java
│ │ │ │ │ │ │ │ ├── RecognitionScoreView.java
│ │ │ │ │ │ │ │ └── ResultsView.java
│ │ │ │ │ │ │ └── res
│ │ │ │ │ │ │ ├── drawable-v24
│ │ │ │ │ │ │ └── ic_launcher_foreground.xml
│ │ │ │ │ │ │ ├── drawable
│ │ │ │ │ │ │ ├── bottom_sheet_bg.xml
│ │ │ │ │ │ │ ├── ic_baseline_add.xml
│ │ │ │ │ │ │ ├── ic_baseline_remove.xml
│ │ │ │ │ │ │ ├── ic_launcher_background.xml
│ │ │ │ │ │ │ └── rectangle.xml
│ │ │ │ │ │ │ ├── layout
│ │ │ │ │ │ │ ├── tfe_ic_activity_camera.xml
│ │ │ │ │ │ │ ├── tfe_ic_camera_connection_fragment.xml
│ │ │ │ │ │ │ └── tfe_ic_layout_bottom_sheet.xml
│ │ │ │ │ │ │ ├── mipmap-anydpi-v26
│ │ │ │ │ │ │ ├── ic_launcher.xml
│ │ │ │ │ │ │ └── ic_launcher_round.xml
│ │ │ │ │ │ │ └── values
│ │ │ │ │ │ │ ├── colors.xml
│ │ │ │ │ │ │ ├── dimens.xml
│ │ │ │ │ │ │ ├── strings.xml
│ │ │ │ │ │ │ └── styles.xml
│ │ │ │ │ ├── build.gradle
│ │ │ │ │ ├── gradle.properties
│ │ │ │ │ ├── gradle
│ │ │ │ │ │ └── wrapper
│ │ │ │ │ │ │ ├── gradle-wrapper.jar
│ │ │ │ │ │ │ └── gradle-wrapper.properties
│ │ │ │ │ ├── gradlew
│ │ │ │ │ ├── gradlew.bat
│ │ │ │ │ ├── lib_support
│ │ │ │ │ │ ├── build.gradle
│ │ │ │ │ │ ├── proguard-rules.pro
│ │ │ │ │ │ └── src
│ │ │ │ │ │ │ └── main
│ │ │ │ │ │ │ ├── AndroidManifest.xml
│ │ │ │ │ │ │ └── java
│ │ │ │ │ │ │ └── org
│ │ │ │ │ │ │ └── tensorflow
│ │ │ │ │ │ │ └── lite
│ │ │ │ │ │ │ └── examples
│ │ │ │ │ │ │ └── classification
│ │ │ │ │ │ │ └── tflite
│ │ │ │ │ │ │ ├── Classifier.java
│ │ │ │ │ │ │ ├── ClassifierFloatEfficientNet.java
│ │ │ │ │ │ │ ├── ClassifierFloatMobileNet.java
│ │ │ │ │ │ │ ├── ClassifierQuantizedEfficientNet.java
│ │ │ │ │ │ │ └── ClassifierQuantizedMobileNet.java
│ │ │ │ │ ├── lib_task_api
│ │ │ │ │ │ ├── build.gradle
│ │ │ │ │ │ ├── proguard-rules.pro
│ │ │ │ │ │ └── src
│ │ │ │ │ │ │ └── main
│ │ │ │ │ │ │ ├── AndroidManifest.xml
│ │ │ │ │ │ │ └── java
│ │ │ │ │ │ │ └── org
│ │ │ │ │ │ │ └── tensorflow
│ │ │ │ │ │ │ └── lite
│ │ │ │ │ │ │ └── examples
│ │ │ │ │ │ │ └── classification
│ │ │ │ │ │ │ └── tflite
│ │ │ │ │ │ │ ├── Classifier.java
│ │ │ │ │ │ │ ├── ClassifierFloatEfficientNet.java
│ │ │ │ │ │ │ ├── ClassifierFloatMobileNet.java
│ │ │ │ │ │ │ ├── ClassifierQuantizedEfficientNet.java
│ │ │ │ │ │ │ └── ClassifierQuantizedMobileNet.java
│ │ │ │ │ ├── models
│ │ │ │ │ │ ├── build.gradle
│ │ │ │ │ │ ├── download.gradle
│ │ │ │ │ │ ├── proguard-rules.pro
│ │ │ │ │ │ └── src
│ │ │ │ │ │ │ └── main
│ │ │ │ │ │ │ ├── AndroidManifest.xml
│ │ │ │ │ │ │ └── assets
│ │ │ │ │ │ │ ├── labels.txt
│ │ │ │ │ │ │ ├── labels_without_background.txt
│ │ │ │ │ │ │ └── run_tflite.py
│ │ │ │ │ └── settings.gradle
│ │ │ │ └── ios
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── LICENSE
│ │ │ │ │ ├── Midas.xcodeproj
│ │ │ │ │ ├── project.pbxproj
│ │ │ │ │ ├── project.xcworkspace
│ │ │ │ │ │ ├── contents.xcworkspacedata
│ │ │ │ │ │ ├── xcshareddata
│ │ │ │ │ │ │ └── IDEWorkspaceChecks.plist
│ │ │ │ │ │ └── xcuserdata
│ │ │ │ │ │ │ └── admin.xcuserdatad
│ │ │ │ │ │ │ └── UserInterfaceState.xcuserstate
│ │ │ │ │ └── xcuserdata
│ │ │ │ │ │ └── admin.xcuserdatad
│ │ │ │ │ │ └── xcschemes
│ │ │ │ │ │ └── xcschememanagement.plist
│ │ │ │ │ ├── Midas
│ │ │ │ │ ├── AppDelegate.swift
│ │ │ │ │ ├── Assets.xcassets
│ │ │ │ │ │ ├── AppIcon.appiconset
│ │ │ │ │ │ │ └── Contents.json
│ │ │ │ │ │ └── Contents.json
│ │ │ │ │ ├── Camera Feed
│ │ │ │ │ │ ├── CameraFeedManager.swift
│ │ │ │ │ │ └── PreviewView.swift
│ │ │ │ │ ├── Cells
│ │ │ │ │ │ └── InfoCell.swift
│ │ │ │ │ ├── Constants.swift
│ │ │ │ │ ├── Extensions
│ │ │ │ │ │ ├── CGSizeExtension.swift
│ │ │ │ │ │ ├── CVPixelBufferExtension.swift
│ │ │ │ │ │ └── TFLiteExtension.swift
│ │ │ │ │ ├── Info.plist
│ │ │ │ │ ├── ModelDataHandler
│ │ │ │ │ │ └── ModelDataHandler.swift
│ │ │ │ │ ├── Storyboards
│ │ │ │ │ │ └── Base.lproj
│ │ │ │ │ │ │ ├── Launch Screen.storyboard
│ │ │ │ │ │ │ └── Main.storyboard
│ │ │ │ │ ├── ViewControllers
│ │ │ │ │ │ └── ViewController.swift
│ │ │ │ │ └── Views
│ │ │ │ │ │ └── OverlayView.swift
│ │ │ │ │ ├── Podfile
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── RunScripts
│ │ │ │ │ └── download_models.sh
│ │ │ │ ├── output
│ │ │ │ └── .placeholder
│ │ │ │ ├── ros
│ │ │ │ ├── LICENSE
│ │ │ │ ├── README.md
│ │ │ │ ├── additions
│ │ │ │ │ ├── do_catkin_make.sh
│ │ │ │ │ ├── downloads.sh
│ │ │ │ │ ├── install_ros_melodic_ubuntu_17_18.sh
│ │ │ │ │ ├── install_ros_noetic_ubuntu_20.sh
│ │ │ │ │ └── make_package_cpp.sh
│ │ │ │ ├── launch_midas_cpp.sh
│ │ │ │ ├── midas_cpp
│ │ │ │ │ ├── CMakeLists.txt
│ │ │ │ │ ├── launch
│ │ │ │ │ │ ├── midas_cpp.launch
│ │ │ │ │ │ └── midas_talker_listener.launch
│ │ │ │ │ ├── package.xml
│ │ │ │ │ ├── scripts
│ │ │ │ │ │ ├── listener.py
│ │ │ │ │ │ ├── listener_original.py
│ │ │ │ │ │ └── talker.py
│ │ │ │ │ └── src
│ │ │ │ │ │ └── main.cpp
│ │ │ │ └── run_talker_listener_test.sh
│ │ │ │ ├── run.py
│ │ │ │ ├── tf
│ │ │ │ ├── README.md
│ │ │ │ ├── input
│ │ │ │ │ └── .placeholder
│ │ │ │ ├── make_onnx_model.py
│ │ │ │ ├── output
│ │ │ │ │ └── .placeholder
│ │ │ │ ├── run_onnx.py
│ │ │ │ ├── run_pb.py
│ │ │ │ ├── transforms.py
│ │ │ │ └── utils.py
│ │ │ │ └── utils.py
│ │ ├── builder.py
│ │ ├── depth_model.py
│ │ ├── layers
│ │ │ ├── attractor.py
│ │ │ ├── dist_layers.py
│ │ │ ├── localbins_layers.py
│ │ │ └── patch_transformer.py
│ │ ├── model_io.py
│ │ ├── zoedepth
│ │ │ ├── __init__.py
│ │ │ ├── config_zoedepth.json
│ │ │ ├── config_zoedepth_kitti.json
│ │ │ └── zoedepth_v1.py
│ │ └── zoedepth_nk
│ │ │ ├── __init__.py
│ │ │ ├── config_zoedepth_nk.json
│ │ │ └── zoedepth_nk_v1.py
│ │ ├── trainers
│ │ ├── base_trainer.py
│ │ ├── builder.py
│ │ ├── loss.py
│ │ ├── zoedepth_nk_trainer.py
│ │ └── zoedepth_trainer.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── arg_utils.py
│ │ ├── config.py
│ │ ├── easydict
│ │ └── __init__.py
│ │ ├── geometry.py
│ │ └── misc.py
│ ├── api.py
│ ├── cli.py
│ ├── controlnet.py
│ ├── math.py
│ ├── model.py
│ ├── modules
│ ├── autoencoder.py
│ ├── conditioner.py
│ └── layers.py
│ ├── sampling.py
│ ├── util.py
│ └── xflux_pipeline.py
├── train_configs
├── test_canny_controlnet.yaml
├── test_finetune.yaml
└── test_lora.yaml
├── train_flux_deepspeed.py
├── train_flux_deepspeed_controlnet.py
└── train_flux_lora_deepspeed.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | Makefile
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | weights/
25 |
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache/
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 | cover/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | .pybuilder/
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | # For a library or package, you might want to ignore these files since the code is
90 | # intended to run in multiple environments; otherwise, check them in:
91 | # .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # poetry
101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102 | # This is especially recommended for binary packages to ensure reproducibility, and is more
103 | # commonly ignored for libraries.
104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105 | #poetry.lock
106 |
107 | # pdm
108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109 | #pdm.lock
110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111 | # in version control.
112 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
113 | .pdm.toml
114 | .pdm-python
115 | .pdm-build/
116 |
117 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118 | __pypackages__/
119 |
120 | # Celery stuff
121 | celerybeat-schedule
122 | celerybeat.pid
123 |
124 | # SageMath parsed files
125 | *.sage.py
126 |
127 | # Environments
128 | .env
129 | .venv
130 | env/
131 | venv/
132 | ENV/
133 | env.bak/
134 | venv.bak/
135 |
136 | # Spyder project settings
137 | .spyderproject
138 | .spyproject
139 |
140 | # Rope project settings
141 | .ropeproject
142 |
143 | # mkdocs documentation
144 | /site
145 |
146 | # mypy
147 | .mypy_cache/
148 | .dmypy.json
149 | dmypy.json
150 |
151 | # Pyre type checker
152 | .pyre/
153 |
154 | # pytype static type analyzer
155 | .pytype/
156 |
157 | # Cython debug symbols
158 | cython_debug/
159 |
160 | # PyCharm
161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163 | # and can be added to the global gitignore or merged into this file. For a more nuclear
164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
165 | #.idea/
166 |
167 | .DS_Store
168 |
--------------------------------------------------------------------------------
/assets/example_images/statue.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/example_images/statue.jpg
--------------------------------------------------------------------------------
/assets/readme/dark/follow-cta-rev2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/dark/follow-cta-rev2.png
--------------------------------------------------------------------------------
/assets/readme/dark/header-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/dark/header-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/canny_example_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/canny_example_1.png
--------------------------------------------------------------------------------
/assets/readme/examples/canny_result1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/canny_result1.png
--------------------------------------------------------------------------------
/assets/readme/examples/canny_result2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/canny_result2.png
--------------------------------------------------------------------------------
/assets/readme/examples/depth_example_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_example_1.png
--------------------------------------------------------------------------------
/assets/readme/examples/depth_example_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_example_2.png
--------------------------------------------------------------------------------
/assets/readme/examples/depth_example_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_example_3.png
--------------------------------------------------------------------------------
/assets/readme/examples/depth_result1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_result1.png
--------------------------------------------------------------------------------
/assets/readme/examples/depth_result2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_result2.png
--------------------------------------------------------------------------------
/assets/readme/examples/furry1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/furry1.png
--------------------------------------------------------------------------------
/assets/readme/examples/furry2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/furry2.png
--------------------------------------------------------------------------------
/assets/readme/examples/furry3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/furry3.png
--------------------------------------------------------------------------------
/assets/readme/examples/furry4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/furry4.png
--------------------------------------------------------------------------------
/assets/readme/examples/hed_example_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/hed_example_1.png
--------------------------------------------------------------------------------
/assets/readme/examples/hed_example_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/hed_example_2.png
--------------------------------------------------------------------------------
/assets/readme/examples/hed_result1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/hed_result1.png
--------------------------------------------------------------------------------
/assets/readme/examples/picture-0-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-0-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/picture-1-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-1-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/picture-2-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-2-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/picture-3-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-3-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/picture-4-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-4-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/picture-5-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-5-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/picture-6-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-6-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/picture-7-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-7-rev1.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_12.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_13.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_14.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_15.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_18.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_19.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_21.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_22.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_22.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_23.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_23.png
--------------------------------------------------------------------------------
/assets/readme/examples/result_24.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_24.png
--------------------------------------------------------------------------------
/assets/readme/light/controlnet-canny-header-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/controlnet-canny-header-rev1.png
--------------------------------------------------------------------------------
/assets/readme/light/flux-controlnet-collections.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/flux-controlnet-collections.png
--------------------------------------------------------------------------------
/assets/readme/light/flux-lora-collection-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/flux-lora-collection-rev1.png
--------------------------------------------------------------------------------
/assets/readme/light/follow-cta-rev2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/follow-cta-rev2.png
--------------------------------------------------------------------------------
/assets/readme/light/header-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/header-rev1.png
--------------------------------------------------------------------------------
/assets/readme/light/join-our-discord-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/join-our-discord-rev1.png
--------------------------------------------------------------------------------
/assets/readme/light/lora-photorealism-header-rev1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/lora-photorealism-header-rev1.png
--------------------------------------------------------------------------------
/cog.yaml:
--------------------------------------------------------------------------------
1 | # Configuration for Cog ⚙️
2 | # Reference: https://cog.run/yaml
3 |
4 | build:
5 | gpu: true
6 | cuda: "12.1"
7 | python_version: "3.11"
8 | python_packages:
9 | - "accelerate==0.30.1"
10 | - "deepspeed==0.14.4"
11 | - "einops==0.8.0"
12 | - "transformers==4.43.3"
13 | - "huggingface-hub==0.24.5"
14 | - "einops==0.8.0"
15 | - "pandas==2.2.2"
16 | - "opencv-python==4.10.0.84"
17 | - "pillow==10.4.0"
18 | - "optimum-quanto==0.2.4"
19 | - "sentencepiece==0.2.0"
20 | run:
21 | - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.8.2/pget_linux_x86_64" && chmod +x /usr/local/bin/pget
22 |
23 | # predict.py defines how predictions are run on your model
24 | predict: "predict.py:Predictor"
25 |
--------------------------------------------------------------------------------
/image_datasets/canny_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | from PIL import Image
5 | import torch
6 | from torch.utils.data import Dataset, DataLoader
7 | import json
8 | import random
9 | import cv2
10 |
11 |
12 | def canny_processor(image, low_threshold=100, high_threshold=200):
13 | image = np.array(image)
14 | image = cv2.Canny(image, low_threshold, high_threshold)
15 | image = image[:, :, None]
16 | image = np.concatenate([image, image, image], axis=2)
17 | canny_image = Image.fromarray(image)
18 | return canny_image
19 |
20 |
21 | def c_crop(image):
22 | width, height = image.size
23 | new_size = min(width, height)
24 | left = (width - new_size) / 2
25 | top = (height - new_size) / 2
26 | right = (width + new_size) / 2
27 | bottom = (height + new_size) / 2
28 | return image.crop((left, top, right, bottom))
29 |
30 | class CustomImageDataset(Dataset):
31 | def __init__(self, img_dir, img_size=512):
32 | self.images = [os.path.join(img_dir, i) for i in os.listdir(img_dir) if '.jpg' in i or '.png' in i]
33 | self.images.sort()
34 | self.img_size = img_size
35 |
36 | def __len__(self):
37 | return len(self.images)
38 |
39 | def __getitem__(self, idx):
40 | try:
41 |             img = Image.open(self.images[idx]).convert('RGB')  # force RGB so grayscale/RGBA files don't break the Canny hint or the 3-channel normalization
42 | img = c_crop(img)
43 | img = img.resize((self.img_size, self.img_size))
44 | hint = canny_processor(img)
45 | img = torch.from_numpy((np.array(img) / 127.5) - 1)
46 | img = img.permute(2, 0, 1)
47 | hint = torch.from_numpy((np.array(hint) / 127.5) - 1)
48 | hint = hint.permute(2, 0, 1)
49 | json_path = self.images[idx].split('.')[0] + '.json'
50 | prompt = json.load(open(json_path))['caption']
51 | return img, hint, prompt
52 | except Exception as e:
53 | print(e)
54 | return self.__getitem__(random.randint(0, len(self.images) - 1))
55 |
56 |
57 | def loader(train_batch_size, num_workers, **args):
58 | dataset = CustomImageDataset(**args)
59 | return DataLoader(dataset, batch_size=train_batch_size, num_workers=num_workers, shuffle=True)
60 |
--------------------------------------------------------------------------------
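
Usage sketch (not a file in the repository): a minimal way to drive the loader above, assuming the repo root is on PYTHONPATH and that ./data/ holds .jpg/.png images, each paired with a same-named .json file containing a "caption" field (the layout __getitem__ expects). The directory path and batch size are placeholders.

from image_datasets.canny_dataset import loader

# img_dir and img_size are forwarded to CustomImageDataset via **args
train_loader = loader(train_batch_size=2, num_workers=0, img_dir="./data/", img_size=512)

img, hint, prompt = next(iter(train_loader))
print(img.shape)   # torch.Size([2, 3, 512, 512]), values roughly in [-1, 1]
print(hint.shape)  # torch.Size([2, 3, 512, 512]), Canny edges replicated to 3 channels
print(prompt)      # list of caption strings collated by the DataLoader
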
/image_datasets/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | from PIL import Image
5 | import torch
6 | from torch.utils.data import Dataset, DataLoader
7 | import json
8 | import random
9 |
10 | def image_resize(img, max_size=512):
11 | w, h = img.size
12 | if w >= h:
13 | new_w = max_size
14 | new_h = int((max_size / w) * h)
15 | else:
16 | new_h = max_size
17 | new_w = int((max_size / h) * w)
18 | return img.resize((new_w, new_h))
19 |
20 | def c_crop(image):
21 | width, height = image.size
22 | new_size = min(width, height)
23 | left = (width - new_size) / 2
24 | top = (height - new_size) / 2
25 | right = (width + new_size) / 2
26 | bottom = (height + new_size) / 2
27 | return image.crop((left, top, right, bottom))
28 |
29 | def crop_to_aspect_ratio(image, ratio="16:9"):
30 | width, height = image.size
31 | ratio_map = {
32 | "16:9": (16, 9),
33 | "4:3": (4, 3),
34 | "1:1": (1, 1)
35 | }
36 | target_w, target_h = ratio_map[ratio]
37 | target_ratio_value = target_w / target_h
38 |
39 | current_ratio = width / height
40 |
41 | if current_ratio > target_ratio_value:
42 | new_width = int(height * target_ratio_value)
43 | offset = (width - new_width) // 2
44 | crop_box = (offset, 0, offset + new_width, height)
45 | else:
46 | new_height = int(width / target_ratio_value)
47 | offset = (height - new_height) // 2
48 | crop_box = (0, offset, width, offset + new_height)
49 |
50 | cropped_img = image.crop(crop_box)
51 | return cropped_img
52 |
53 |
54 | class CustomImageDataset(Dataset):
55 | def __init__(self, img_dir, img_size=512, caption_type='json', random_ratio=False):
56 | self.images = [os.path.join(img_dir, i) for i in os.listdir(img_dir) if '.jpg' in i or '.png' in i]
57 | self.images.sort()
58 | self.img_size = img_size
59 | self.caption_type = caption_type
60 | self.random_ratio = random_ratio
61 |
62 | def __len__(self):
63 | return len(self.images)
64 |
65 | def __getitem__(self, idx):
66 | try:
67 | img = Image.open(self.images[idx]).convert('RGB')
68 | if self.random_ratio:
69 | ratio = random.choice(["16:9", "default", "1:1", "4:3"])
70 | if ratio != "default":
71 | img = crop_to_aspect_ratio(img, ratio)
72 | img = image_resize(img, self.img_size)
73 | w, h = img.size
74 | new_w = (w // 32) * 32
75 | new_h = (h // 32) * 32
76 | img = img.resize((new_w, new_h))
77 | img = torch.from_numpy((np.array(img) / 127.5) - 1)
78 | img = img.permute(2, 0, 1)
79 | json_path = self.images[idx].split('.')[0] + '.' + self.caption_type
80 | if self.caption_type == "json":
81 | prompt = json.load(open(json_path))['caption']
82 | else:
83 | prompt = open(json_path).read()
84 | return img, prompt
85 | except Exception as e:
86 | print(e)
87 | return self.__getitem__(random.randint(0, len(self.images) - 1))
88 |
89 |
90 | def loader(train_batch_size, num_workers, **args):
91 | dataset = CustomImageDataset(**args)
92 | return DataLoader(dataset, batch_size=train_batch_size, num_workers=num_workers, shuffle=True)
93 |
--------------------------------------------------------------------------------
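
Usage sketch (not part of the repository): the same loader pattern for the plain dataset, here with text-file captions and random aspect-ratio cropping. The directory layout (xxx.png next to xxx.txt) is an assumption; the keyword arguments mirror what a train config would pass through **args.

from image_datasets.dataset import loader

train_loader = loader(train_batch_size=1, num_workers=0,
                      img_dir="./data/", img_size=768,
                      caption_type="txt", random_ratio=True)

img, prompt = next(iter(train_loader))
# height and width are snapped to multiples of 32, so shapes vary when random_ratio=True
print(img.shape, prompt)
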
/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate==0.30.1
2 | deepspeed==0.14.4
3 | einops==0.8.0
4 | transformers==4.43.3
5 | huggingface-hub==0.24.5
6 | optimum-quanto
7 | datasets
8 | omegaconf
9 | diffusers
10 | sentencepiece
11 | opencv-python
12 | matplotlib
13 | onnxruntime
14 | torchvision
15 | timm
16 |
--------------------------------------------------------------------------------
/src/flux/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | from ._version import version as __version__ # type: ignore
3 | from ._version import version_tuple
4 | except ImportError:
5 | __version__ = "unknown (no version information available)"
6 | version_tuple = (0, 0, "unknown", "noinfo")
7 |
8 | from pathlib import Path
9 |
10 | PACKAGE = __package__.replace("_", "-")
11 | PACKAGE_ROOT = Path(__file__).parent
12 |
--------------------------------------------------------------------------------
/src/flux/__main__.py:
--------------------------------------------------------------------------------
1 | from .cli import app
2 |
3 | if __name__ == "__main__":
4 | app()
5 |
--------------------------------------------------------------------------------
/src/flux/annotator/canny/__init__.py:
--------------------------------------------------------------------------------
1 | import cv2
2 |
3 |
4 | class CannyDetector:
5 | def __call__(self, img, low_threshold, high_threshold):
6 | return cv2.Canny(img, low_threshold, high_threshold)
7 |
--------------------------------------------------------------------------------
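
Usage sketch (not part of the repository): producing a Canny control image with the detector above, combined with the helpers from src/flux/annotator/util.py. The import path assumes the repo root is on PYTHONPATH so that src.flux resolves as a namespace package; the image path and thresholds are placeholders.

import cv2
from src.flux.annotator.canny import CannyDetector
from src.flux.annotator.util import HWC3, resize_image

img = cv2.imread("example.jpg")                  # uint8 BGR, HxWx3
img = resize_image(HWC3(img), resolution=512)    # sides rounded to multiples of 64
edges = CannyDetector()(img, low_threshold=100, high_threshold=200)
cv2.imwrite("canny_hint.png", HWC3(edges))       # edge map replicated to 3 channels
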
/src/flux/annotator/ckpts/ckpts.txt:
--------------------------------------------------------------------------------
1 | Weights here.
--------------------------------------------------------------------------------
/src/flux/annotator/dwpose/__init__.py:
--------------------------------------------------------------------------------
1 | # Openpose
2 | # Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
3 | # 2nd Edited by https://github.com/Hzzone/pytorch-openpose
4 | # 3rd Edited by ControlNet
5 | # 4th Edited by ControlNet (added face and correct hands)
6 |
7 | import os
8 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
9 |
10 | import torch
11 | import numpy as np
12 | from . import util
13 | from .wholebody import Wholebody
14 |
15 | def draw_pose(pose, H, W):
16 | bodies = pose['bodies']
17 | faces = pose['faces']
18 | hands = pose['hands']
19 | candidate = bodies['candidate']
20 | subset = bodies['subset']
21 | canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
22 |
23 | canvas = util.draw_bodypose(canvas, candidate, subset)
24 |
25 | canvas = util.draw_handpose(canvas, hands)
26 |
27 | canvas = util.draw_facepose(canvas, faces)
28 |
29 | return canvas
30 |
31 |
32 | class DWposeDetector:
33 | def __init__(self, device):
34 |
35 | self.pose_estimation = Wholebody(device)
36 |
37 | def __call__(self, oriImg):
38 | oriImg = oriImg.copy()
39 | H, W, C = oriImg.shape
40 | with torch.no_grad():
41 | candidate, subset = self.pose_estimation(oriImg)
42 | nums, keys, locs = candidate.shape
43 | candidate[..., 0] /= float(W)
44 | candidate[..., 1] /= float(H)
45 | body = candidate[:,:18].copy()
46 | body = body.reshape(nums*18, locs)
47 | score = subset[:,:18]
48 | for i in range(len(score)):
49 | for j in range(len(score[i])):
50 | if score[i][j] > 0.3:
51 | score[i][j] = int(18*i+j)
52 | else:
53 | score[i][j] = -1
54 |
55 | un_visible = subset<0.3
56 | candidate[un_visible] = -1
57 |
58 | foot = candidate[:,18:24]
59 |
60 | faces = candidate[:,24:92]
61 |
62 | hands = candidate[:,92:113]
63 | hands = np.vstack([hands, candidate[:,113:]])
64 |
65 | bodies = dict(candidate=body, subset=score)
66 | pose = dict(bodies=bodies, hands=hands, faces=faces)
67 |
68 | return draw_pose(pose, H, W)
69 |
--------------------------------------------------------------------------------
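
Usage sketch (not part of the repository): running the DWpose annotator on a single image. On first use Wholebody downloads two ONNX models from the yzd-v/DWPose Hugging Face repo; pass device="cpu" to fall back to CPUExecutionProvider. The image path is a placeholder and the import layout is assumed as in the earlier sketches.

import cv2
from src.flux.annotator.dwpose import DWposeDetector
from src.flux.annotator.util import HWC3, resize_image

detector = DWposeDetector(device="cuda:0")
img = resize_image(HWC3(cv2.imread("person.jpg")), resolution=512)
pose_map = detector(img)                 # HxWx3 uint8 canvas with body, hand and face keypoints drawn
cv2.imwrite("pose_hint.png", pose_map)
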
/src/flux/annotator/dwpose/wholebody.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | import onnxruntime as ort
5 | from huggingface_hub import hf_hub_download
6 | from .onnxdet import inference_detector
7 | from .onnxpose import inference_pose
8 |
9 |
10 | class Wholebody:
11 | def __init__(self, device="cuda:0"):
12 | providers = ['CPUExecutionProvider'] if device == 'cpu' else ['CUDAExecutionProvider']
13 | onnx_det = hf_hub_download("yzd-v/DWPose", "yolox_l.onnx")
14 | onnx_pose = hf_hub_download("yzd-v/DWPose", "dw-ll_ucoco_384.onnx")
15 |
16 | self.session_det = ort.InferenceSession(path_or_bytes=onnx_det, providers=providers)
17 | self.session_pose = ort.InferenceSession(path_or_bytes=onnx_pose, providers=providers)
18 |
19 | def __call__(self, oriImg):
20 | det_result = inference_detector(self.session_det, oriImg)
21 | keypoints, scores = inference_pose(self.session_pose, det_result, oriImg)
22 |
23 | keypoints_info = np.concatenate(
24 | (keypoints, scores[..., None]), axis=-1)
25 | # compute neck joint
26 | neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
27 | # neck score when visualizing pred
28 | neck[:, 2:4] = np.logical_and(
29 | keypoints_info[:, 5, 2:4] > 0.3,
30 | keypoints_info[:, 6, 2:4] > 0.3).astype(int)
31 | new_keypoints_info = np.insert(
32 | keypoints_info, 17, neck, axis=1)
33 | mmpose_idx = [
34 | 17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3
35 | ]
36 | openpose_idx = [
37 | 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17
38 | ]
39 | new_keypoints_info[:, openpose_idx] = \
40 | new_keypoints_info[:, mmpose_idx]
41 | keypoints_info = new_keypoints_info
42 |
43 | keypoints, scores = keypoints_info[
44 | ..., :2], keypoints_info[..., 2]
45 |
46 | return keypoints, scores
47 |
48 |
49 |
--------------------------------------------------------------------------------
/src/flux/annotator/midas/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/midas/__init__.py:
--------------------------------------------------------------------------------
1 | # Midas Depth Estimation
2 | # From https://github.com/isl-org/MiDaS
3 | # MIT LICENSE
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 |
9 | from einops import rearrange
10 | from .api import MiDaSInference
11 |
12 |
13 | class MidasDetector:
14 | def __init__(self):
15 | self.model = MiDaSInference(model_type="dpt_hybrid").cuda()
16 |
17 | def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1):
18 | assert input_image.ndim == 3
19 | image_depth = input_image
20 | with torch.no_grad():
21 | image_depth = torch.from_numpy(image_depth).float().cuda()
22 | image_depth = image_depth / 127.5 - 1.0
23 | image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
24 | depth = self.model(image_depth)[0]
25 |
26 | depth_pt = depth.clone()
27 | depth_pt -= torch.min(depth_pt)
28 | depth_pt /= torch.max(depth_pt)
29 | depth_pt = depth_pt.cpu().numpy()
30 | depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8)
31 |
32 | depth_np = depth.cpu().numpy()
33 | x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3)
34 | y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3)
35 | z = np.ones_like(x) * a
36 | x[depth_pt < bg_th] = 0
37 | y[depth_pt < bg_th] = 0
38 | normal = np.stack([x, y, z], axis=2)
39 | normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5
40 | normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
41 |
42 | return depth_image, normal_image
43 |
--------------------------------------------------------------------------------
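
Usage sketch (not part of the repository): generating depth and surface-normal hints with the MidasDetector above. It requires a CUDA device as written, and MiDaSInference (defined in api.py, not shown in this section) handles the dpt_hybrid weights; the image path is a placeholder.

import cv2
from src.flux.annotator.midas import MidasDetector
from src.flux.annotator.util import HWC3, resize_image

midas = MidasDetector()
img = resize_image(HWC3(cv2.imread("room.jpg")), resolution=512)
depth_map, normal_map = midas(img)       # depth: HxW uint8, normals: HxWx3 uint8
cv2.imwrite("depth_hint.png", HWC3(depth_map))
cv2.imwrite("normal_hint.png", normal_map)
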
/src/flux/annotator/midas/midas/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/midas/midas/__init__.py
--------------------------------------------------------------------------------
/src/flux/annotator/midas/midas/base_model.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class BaseModel(torch.nn.Module):
5 | def load(self, path):
6 | """Load model from file.
7 |
8 | Args:
9 | path (str): file path
10 | """
11 | parameters = torch.load(path, map_location=torch.device('cpu'))
12 |
13 | if "optimizer" in parameters:
14 | parameters = parameters["model"]
15 |
16 | self.load_state_dict(parameters)
17 |
--------------------------------------------------------------------------------
/src/flux/annotator/midas/midas/dpt_depth.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from .base_model import BaseModel
6 | from .blocks import (
7 | FeatureFusionBlock,
8 | FeatureFusionBlock_custom,
9 | Interpolate,
10 | _make_encoder,
11 | forward_vit,
12 | )
13 |
14 |
15 | def _make_fusion_block(features, use_bn):
16 | return FeatureFusionBlock_custom(
17 | features,
18 | nn.ReLU(False),
19 | deconv=False,
20 | bn=use_bn,
21 | expand=False,
22 | align_corners=True,
23 | )
24 |
25 |
26 | class DPT(BaseModel):
27 | def __init__(
28 | self,
29 | head,
30 | features=256,
31 | backbone="vitb_rn50_384",
32 | readout="project",
33 | channels_last=False,
34 | use_bn=False,
35 | ):
36 |
37 | super(DPT, self).__init__()
38 |
39 | self.channels_last = channels_last
40 |
41 | hooks = {
42 | "vitb_rn50_384": [0, 1, 8, 11],
43 | "vitb16_384": [2, 5, 8, 11],
44 | "vitl16_384": [5, 11, 17, 23],
45 | }
46 |
47 | # Instantiate backbone and reassemble blocks
48 | self.pretrained, self.scratch = _make_encoder(
49 | backbone,
50 | features,
51 |             False,  # use_pretrained: set to True if you want to train from scratch; it then starts the backbone from ImageNet weights
52 | groups=1,
53 | expand=False,
54 | exportable=False,
55 | hooks=hooks[backbone],
56 | use_readout=readout,
57 | )
58 |
59 | self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
60 | self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
61 | self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
62 | self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
63 |
64 | self.scratch.output_conv = head
65 |
66 |
67 | def forward(self, x):
68 |         if self.channels_last:
69 |             x = x.contiguous(memory_format=torch.channels_last)  # contiguous() returns a new tensor; reassign so the layout change takes effect
70 |
71 | layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x)
72 |
73 | layer_1_rn = self.scratch.layer1_rn(layer_1)
74 | layer_2_rn = self.scratch.layer2_rn(layer_2)
75 | layer_3_rn = self.scratch.layer3_rn(layer_3)
76 | layer_4_rn = self.scratch.layer4_rn(layer_4)
77 |
78 | path_4 = self.scratch.refinenet4(layer_4_rn)
79 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
80 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
81 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
82 |
83 | out = self.scratch.output_conv(path_1)
84 |
85 | return out
86 |
87 |
88 | class DPTDepthModel(DPT):
89 | def __init__(self, path=None, non_negative=True, **kwargs):
90 | features = kwargs["features"] if "features" in kwargs else 256
91 |
92 | head = nn.Sequential(
93 | nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
94 | Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
95 | nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
96 | nn.ReLU(True),
97 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
98 | nn.ReLU(True) if non_negative else nn.Identity(),
99 | nn.Identity(),
100 | )
101 |
102 | super().__init__(head, **kwargs)
103 |
104 | if path is not None:
105 | self.load(path)
106 |
107 | def forward(self, x):
108 | return super().forward(x).squeeze(dim=1)
109 |
110 |
--------------------------------------------------------------------------------
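
Standalone sketch (not part of the repository): constructing DPTDepthModel directly, without a checkpoint. This is an assumption-heavy example: it relies on blocks.py/_make_encoder building the vitb_rn50_384 backbone through timm (listed in requirements.txt), and with path=None no depth weights are loaded, so the output is untrained.

import torch
from src.flux.annotator.midas.midas.dpt_depth import DPTDepthModel

model = DPTDepthModel(path=None, backbone="vitb_rn50_384", non_negative=True).eval()
x = torch.randn(1, 3, 384, 384)          # RGB tensor; sides must be multiples of 32
with torch.no_grad():
    depth = model(x)                     # (1, 384, 384) after the squeeze(dim=1) in forward
print(depth.shape)
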
/src/flux/annotator/midas/midas/midas_net.py:
--------------------------------------------------------------------------------
1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets.
2 | This file contains code that is adapted from
3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
4 | """
5 | import torch
6 | import torch.nn as nn
7 |
8 | from .base_model import BaseModel
9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder
10 |
11 |
12 | class MidasNet(BaseModel):
13 | """Network for monocular depth estimation.
14 | """
15 |
16 | def __init__(self, path=None, features=256, non_negative=True):
17 | """Init.
18 |
19 | Args:
20 | path (str, optional): Path to saved model. Defaults to None.
21 | features (int, optional): Number of features. Defaults to 256.
22 | backbone (str, optional): Backbone network for encoder. Defaults to resnet50
23 | """
24 | print("Loading weights: ", path)
25 |
26 | super(MidasNet, self).__init__()
27 |
28 | use_pretrained = False if path is None else True
29 |
30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained)
31 |
32 | self.scratch.refinenet4 = FeatureFusionBlock(features)
33 | self.scratch.refinenet3 = FeatureFusionBlock(features)
34 | self.scratch.refinenet2 = FeatureFusionBlock(features)
35 | self.scratch.refinenet1 = FeatureFusionBlock(features)
36 |
37 | self.scratch.output_conv = nn.Sequential(
38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
39 | Interpolate(scale_factor=2, mode="bilinear"),
40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1),
41 | nn.ReLU(True),
42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
43 | nn.ReLU(True) if non_negative else nn.Identity(),
44 | )
45 |
46 | if path:
47 | self.load(path)
48 |
49 | def forward(self, x):
50 | """Forward pass.
51 |
52 | Args:
53 | x (tensor): input data (image)
54 |
55 | Returns:
56 | tensor: depth
57 | """
58 |
59 | layer_1 = self.pretrained.layer1(x)
60 | layer_2 = self.pretrained.layer2(layer_1)
61 | layer_3 = self.pretrained.layer3(layer_2)
62 | layer_4 = self.pretrained.layer4(layer_3)
63 |
64 | layer_1_rn = self.scratch.layer1_rn(layer_1)
65 | layer_2_rn = self.scratch.layer2_rn(layer_2)
66 | layer_3_rn = self.scratch.layer3_rn(layer_3)
67 | layer_4_rn = self.scratch.layer4_rn(layer_4)
68 |
69 | path_4 = self.scratch.refinenet4(layer_4_rn)
70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
73 |
74 | out = self.scratch.output_conv(path_1)
75 |
76 | return torch.squeeze(out, dim=1)
77 |
--------------------------------------------------------------------------------
/src/flux/annotator/mlsd/__init__.py:
--------------------------------------------------------------------------------
1 | # MLSD Line Detection
2 | # From https://github.com/navervision/mlsd
3 | # Apache-2.0 license
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | import os
9 |
10 | from einops import rearrange
11 | from huggingface_hub import hf_hub_download
12 | from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
13 | from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
14 | from .utils import pred_lines
15 |
16 | from ...annotator.util import annotator_ckpts_path
17 |
18 |
19 | class MLSDdetector:
20 | def __init__(self):
21 | model_path = os.path.join(annotator_ckpts_path, "mlsd_large_512_fp32.pth")
22 | if not os.path.exists(model_path):
23 | model_path = hf_hub_download("lllyasviel/Annotators", "mlsd_large_512_fp32.pth")
24 | model = MobileV2_MLSD_Large()
25 | model.load_state_dict(torch.load(model_path), strict=True)
26 | self.model = model.cuda().eval()
27 |
28 | def __call__(self, input_image, thr_v, thr_d):
29 | assert input_image.ndim == 3
30 | img = input_image
31 | img_output = np.zeros_like(img)
32 | try:
33 | with torch.no_grad():
34 | lines = pred_lines(img, self.model, [img.shape[0], img.shape[1]], thr_v, thr_d)
35 | for line in lines:
36 | x_start, y_start, x_end, y_end = [int(val) for val in line]
37 | cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1)
38 | except Exception as e:
39 | pass
40 | return img_output[:, :, 0]
41 |
--------------------------------------------------------------------------------
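
Usage sketch (not part of the repository): an MLSD line-map hint. The mlsd_large_512_fp32.pth weights are pulled from lllyasviel/Annotators unless already present under annotator/ckpts; thr_v/thr_d follow the usual ControlNet defaults and the image path is a placeholder.

import cv2
from src.flux.annotator.mlsd import MLSDdetector
from src.flux.annotator.util import HWC3, resize_image

mlsd = MLSDdetector()
img = resize_image(HWC3(cv2.imread("building.jpg")), resolution=512)
line_map = mlsd(img, thr_v=0.1, thr_d=0.1)   # HxW uint8, white line segments on black
cv2.imwrite("mlsd_hint.png", HWC3(line_map))
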
/src/flux/annotator/tile/__init__.py:
--------------------------------------------------------------------------------
1 | import random
2 | import cv2
3 | from .guided_filter import FastGuidedFilter
4 |
5 |
6 | class TileDetector:
7 | # https://huggingface.co/xinsir/controlnet-tile-sdxl-1.0
8 | def __init__(self):
9 | pass
10 |
11 | def __call__(self, image):
12 | blur_strength = random.sample([i / 10. for i in range(10, 201, 2)], k=1)[0]
13 | radius = random.sample([i for i in range(1, 40, 2)], k=1)[0]
14 | eps = random.sample([i / 1000. for i in range(1, 101, 2)], k=1)[0]
15 | scale_factor = random.sample([i / 10. for i in range(10, 181, 5)], k=1)[0]
16 |
17 | ksize = int(blur_strength)
18 | if ksize % 2 == 0:
19 | ksize += 1
20 |
21 | if random.random() > 0.5:
22 | image = cv2.GaussianBlur(image, (ksize, ksize), blur_strength / 2)
23 | if random.random() > 0.5:
24 | filter = FastGuidedFilter(image, radius, eps, scale_factor)
25 | image = filter.filter(image)
26 | return image
27 |
--------------------------------------------------------------------------------
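
Usage sketch (not part of the repository): the tile annotator is a random degradation (Gaussian blur and/or a fast guided filter with randomly drawn parameters), so repeated calls on the same image give different results and sometimes return it unchanged. The image path is a placeholder.

import cv2
from src.flux.annotator.tile import TileDetector

tile = TileDetector()
img = cv2.imread("photo.jpg")
degraded = tile(img)                 # same shape as the input, randomly blurred/filtered
cv2.imwrite("tile_hint.png", degraded)
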
/src/flux/annotator/util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 |
5 |
6 | annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts')
7 |
8 |
9 | def HWC3(x):
10 | assert x.dtype == np.uint8
11 | if x.ndim == 2:
12 | x = x[:, :, None]
13 | assert x.ndim == 3
14 | H, W, C = x.shape
15 | assert C == 1 or C == 3 or C == 4
16 | if C == 3:
17 | return x
18 | if C == 1:
19 | return np.concatenate([x, x, x], axis=2)
20 | if C == 4:
21 | color = x[:, :, 0:3].astype(np.float32)
22 | alpha = x[:, :, 3:4].astype(np.float32) / 255.0
23 | y = color * alpha + 255.0 * (1.0 - alpha)
24 | y = y.clip(0, 255).astype(np.uint8)
25 | return y
26 |
27 |
28 | def resize_image(input_image, resolution):
29 | H, W, C = input_image.shape
30 | H = float(H)
31 | W = float(W)
32 | k = float(resolution) / min(H, W)
33 | H *= k
34 | W *= k
35 | H = int(np.round(H / 64.0)) * 64
36 | W = int(np.round(W / 64.0)) * 64
37 | img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
38 | return img
39 |
--------------------------------------------------------------------------------
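
Worked example (not part of the repository): resize_image scales the short side to the requested resolution and rounds both sides to the nearest multiple of 64, which is the geometry the annotators above expect.

import numpy as np
from src.flux.annotator.util import resize_image

img = np.zeros((720, 1280, 3), dtype=np.uint8)
out = resize_image(img, resolution=512)
print(out.shape)   # (512, 896, 3): k = 512/720, 1280*k = 910.2, round(910.2/64)*64 = 896
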
/src/flux/annotator/zoe/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/__init__.py:
--------------------------------------------------------------------------------
1 | # ZoeDepth
2 | # https://github.com/isl-org/ZoeDepth
3 |
4 | import os
5 | import cv2
6 | import numpy as np
7 | import torch
8 |
9 | from einops import rearrange
10 | from .zoedepth.models.zoedepth.zoedepth_v1 import ZoeDepth
11 | from .zoedepth.utils.config import get_config
12 | from ...annotator.util import annotator_ckpts_path
13 | from huggingface_hub import hf_hub_download
14 |
15 |
16 | class ZoeDetector:
17 | def __init__(self):
18 | model_path = os.path.join(annotator_ckpts_path, "ZoeD_M12_N.pt")
19 | if not os.path.exists(model_path):
20 | model_path = hf_hub_download("lllyasviel/Annotators", "ZoeD_M12_N.pt")
21 | conf = get_config("zoedepth", "infer")
22 | model = ZoeDepth.build_from_config(conf)
23 | model.load_state_dict(torch.load(model_path)['model'], strict=False)
24 | model = model.cuda()
25 | model.device = 'cuda'
26 | model.eval()
27 | self.model = model
28 |
29 | def __call__(self, input_image):
30 | assert input_image.ndim == 3
31 | image_depth = input_image
32 | with torch.no_grad():
33 | image_depth = torch.from_numpy(image_depth).float().cuda()
34 | image_depth = image_depth / 255.0
35 | image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
36 | depth = self.model.infer(image_depth)
37 |
38 | depth = depth[0, 0].cpu().numpy()
39 |
40 | vmin = np.percentile(depth, 2)
41 | vmax = np.percentile(depth, 85)
42 |
43 | depth -= vmin
44 | depth /= vmax - vmin
45 | depth = 1.0 - depth
46 | depth_image = (depth * 255.0).clip(0, 255).astype(np.uint8)
47 |
48 | return depth_image
49 |
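A minimal inference sketch for `ZoeDetector`, assuming a CUDA device is available and that the `flux` package from `src/` is importable; the checkpoint is fetched from the `lllyasviel/Annotators` Hugging Face repo on first use, exactly as in the constructor above, and the input file is the repository's example image.

```python
import cv2
from flux.annotator.util import HWC3, resize_image
from flux.annotator.zoe import ZoeDetector

zoe = ZoeDetector()                                  # loads ZoeD_M12_N.pt onto the GPU
img = HWC3(cv2.imread("example_images/statue.jpg"))
img = resize_image(img, resolution=512)
depth = zoe(img)                                     # uint8 HxW map, nearer surfaces are brighter
cv2.imwrite("statue_depth.png", depth)
```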
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/data/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/data/ibims.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import os
26 |
27 | import numpy as np
28 | import torch
29 | from PIL import Image
30 | from torch.utils.data import DataLoader, Dataset
31 | from torchvision import transforms as T
32 |
33 |
34 | class iBims(Dataset):
35 | def __init__(self, config):
36 | root_folder = config.ibims_root
37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f:
38 | imglist = f.read().split()
39 |
40 | samples = []
41 | for basename in imglist:
42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png")
43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png")
44 | valid_mask_path = os.path.join(
45 | root_folder, 'mask_invalid', basename+".png")
46 | transp_mask_path = os.path.join(
47 | root_folder, 'mask_transp', basename+".png")
48 |
49 | samples.append(
50 | (img_path, depth_path, valid_mask_path, transp_mask_path))
51 |
52 | self.samples = samples
53 | # self.normalize = T.Normalize(
54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
55 | self.normalize = lambda x : x
56 |
57 | def __getitem__(self, idx):
58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx]
59 |
60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0
61 | depth = np.asarray(Image.open(depth_path),
62 | dtype=np.uint16).astype('float')*50.0/65535
63 |
64 | mask_valid = np.asarray(Image.open(valid_mask_path))
65 | mask_transp = np.asarray(Image.open(transp_mask_path))
66 |
67 | # depth = depth * mask_valid * mask_transp
68 | depth = np.where(mask_valid * mask_transp, depth, -1)
69 |
70 | img = torch.from_numpy(img).permute(2, 0, 1)
71 | img = self.normalize(img)
72 | depth = torch.from_numpy(depth).unsqueeze(0)
73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims')
74 |
75 | def __len__(self):
76 | return len(self.samples)
77 |
78 |
79 | def get_ibims_loader(config, batch_size=1, **kwargs):
80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs)
81 | return dataloader
82 |
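A short usage sketch for `get_ibims_loader`; the dataset root is a placeholder path, and a `SimpleNamespace` stands in for the project's real config object since `iBims` only reads `config.ibims_root`.

```python
from types import SimpleNamespace

config = SimpleNamespace(ibims_root="/data/ibims1")           # placeholder path
loader = get_ibims_loader(config, batch_size=1, shuffle=False)

for sample in loader:
    image = sample["image"]   # (1, 3, H, W) float tensor in [0, 1]
    depth = sample["depth"]   # (1, 1, H, W) depth in meters, -1 where invalid or transparent
    break
```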
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | *.png
107 | *.pfm
108 | *.jpg
109 | *.jpeg
110 | *.pt
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/Dockerfile:
--------------------------------------------------------------------------------
1 | # enables cuda support in docker
2 | FROM nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04
3 |
4 | # install python 3.6, pip and requirements for opencv-python
5 | # (see https://github.com/NVIDIA/nvidia-docker/issues/864)
6 | RUN apt-get update && apt-get -y install \
7 | python3 \
8 | python3-pip \
9 | libsm6 \
10 | libxext6 \
11 | libxrender-dev \
12 | curl \
13 | && rm -rf /var/lib/apt/lists/*
14 |
15 | # install python dependencies
16 | RUN pip3 install --upgrade pip
17 | RUN pip3 install torch~=1.8 torchvision opencv-python-headless~=3.4 timm
18 |
19 | # copy inference code
20 | WORKDIR /opt/MiDaS
21 | COPY ./midas ./midas
22 | COPY ./*.py ./
23 |
24 | # download model weights so the docker image can be used offline
25 | RUN mkdir -p weights && cd weights && curl -OL https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid_384.pt
26 | RUN python3 run.py --model_type dpt_hybrid; exit 0
27 |
28 | # entrypoint (dont forget to mount input and output directories)
29 | CMD python3 run.py --model_type dpt_hybrid
30 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/environment.yaml:
--------------------------------------------------------------------------------
1 | name: midas-py310
2 | channels:
3 | - pytorch
4 | - defaults
5 | dependencies:
6 | - nvidia::cudatoolkit=11.7
7 | - python=3.10.8
8 | - pytorch::pytorch=1.13.0
9 | - torchvision=0.14.0
10 | - pip=22.3.1
11 | - numpy=1.23.4
12 | - pip:
13 | - opencv-python==4.6.0.66
14 | - imutils==0.5.4
15 | - timm==0.6.12
16 | - einops==0.6.0
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/input/.placeholder:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/input/.placeholder
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/levit.py:
--------------------------------------------------------------------------------
1 | import timm
2 | import torch
3 | import torch.nn as nn
4 | import numpy as np
5 |
6 | from .utils import activations, get_activation, Transpose
7 |
8 |
9 | def forward_levit(pretrained, x):
10 | pretrained.model.forward_features(x)
11 |
12 | layer_1 = pretrained.activations["1"]
13 | layer_2 = pretrained.activations["2"]
14 | layer_3 = pretrained.activations["3"]
15 |
16 | layer_1 = pretrained.act_postprocess1(layer_1)
17 | layer_2 = pretrained.act_postprocess2(layer_2)
18 | layer_3 = pretrained.act_postprocess3(layer_3)
19 |
20 | return layer_1, layer_2, layer_3
21 |
22 |
23 | def _make_levit_backbone(
24 | model,
25 | hooks=[3, 11, 21],
26 | patch_grid=[14, 14]
27 | ):
28 | pretrained = nn.Module()
29 |
30 | pretrained.model = model
31 | pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1"))
32 | pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2"))
33 | pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3"))
34 |
35 | pretrained.activations = activations
36 |
37 | patch_grid_size = np.array(patch_grid, dtype=int)
38 |
39 | pretrained.act_postprocess1 = nn.Sequential(
40 | Transpose(1, 2),
41 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist()))
42 | )
43 | pretrained.act_postprocess2 = nn.Sequential(
44 | Transpose(1, 2),
45 | nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 2).astype(int)).tolist()))
46 | )
47 | pretrained.act_postprocess3 = nn.Sequential(
48 | Transpose(1, 2),
49 | nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 4).astype(int)).tolist()))
50 | )
51 |
52 | return pretrained
53 |
54 |
55 | class ConvTransposeNorm(nn.Sequential):
56 | """
57 | Modification of
58 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: ConvNorm
59 | such that ConvTranspose2d is used instead of Conv2d.
60 | """
61 |
62 | def __init__(
63 | self, in_chs, out_chs, kernel_size=1, stride=1, pad=0, dilation=1,
64 | groups=1, bn_weight_init=1):
65 | super().__init__()
66 | self.add_module('c',
67 | nn.ConvTranspose2d(in_chs, out_chs, kernel_size, stride, pad, dilation, groups, bias=False))
68 | self.add_module('bn', nn.BatchNorm2d(out_chs))
69 |
70 | nn.init.constant_(self.bn.weight, bn_weight_init)
71 |
72 | @torch.no_grad()
73 | def fuse(self):
74 | c, bn = self._modules.values()
75 | w = bn.weight / (bn.running_var + bn.eps) ** 0.5
76 | w = c.weight * w[:, None, None, None]
77 | b = bn.bias - bn.running_mean * bn.weight / (bn.running_var + bn.eps) ** 0.5
78 | m = nn.ConvTranspose2d(
79 | w.size(1), w.size(0), w.shape[2:], stride=self.c.stride,
80 | padding=self.c.padding, dilation=self.c.dilation, groups=self.c.groups)
81 | m.weight.data.copy_(w)
82 | m.bias.data.copy_(b)
83 | return m
84 |
85 |
86 | def stem_b4_transpose(in_chs, out_chs, activation):
87 | """
88 | Modification of
89 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: stem_b16
90 | such that ConvTranspose2d is used instead of Conv2d and the stem is also reduced to half.
91 | """
92 | return nn.Sequential(
93 | ConvTransposeNorm(in_chs, out_chs, 3, 2, 1),
94 | activation(),
95 | ConvTransposeNorm(out_chs, out_chs // 2, 3, 2, 1),
96 | activation())
97 |
98 |
99 | def _make_pretrained_levit_384(pretrained, hooks=None):
100 | model = timm.create_model("levit_384", pretrained=pretrained)
101 |
102 | hooks = [3, 11, 21] if hooks is None else hooks
103 | return _make_levit_backbone(
104 | model,
105 | hooks=hooks
106 | )
107 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py:
--------------------------------------------------------------------------------
1 | import timm
2 |
3 | import torch.nn as nn
4 |
5 | from pathlib import Path
6 | from .utils import activations, forward_default, get_activation
7 |
8 | from ..external.next_vit.classification.nextvit import *
9 |
10 |
11 | def forward_next_vit(pretrained, x):
12 | return forward_default(pretrained, x, "forward")
13 |
14 |
15 | def _make_next_vit_backbone(
16 | model,
17 | hooks=[2, 6, 36, 39],
18 | ):
19 | pretrained = nn.Module()
20 |
21 | pretrained.model = model
22 | pretrained.model.features[hooks[0]].register_forward_hook(get_activation("1"))
23 | pretrained.model.features[hooks[1]].register_forward_hook(get_activation("2"))
24 | pretrained.model.features[hooks[2]].register_forward_hook(get_activation("3"))
25 | pretrained.model.features[hooks[3]].register_forward_hook(get_activation("4"))
26 |
27 | pretrained.activations = activations
28 |
29 | return pretrained
30 |
31 |
32 | def _make_pretrained_next_vit_large_6m(hooks=None):
33 | model = timm.create_model("nextvit_large")
34 |
35 | hooks = [2, 6, 36, 39] if hooks is None else hooks
36 | return _make_next_vit_backbone(
37 | model,
38 | hooks=hooks,
39 | )
40 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py:
--------------------------------------------------------------------------------
1 | import timm
2 |
3 | from .swin_common import _make_swin_backbone
4 |
5 |
6 | def _make_pretrained_swinl12_384(pretrained, hooks=None):
7 | model = timm.create_model("swin_large_patch4_window12_384", pretrained=pretrained)
8 |
9 | hooks = [1, 1, 17, 1] if hooks is None else hooks
10 | return _make_swin_backbone(
11 | model,
12 | hooks=hooks
13 | )
14 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py:
--------------------------------------------------------------------------------
1 | import timm
2 |
3 | from .swin_common import _make_swin_backbone
4 |
5 |
6 | def _make_pretrained_swin2l24_384(pretrained, hooks=None):
7 | model = timm.create_model("swinv2_large_window12to24_192to384_22kft1k", pretrained=pretrained)
8 |
9 | hooks = [1, 1, 17, 1] if hooks is None else hooks
10 | return _make_swin_backbone(
11 | model,
12 | hooks=hooks
13 | )
14 |
15 |
16 | def _make_pretrained_swin2b24_384(pretrained, hooks=None):
17 | model = timm.create_model("swinv2_base_window12to24_192to384_22kft1k", pretrained=pretrained)
18 |
19 | hooks = [1, 1, 17, 1] if hooks is None else hooks
20 | return _make_swin_backbone(
21 | model,
22 | hooks=hooks
23 | )
24 |
25 |
26 | def _make_pretrained_swin2t16_256(pretrained, hooks=None):
27 | model = timm.create_model("swinv2_tiny_window16_256", pretrained=pretrained)
28 |
29 | hooks = [1, 1, 5, 1] if hooks is None else hooks
30 | return _make_swin_backbone(
31 | model,
32 | hooks=hooks,
33 | patch_grid=[64, 64]
34 | )
35 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import torch.nn as nn
4 | import numpy as np
5 |
6 | from .utils import activations, forward_default, get_activation, Transpose
7 |
8 |
9 | def forward_swin(pretrained, x):
10 | return forward_default(pretrained, x)
11 |
12 |
13 | def _make_swin_backbone(
14 | model,
15 | hooks=[1, 1, 17, 1],
16 | patch_grid=[96, 96]
17 | ):
18 | pretrained = nn.Module()
19 |
20 | pretrained.model = model
21 | pretrained.model.layers[0].blocks[hooks[0]].register_forward_hook(get_activation("1"))
22 | pretrained.model.layers[1].blocks[hooks[1]].register_forward_hook(get_activation("2"))
23 | pretrained.model.layers[2].blocks[hooks[2]].register_forward_hook(get_activation("3"))
24 | pretrained.model.layers[3].blocks[hooks[3]].register_forward_hook(get_activation("4"))
25 |
26 | pretrained.activations = activations
27 |
28 | if hasattr(model, "patch_grid"):
29 | used_patch_grid = model.patch_grid
30 | else:
31 | used_patch_grid = patch_grid
32 |
33 | patch_grid_size = np.array(used_patch_grid, dtype=int)
34 |
35 | pretrained.act_postprocess1 = nn.Sequential(
36 | Transpose(1, 2),
37 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist()))
38 | )
39 | pretrained.act_postprocess2 = nn.Sequential(
40 | Transpose(1, 2),
41 | nn.Unflatten(2, torch.Size((patch_grid_size // 2).tolist()))
42 | )
43 | pretrained.act_postprocess3 = nn.Sequential(
44 | Transpose(1, 2),
45 | nn.Unflatten(2, torch.Size((patch_grid_size // 4).tolist()))
46 | )
47 | pretrained.act_postprocess4 = nn.Sequential(
48 | Transpose(1, 2),
49 | nn.Unflatten(2, torch.Size((patch_grid_size // 8).tolist()))
50 | )
51 |
52 | return pretrained
53 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/base_model.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class BaseModel(torch.nn.Module):
5 | def load(self, path):
6 | """Load model from file.
7 |
8 | Args:
9 | path (str): file path
10 | """
11 | parameters = torch.load(path, map_location=torch.device('cpu'))
12 |
13 | if "optimizer" in parameters:
14 | parameters = parameters["model"]
15 |
16 | self.load_state_dict(parameters)
17 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net.py:
--------------------------------------------------------------------------------
1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets.
2 | This file contains code that is adapted from
3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
4 | """
5 | import torch
6 | import torch.nn as nn
7 |
8 | from .base_model import BaseModel
9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder
10 |
11 |
12 | class MidasNet(BaseModel):
13 | """Network for monocular depth estimation.
14 | """
15 |
16 | def __init__(self, path=None, features=256, non_negative=True):
17 | """Init.
18 |
19 | Args:
20 | path (str, optional): Path to saved model. Defaults to None.
21 | features (int, optional): Number of features. Defaults to 256.
22 | non_negative (bool, optional): If True, a final ReLU keeps the predicted depth non-negative. Defaults to True.
23 | """
24 | print("Loading weights: ", path)
25 |
26 | super(MidasNet, self).__init__()
27 |
28 | use_pretrained = False if path is None else True
29 |
30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained)
31 |
32 | self.scratch.refinenet4 = FeatureFusionBlock(features)
33 | self.scratch.refinenet3 = FeatureFusionBlock(features)
34 | self.scratch.refinenet2 = FeatureFusionBlock(features)
35 | self.scratch.refinenet1 = FeatureFusionBlock(features)
36 |
37 | self.scratch.output_conv = nn.Sequential(
38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
39 | Interpolate(scale_factor=2, mode="bilinear"),
40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1),
41 | nn.ReLU(True),
42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
43 | nn.ReLU(True) if non_negative else nn.Identity(),
44 | )
45 |
46 | if path:
47 | self.load(path)
48 |
49 | def forward(self, x):
50 | """Forward pass.
51 |
52 | Args:
53 | x (tensor): input data (image)
54 |
55 | Returns:
56 | tensor: depth
57 | """
58 |
59 | layer_1 = self.pretrained.layer1(x)
60 | layer_2 = self.pretrained.layer2(layer_1)
61 | layer_3 = self.pretrained.layer3(layer_2)
62 | layer_4 = self.pretrained.layer4(layer_3)
63 |
64 | layer_1_rn = self.scratch.layer1_rn(layer_1)
65 | layer_2_rn = self.scratch.layer2_rn(layer_2)
66 | layer_3_rn = self.scratch.layer3_rn(layer_3)
67 | layer_4_rn = self.scratch.layer4_rn(layer_4)
68 |
69 | path_4 = self.scratch.refinenet4(layer_4_rn)
70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
73 |
74 | out = self.scratch.output_conv(path_1)
75 |
76 | return torch.squeeze(out, dim=1)
77 |
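A small forward-pass sketch for `MidasNet` with random decoder weights; note that constructing the model pulls the ResNeXt-101 WSL backbone via `torch.hub` on first use, and the 384x384 input size is simply the resolution these models are commonly run at.

```python
import torch

net = MidasNet(path=None, features=256, non_negative=True)  # no checkpoint loaded
net.eval()

with torch.no_grad():
    x = torch.randn(1, 3, 384, 384)   # dummy normalized RGB batch
    depth = net(x)                    # prediction with shape (1, 384, 384)

print(depth.shape)
```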
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/README.md:
--------------------------------------------------------------------------------
1 | ## Mobile version of MiDaS for iOS / Android - Monocular Depth Estimation
2 |
3 | ### Accuracy
4 |
5 | * Old small model - ResNet50 default-decoder 384x384
6 | * New small model - EfficientNet-Lite3 small-decoder 256x256
7 |
8 | **Zero-shot error** (the lower - the better):
9 |
10 | | Model | DIW WHDR | Eth3d AbsRel | Sintel AbsRel | Kitti δ>1.25 | NyuDepthV2 δ>1.25 | TUM δ>1.25 |
11 | |---|---|---|---|---|---|---|
12 | | Old small model 384x384 | **0.1248** | 0.1550 | **0.3300** | **21.81** | 15.73 | 17.00 |
13 | | New small model 256x256 | 0.1344 | **0.1344** | 0.3370 | 29.27 | **13.43** | **14.53** |
14 | | Relative improvement, % | -8 % | **+13 %** | -2 % | -34 % | **+15 %** | **+15 %** |
15 |
16 | None of the Train/Valid/Test subsets of these datasets (DIW, Eth3d, Sintel, Kitti, NyuDepthV2, TUM) were involved in training or fine-tuning.
17 |
18 | ### Inference speed (FPS) on iOS / Android
19 |
20 | **Frames Per Second** (the higher - the better):
21 |
22 | | Model | iPhone CPU | iPhone GPU | iPhone NPU | OnePlus8 CPU | OnePlus8 GPU | OnePlus8 NNAPI |
23 | |---|---|---|---|---|---|---|
24 | | Old small model 384x384 | 0.6 | N/A | N/A | 0.45 | 0.50 | 0.50 |
25 | | New small model 256x256 | 8 | 22 | **30** | 6 | **22** | 4 |
26 | | SpeedUp, X times | **12.8x** | - | - | **13.2x** | **44x** | **8x** |
27 |
28 | N/A - run-time error (no data available)
29 |
30 |
31 | #### Models:
32 |
33 | * Old small model - ResNet50 default-decoder 1x384x384x3, batch=1 FP32 (converter chain: PyTorch -> ONNX -> [onnx_tf](https://github.com/onnx/onnx-tensorflow) -> SavedModel (PB) -> TFLite; see the export sketch after this list)
34 |
35 | (Trained on datasets: RedWeb, MegaDepth, WSVD, 3D Movies, DIML indoor)
36 |
37 | * New small model - EfficientNet-Lite3 small-decoder 1x256x256x3, batch=1 FP32 (custom converter: Pytorch -> TFlite)
38 |
39 | (Trained on datasets: RedWeb, MegaDepth, WSVD, 3D Movies, DIML indoor, HRWSI, IRS, TartanAir, BlendedMVS, ApolloScape)
40 |
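To make the first hop of the converter chain above (PyTorch -> ONNX) concrete, a hedged sketch using `torch.onnx.export`; the stand-in network, file name, input size and opset are placeholders, not the exact settings used for the released mobile models.

```python
import torch
import torch.nn as nn

# Stand-in for the actual small MiDaS network (placeholder).
model = nn.Sequential(nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(),
                      nn.Conv2d(32, 1, 3, padding=1)).eval()

dummy = torch.randn(1, 3, 384, 384)
torch.onnx.export(model, dummy, "midas_small.onnx",
                  opset_version=11,
                  input_names=["image"], output_names=["depth"])
# The ONNX file then goes through onnx-tensorflow to a SavedModel/PB and
# finally through the TFLite converter, as listed above.
```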
41 | #### Frameworks for training and conversions:
42 | ```
43 | pip install torch==1.6.0 torchvision==0.7.0
44 | pip install tf-nightly-gpu==2.5.0.dev20201031 tensorflow-addons==0.11.2 numpy==1.18.0
45 | git clone --depth 1 --branch v1.6.0 https://github.com/onnx/onnx-tensorflow
46 | ```
47 |
48 | #### SoC - OS - Library:
49 |
50 | * iPhone 11 (A13 Bionic) - iOS 13.7 - TensorFlowLiteSwift 0.0.1-nightly
51 | * OnePlus 8 (Snapdragon 865) - Android 10 - org.tensorflow:tensorflow-lite-task-vision:0.0.0-nightly
52 |
53 |
54 | ### Citation
55 |
56 | This repository contains code to compute depth from a single image. It accompanies our [paper](https://arxiv.org/abs/1907.01341v3):
57 |
58 | >Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer
59 | René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun
60 |
61 | Please cite our paper if you use this code or any of the models:
62 | ```
63 | @article{Ranftl2020,
64 | author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun},
65 | title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer},
66 | journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
67 | year = {2020},
68 | }
69 | ```
70 |
71 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/.gitignore:
--------------------------------------------------------------------------------
1 | *.iml
2 | .gradle
3 | /local.properties
4 | /.idea/libraries
5 | /.idea/modules.xml
6 | /.idea/workspace.xml
7 | .DS_Store
8 | /build
9 | /captures
10 | .externalNativeBuild
11 |
12 | /.gradle/
13 | /.idea/
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Alexey
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/README.md:
--------------------------------------------------------------------------------
1 | # MiDaS on Android smartphone by using TensorFlow-lite (TFLite)
2 |
3 |
4 | * Either use Android Studio for compilation.
5 |
6 | * Or use the ready-to-install apk-file:
7 | * Or use URL: https://i.diawi.com/CVb8a9
8 | * Or use QR-code:
9 |
10 | Scan the QR-code or open the URL -> Press `Install application` -> Press `Download` and wait for the download -> Open -> Install -> Open -> Allow MiDaS to take photos and record video with the camera while using the app
11 |
12 | 
13 |
14 | ----
15 |
16 | To use another model, you should convert it to `model_opt.tflite` and place it in the directory: `models\src\main\assets`
17 |
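One hedged way to produce a `model_opt.tflite` from a TensorFlow SavedModel (for example, the PB obtained via onnx-tensorflow); the SavedModel path is a placeholder, and the custom PyTorch -> TFLite converter used for the newer model may differ.

```python
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_saved_model("midas_saved_model")  # placeholder directory
converter.optimizations = [tf.lite.Optimize.DEFAULT]                       # optional post-training optimization
tflite_model = converter.convert()

with open("model_opt.tflite", "wb") as f:
    f.write(tflite_model)
```

The resulting file would then be copied into `models\src\main\assets` as described above.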
18 |
19 | ----
20 |
21 | Original repository: https://github.com/isl-org/MiDaS
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 |
3 | /build/
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'com.android.application'
2 |
3 | android {
4 | compileSdkVersion 28
5 | defaultConfig {
6 | applicationId "org.tensorflow.lite.examples.classification"
7 | minSdkVersion 21
8 | targetSdkVersion 28
9 | versionCode 1
10 | versionName "1.0"
11 |
12 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
13 | }
14 | buildTypes {
15 | release {
16 | minifyEnabled false
17 | proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
18 | }
19 | }
20 | aaptOptions {
21 | noCompress "tflite"
22 | }
23 | compileOptions {
24 | sourceCompatibility = '1.8'
25 | targetCompatibility = '1.8'
26 | }
27 | lintOptions {
28 | abortOnError false
29 | }
30 | flavorDimensions "tfliteInference"
31 | productFlavors {
32 | // The TFLite inference is built using the TFLite Support library.
33 | support {
34 | dimension "tfliteInference"
35 | }
36 | // The TFLite inference is built using the TFLite Task library.
37 | taskApi {
38 | dimension "tfliteInference"
39 | }
40 | }
41 |
42 | }
43 |
44 | dependencies {
45 | implementation fileTree(dir: 'libs', include: ['*.jar'])
46 | supportImplementation project(":lib_support")
47 | taskApiImplementation project(":lib_task_api")
48 | implementation 'androidx.appcompat:appcompat:1.0.0'
49 | implementation 'androidx.coordinatorlayout:coordinatorlayout:1.0.0'
50 | implementation 'com.google.android.material:material:1.0.0'
51 |
52 | androidTestImplementation 'androidx.test.ext:junit:1.1.1'
53 | androidTestImplementation 'com.google.truth:truth:1.0.1'
54 | androidTestImplementation 'androidx.test:runner:1.2.0'
55 | androidTestImplementation 'androidx.test:rules:1.1.0'
56 | }
57 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/proguard-rules.pro:
--------------------------------------------------------------------------------
1 | # Add project specific ProGuard rules here.
2 | # You can control the set of applied configuration files using the
3 | # proguardFiles setting in build.gradle.
4 | #
5 | # For more details, see
6 | # http://developer.android.com/guide/developing/tools/proguard.html
7 |
8 | # If your project uses WebView with JS, uncomment the following
9 | # and specify the fully qualified class name to the JavaScript interface
10 | # class:
11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview {
12 | # public *;
13 | #}
14 |
15 | # Uncomment this to preserve the line number information for
16 | # debugging stack traces.
17 | #-keepattributes SourceFile,LineNumberTable
18 |
19 | # If you keep the line number information, uncomment this to
20 | # hide the original source file name.
21 | #-renamesourcefileattribute SourceFile
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_support.txt:
--------------------------------------------------------------------------------
1 | red_fox 0.79403335
2 | kit_fox 0.16753247
3 | grey_fox 0.03619214
4 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_task_api.txt:
--------------------------------------------------------------------------------
1 | red_fox 0.85
2 | kit_fox 0.13
3 | grey_fox 0.02
4 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/androidTest/java/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
18 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/AutoFitTextureView.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.tensorflow.lite.examples.classification.customview;
18 |
19 | import android.content.Context;
20 | import android.util.AttributeSet;
21 | import android.view.TextureView;
22 |
23 | /** A {@link TextureView} that can be adjusted to a specified aspect ratio. */
24 | public class AutoFitTextureView extends TextureView {
25 | private int ratioWidth = 0;
26 | private int ratioHeight = 0;
27 |
28 | public AutoFitTextureView(final Context context) {
29 | this(context, null);
30 | }
31 |
32 | public AutoFitTextureView(final Context context, final AttributeSet attrs) {
33 | this(context, attrs, 0);
34 | }
35 |
36 | public AutoFitTextureView(final Context context, final AttributeSet attrs, final int defStyle) {
37 | super(context, attrs, defStyle);
38 | }
39 |
40 | /**
41 | * Sets the aspect ratio for this view. The size of the view will be measured based on the ratio
42 | * calculated from the parameters. Note that the actual sizes of parameters don't matter, that is,
43 | * calling setAspectRatio(2, 3) and setAspectRatio(4, 6) make the same result.
44 | *
45 | * @param width Relative horizontal size
46 | * @param height Relative vertical size
47 | */
48 | public void setAspectRatio(final int width, final int height) {
49 | if (width < 0 || height < 0) {
50 | throw new IllegalArgumentException("Size cannot be negative.");
51 | }
52 | ratioWidth = width;
53 | ratioHeight = height;
54 | requestLayout();
55 | }
56 |
57 | @Override
58 | protected void onMeasure(final int widthMeasureSpec, final int heightMeasureSpec) {
59 | super.onMeasure(widthMeasureSpec, heightMeasureSpec);
60 | final int width = MeasureSpec.getSize(widthMeasureSpec);
61 | final int height = MeasureSpec.getSize(heightMeasureSpec);
62 | if (0 == ratioWidth || 0 == ratioHeight) {
63 | setMeasuredDimension(width, height);
64 | } else {
65 | if (width < height * ratioWidth / ratioHeight) {
66 | setMeasuredDimension(width, width * ratioHeight / ratioWidth);
67 | } else {
68 | setMeasuredDimension(height * ratioWidth / ratioHeight, height);
69 | }
70 | }
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/OverlayView.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.customview;
17 |
18 | import android.content.Context;
19 | import android.graphics.Canvas;
20 | import android.util.AttributeSet;
21 | import android.view.View;
22 | import java.util.LinkedList;
23 | import java.util.List;
24 |
25 | /** A simple View providing a render callback to other classes. */
26 | public class OverlayView extends View {
27 | private final List<DrawCallback> callbacks = new LinkedList<DrawCallback>();
28 |
29 | public OverlayView(final Context context, final AttributeSet attrs) {
30 | super(context, attrs);
31 | }
32 |
33 | public void addCallback(final DrawCallback callback) {
34 | callbacks.add(callback);
35 | }
36 |
37 | @Override
38 | public synchronized void draw(final Canvas canvas) {
39 | for (final DrawCallback callback : callbacks) {
40 | callback.drawCallback(canvas);
41 | }
42 | }
43 |
44 | /** Interface defining the callback for client classes. */
45 | public interface DrawCallback {
46 | public void drawCallback(final Canvas canvas);
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/RecognitionScoreView.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.customview;
17 |
18 | import android.content.Context;
19 | import android.graphics.Canvas;
20 | import android.graphics.Paint;
21 | import android.util.AttributeSet;
22 | import android.util.TypedValue;
23 | import android.view.View;
24 | import java.util.List;
25 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Recognition;
26 |
27 | public class RecognitionScoreView extends View implements ResultsView {
28 | private static final float TEXT_SIZE_DIP = 16;
29 | private final float textSizePx;
30 | private final Paint fgPaint;
31 | private final Paint bgPaint;
32 | private List<Recognition> results;
33 |
34 | public RecognitionScoreView(final Context context, final AttributeSet set) {
35 | super(context, set);
36 |
37 | textSizePx =
38 | TypedValue.applyDimension(
39 | TypedValue.COMPLEX_UNIT_DIP, TEXT_SIZE_DIP, getResources().getDisplayMetrics());
40 | fgPaint = new Paint();
41 | fgPaint.setTextSize(textSizePx);
42 |
43 | bgPaint = new Paint();
44 | bgPaint.setColor(0xcc4285f4);
45 | }
46 |
47 | @Override
48 | public void setResults(final List<Recognition> results) {
49 | this.results = results;
50 | postInvalidate();
51 | }
52 |
53 | @Override
54 | public void onDraw(final Canvas canvas) {
55 | final int x = 10;
56 | int y = (int) (fgPaint.getTextSize() * 1.5f);
57 |
58 | canvas.drawPaint(bgPaint);
59 |
60 | if (results != null) {
61 | for (final Recognition recog : results) {
62 | canvas.drawText(recog.getTitle() + ": " + recog.getConfidence(), x, y, fgPaint);
63 | y += (int) (fgPaint.getTextSize() * 1.5f);
64 | }
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/ResultsView.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.customview;
17 |
18 | import java.util.List;
19 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Recognition;
20 |
21 | public interface ResultsView {
22 | public void setResults(final List<Recognition> results);
23 | }
24 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml:
--------------------------------------------------------------------------------
1 |
7 |
12 |
13 |
19 |
22 |
25 |
26 |
27 |
28 |
34 |
35 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable/bottom_sheet_bg.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable/ic_baseline_add.xml:
--------------------------------------------------------------------------------
1 |
6 |
9 |
10 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable/ic_baseline_remove.xml:
--------------------------------------------------------------------------------
1 |
6 |
9 |
10 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable/rectangle.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
7 |
12 |
13 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/layout/tfe_ic_activity_camera.xml:
--------------------------------------------------------------------------------
1 |
16 |
21 |
22 |
27 |
28 |
29 |
36 |
37 |
38 |
44 |
45 |
49 |
50 |
51 |
52 |
53 |
56 |
57 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/layout/tfe_ic_camera_connection_fragment.xml:
--------------------------------------------------------------------------------
1 |
16 |
19 |
20 |
25 |
26 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/values/colors.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | #ffa800
4 | #ff6f00
5 | #425066
6 |
7 | #66000000
8 |
9 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/values/dimens.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | 15dp
4 | 8dp
5 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/values/strings.xml:
--------------------------------------------------------------------------------
1 |
2 | Midas
3 | This device doesn\'t support Camera2 API.
4 | GPU does not yet support quantized models.
5 | Model:
6 |
7 | - Float_EfficientNet
8 |
13 |
14 |
15 | Device:
16 |
17 | - GPU
18 | - CPU
19 | - NNAPI
20 |
21 |
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/values/styles.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/build.gradle:
--------------------------------------------------------------------------------
1 | // Top-level build file where you can add configuration options common to all sub-projects/modules.
2 |
3 | buildscript {
4 |
5 | repositories {
6 | google()
7 | jcenter()
8 | }
9 | dependencies {
10 | classpath 'com.android.tools.build:gradle:4.0.0'
11 | classpath 'de.undercouch:gradle-download-task:4.0.2'
12 | // NOTE: Do not place your application dependencies here; they belong
13 | // in the individual module build.gradle files
14 | }
15 | }
16 |
17 | allprojects {
18 | repositories {
19 | google()
20 | jcenter()
21 | }
22 | }
23 |
24 | task clean(type: Delete) {
25 | delete rootProject.buildDir
26 | }
27 |
28 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradle.properties:
--------------------------------------------------------------------------------
1 | # Project-wide Gradle settings.
2 | # IDE (e.g. Android Studio) users:
3 | # Gradle settings configured through the IDE *will override*
4 | # any settings specified in this file.
5 | # For more details on how to configure your build environment visit
6 | # http://www.gradle.org/docs/current/userguide/build_environment.html
7 | # Specifies the JVM arguments used for the daemon process.
8 | # The setting is particularly useful for tweaking memory settings.
9 | org.gradle.jvmargs=-Xmx1536m
10 | # When configured, Gradle will run in incubating parallel mode.
11 | # This option should only be used with decoupled projects. More details, visit
12 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
13 | # org.gradle.parallel=true
14 | android.useAndroidX=true
15 | android.enableJetifier=true
16 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionBase=GRADLE_USER_HOME
2 | distributionPath=wrapper/dists
3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.1.1-bin.zip
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradlew.bat:
--------------------------------------------------------------------------------
1 | @rem
2 | @rem Copyright 2015 the original author or authors.
3 | @rem
4 | @rem Licensed under the Apache License, Version 2.0 (the "License");
5 | @rem you may not use this file except in compliance with the License.
6 | @rem You may obtain a copy of the License at
7 | @rem
8 | @rem https://www.apache.org/licenses/LICENSE-2.0
9 | @rem
10 | @rem Unless required by applicable law or agreed to in writing, software
11 | @rem distributed under the License is distributed on an "AS IS" BASIS,
12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | @rem See the License for the specific language governing permissions and
14 | @rem limitations under the License.
15 | @rem
16 |
17 | @if "%DEBUG%" == "" @echo off
18 | @rem ##########################################################################
19 | @rem
20 | @rem Gradle startup script for Windows
21 | @rem
22 | @rem ##########################################################################
23 |
24 | @rem Set local scope for the variables with windows NT shell
25 | if "%OS%"=="Windows_NT" setlocal
26 |
27 | set DIRNAME=%~dp0
28 | if "%DIRNAME%" == "" set DIRNAME=.
29 | set APP_BASE_NAME=%~n0
30 | set APP_HOME=%DIRNAME%
31 |
32 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
33 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
34 |
35 | @rem Find java.exe
36 | if defined JAVA_HOME goto findJavaFromJavaHome
37 |
38 | set JAVA_EXE=java.exe
39 | %JAVA_EXE% -version >NUL 2>&1
40 | if "%ERRORLEVEL%" == "0" goto init
41 |
42 | echo.
43 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
44 | echo.
45 | echo Please set the JAVA_HOME variable in your environment to match the
46 | echo location of your Java installation.
47 |
48 | goto fail
49 |
50 | :findJavaFromJavaHome
51 | set JAVA_HOME=%JAVA_HOME:"=%
52 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
53 |
54 | if exist "%JAVA_EXE%" goto init
55 |
56 | echo.
57 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
58 | echo.
59 | echo Please set the JAVA_HOME variable in your environment to match the
60 | echo location of your Java installation.
61 |
62 | goto fail
63 |
64 | :init
65 | @rem Get command-line arguments, handling Windows variants
66 |
67 | if not "%OS%" == "Windows_NT" goto win9xME_args
68 |
69 | :win9xME_args
70 | @rem Slurp the command line arguments.
71 | set CMD_LINE_ARGS=
72 | set _SKIP=2
73 |
74 | :win9xME_args_slurp
75 | if "x%~1" == "x" goto execute
76 |
77 | set CMD_LINE_ARGS=%*
78 |
79 | :execute
80 | @rem Setup the command line
81 |
82 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
83 |
84 | @rem Execute Gradle
85 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
86 |
87 | :end
88 | @rem End local scope for the variables with windows NT shell
89 | if "%ERRORLEVEL%"=="0" goto mainEnd
90 |
91 | :fail
92 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
93 | rem the _cmd.exe /c_ return code!
94 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
95 | exit /b 1
96 |
97 | :mainEnd
98 | if "%OS%"=="Windows_NT" endlocal
99 |
100 | :omega
101 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'com.android.library'
2 |
3 | android {
4 | compileSdkVersion 28
5 | buildToolsVersion "28.0.0"
6 |
7 | defaultConfig {
8 | minSdkVersion 21
9 | targetSdkVersion 28
10 | versionCode 1
11 | versionName "1.0"
12 |
13 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
14 |
15 | }
16 |
17 | buildTypes {
18 | release {
19 | minifyEnabled false
20 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
21 | }
22 | }
23 |
24 | aaptOptions {
25 | noCompress "tflite"
26 | }
27 |
28 | lintOptions {
29 | checkReleaseBuilds false
30 | // Or, if you prefer, you can continue to check for errors in release builds,
31 | // but continue the build even when errors are found:
32 | abortOnError false
33 | }
34 | }
35 |
36 | dependencies {
37 | implementation fileTree(dir: 'libs', include: ['*.jar'])
38 | implementation project(":models")
39 | implementation 'androidx.appcompat:appcompat:1.1.0'
40 |
41 | // Build off of nightly TensorFlow Lite
42 | implementation('org.tensorflow:tensorflow-lite:0.0.0-nightly') { changing = true }
43 | implementation('org.tensorflow:tensorflow-lite-gpu:0.0.0-nightly') { changing = true }
44 | implementation('org.tensorflow:tensorflow-lite-support:0.0.0-nightly') { changing = true }
45 | // Use local TensorFlow library
46 | // implementation 'org.tensorflow:tensorflow-lite-local:0.0.0'
47 | }
48 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/proguard-rules.pro:
--------------------------------------------------------------------------------
1 | # Add project specific ProGuard rules here.
2 | # You can control the set of applied configuration files using the
3 | # proguardFiles setting in build.gradle.
4 | #
5 | # For more details, see
6 | # http://developer.android.com/guide/developing/tools/proguard.html
7 |
8 | # If your project uses WebView with JS, uncomment the following
9 | # and specify the fully qualified class name to the JavaScript interface
10 | # class:
11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview {
12 | # public *;
13 | #}
14 |
15 | # Uncomment this to preserve the line number information for
16 | # debugging stack traces.
17 | #-keepattributes SourceFile,LineNumberTable
18 |
19 | # If you keep the line number information, uncomment this to
20 | # hide the original source file name.
21 | #-renamesourcefileattribute SourceFile
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.tflite;
17 |
18 | import android.app.Activity;
19 | import java.io.IOException;
20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device;
21 | import org.tensorflow.lite.support.common.TensorOperator;
22 | import org.tensorflow.lite.support.common.ops.NormalizeOp;
23 |
24 | /** This TensorFlowLite classifier works with the float EfficientNet model. */
25 | public class ClassifierFloatEfficientNet extends Classifier {
26 |
27 | private static final float IMAGE_MEAN = 115.0f; //127.0f;
28 | private static final float IMAGE_STD = 58.0f; //128.0f;
29 |
30 | /**
31 | * Float model does not need dequantization in the post-processing. Setting mean and std as 0.0f
32 | * and 1.0f, respectively, to bypass the normalization.
33 | */
34 | private static final float PROBABILITY_MEAN = 0.0f;
35 |
36 | private static final float PROBABILITY_STD = 1.0f;
37 |
38 | /**
39 | * Initializes a {@code ClassifierFloatEfficientNet}.
40 | *
41 | * @param activity
42 | */
43 | public ClassifierFloatEfficientNet(Activity activity, Device device, int numThreads)
44 | throws IOException {
45 | super(activity, device, numThreads);
46 | }
47 |
48 | @Override
49 | protected String getModelPath() {
50 | // See build.gradle for where to obtain this file.
51 | // It should be downloaded automatically into the
52 | // assets directory as part of the build.
53 | //return "efficientnet-lite0-fp32.tflite";
54 | return "model_opt.tflite";
55 | }
56 |
57 | @Override
58 | protected String getLabelPath() {
59 | return "labels_without_background.txt";
60 | }
61 |
62 | @Override
63 | protected TensorOperator getPreprocessNormalizeOp() {
64 | return new NormalizeOp(IMAGE_MEAN, IMAGE_STD);
65 | }
66 |
67 | @Override
68 | protected TensorOperator getPostprocessNormalizeOp() {
69 | return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD);
70 | }
71 | }
72 |
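
The classifier above feeds each pixel through NormalizeOp(IMAGE_MEAN, IMAGE_STD), i.e. (pixel - 115.0) / 58.0, and applies an identity NormalizeOp(0.0, 1.0) to the output scores, since a float model needs no dequantization. A minimal Python sketch of that arithmetic (the equivalent math only, not the Support Library implementation):

import numpy as np

IMAGE_MEAN, IMAGE_STD = 115.0, 58.0           # values from ClassifierFloatEfficientNet
PROBABILITY_MEAN, PROBABILITY_STD = 0.0, 1.0  # identity post-processing for float models

def preprocess(rgb_uint8: np.ndarray) -> np.ndarray:
    """Equivalent of NormalizeOp(IMAGE_MEAN, IMAGE_STD): (x - mean) / std."""
    return (rgb_uint8.astype(np.float32) - IMAGE_MEAN) / IMAGE_STD

def postprocess(scores: np.ndarray) -> np.ndarray:
    """Identity for float models: (s - 0.0) / 1.0 leaves the scores untouched."""
    return (scores - PROBABILITY_MEAN) / PROBABILITY_STD
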
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.tflite;
17 |
18 | import android.app.Activity;
19 | import java.io.IOException;
20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device;
21 | import org.tensorflow.lite.support.common.TensorOperator;
22 | import org.tensorflow.lite.support.common.ops.NormalizeOp;
23 |
24 | /** This TensorFlowLite classifier works with the float MobileNet model. */
25 | public class ClassifierFloatMobileNet extends Classifier {
26 |
27 | /** Float MobileNet requires additional normalization of the used input. */
28 | private static final float IMAGE_MEAN = 127.5f;
29 |
30 | private static final float IMAGE_STD = 127.5f;
31 |
32 | /**
33 | * Float model does not need dequantization in the post-processing. Setting mean and std as 0.0f
34 | * and 1.0f, respectively, to bypass the normalization.
35 | */
36 | private static final float PROBABILITY_MEAN = 0.0f;
37 |
38 | private static final float PROBABILITY_STD = 1.0f;
39 |
40 | /**
41 | * Initializes a {@code ClassifierFloatMobileNet}.
42 | *
43 | * @param activity
44 | */
45 | public ClassifierFloatMobileNet(Activity activity, Device device, int numThreads)
46 | throws IOException {
47 | super(activity, device, numThreads);
48 | }
49 |
50 | @Override
51 | protected String getModelPath() {
52 | // See build.gradle for where to obtain this file.
53 | // It should be downloaded automatically into the
54 | // assets directory as part of the build.
55 | return "model_0.tflite";
56 | }
57 |
58 | @Override
59 | protected String getLabelPath() {
60 | return "labels.txt";
61 | }
62 |
63 | @Override
64 | protected TensorOperator getPreprocessNormalizeOp() {
65 | return new NormalizeOp(IMAGE_MEAN, IMAGE_STD);
66 | }
67 |
68 | @Override
69 | protected TensorOperator getPostprocessNormalizeOp() {
70 | return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD);
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.tflite;
17 |
18 | import android.app.Activity;
19 | import java.io.IOException;
20 | import org.tensorflow.lite.support.common.TensorOperator;
21 | import org.tensorflow.lite.support.common.ops.NormalizeOp;
22 |
23 | /** This TensorFlow Lite classifier works with the quantized EfficientNet model. */
24 | public class ClassifierQuantizedEfficientNet extends Classifier {
25 |
26 | /**
27 | * The quantized model does not require normalization, thus set mean as 0.0f, and std as 1.0f to
28 | * bypass the normalization.
29 | */
30 | private static final float IMAGE_MEAN = 0.0f;
31 |
32 | private static final float IMAGE_STD = 1.0f;
33 |
34 | /** Quantized MobileNet requires additional dequantization to the output probability. */
35 | private static final float PROBABILITY_MEAN = 0.0f;
36 |
37 | private static final float PROBABILITY_STD = 255.0f;
38 |
39 | /**
40 | * Initializes a {@code ClassifierQuantizedEfficientNet}.
41 | *
42 | * @param activity
43 | */
44 | public ClassifierQuantizedEfficientNet(Activity activity, Device device, int numThreads)
45 | throws IOException {
46 | super(activity, device, numThreads);
47 | }
48 |
49 | @Override
50 | protected String getModelPath() {
51 | // See build.gradle for where to obtain this file.
52 | // It should be downloaded automatically into the
53 | // assets directory as part of the build.
54 | return "model_quant.tflite";
55 | }
56 |
57 | @Override
58 | protected String getLabelPath() {
59 | return "labels_without_background.txt";
60 | }
61 |
62 | @Override
63 | protected TensorOperator getPreprocessNormalizeOp() {
64 | return new NormalizeOp(IMAGE_MEAN, IMAGE_STD);
65 | }
66 |
67 | @Override
68 | protected TensorOperator getPostprocessNormalizeOp() {
69 | return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD);
70 | }
71 | }
72 |
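
In the quantized classifier above the input is passed through unchanged (mean 0.0, std 1.0), and the uint8 output scores are dequantized by NormalizeOp(0.0, 255.0), i.e. divided by 255 so they land in [0, 1]. A short sketch of that dequantization step, assuming the interpreter returns uint8 scores:

import numpy as np

PROBABILITY_MEAN, PROBABILITY_STD = 0.0, 255.0  # values from ClassifierQuantizedEfficientNet

def dequantize_scores(raw_scores: np.ndarray) -> np.ndarray:
    """Map uint8 scores in [0, 255] to float probabilities in [0, 1]."""
    return (raw_scores.astype(np.float32) - PROBABILITY_MEAN) / PROBABILITY_STD

# dequantize_scores(np.array([0, 128, 255], dtype=np.uint8)) -> [0.0, 0.502, 1.0]
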
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.tflite;
17 |
18 | import android.app.Activity;
19 | import java.io.IOException;
20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device;
21 | import org.tensorflow.lite.support.common.TensorOperator;
22 | import org.tensorflow.lite.support.common.ops.NormalizeOp;
23 |
24 | /** This TensorFlow Lite classifier works with the quantized MobileNet model. */
25 | public class ClassifierQuantizedMobileNet extends Classifier {
26 |
27 | /**
28 | * The quantized model does not require normalization, thus set mean as 0.0f, and std as 1.0f to
29 | * bypass the normalization.
30 | */
31 | private static final float IMAGE_MEAN = 0.0f;
32 |
33 | private static final float IMAGE_STD = 1.0f;
34 |
35 | /** Quantized MobileNet requires additional dequantization to the output probability. */
36 | private static final float PROBABILITY_MEAN = 0.0f;
37 |
38 | private static final float PROBABILITY_STD = 255.0f;
39 |
40 | /**
41 | * Initializes a {@code ClassifierQuantizedMobileNet}.
42 | *
43 | * @param activity
44 | */
45 | public ClassifierQuantizedMobileNet(Activity activity, Device device, int numThreads)
46 | throws IOException {
47 | super(activity, device, numThreads);
48 | }
49 |
50 | @Override
51 | protected String getModelPath() {
52 | // See build.gradle for where to obtain this file.
53 | // It should be downloaded automatically into the
54 | // assets directory as part of the build.
55 | return "model_quant_0.tflite";
56 | }
57 |
58 | @Override
59 | protected String getLabelPath() {
60 | return "labels.txt";
61 | }
62 |
63 | @Override
64 | protected TensorOperator getPreprocessNormalizeOp() {
65 | return new NormalizeOp(IMAGE_MEAN, IMAGE_STD);
66 | }
67 |
68 | @Override
69 | protected TensorOperator getPostprocessNormalizeOp() {
70 | return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD);
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'com.android.library'
2 |
3 | android {
4 | compileSdkVersion 28
5 | buildToolsVersion "28.0.0"
6 |
7 | defaultConfig {
8 | minSdkVersion 21
9 | targetSdkVersion 28
10 | versionCode 1
11 | versionName "1.0"
12 |
13 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
14 |
15 | }
16 |
17 | buildTypes {
18 | release {
19 | minifyEnabled false
20 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
21 | }
22 | }
23 | compileOptions {
24 | sourceCompatibility = '1.8'
25 | targetCompatibility = '1.8'
26 | }
27 | aaptOptions {
28 | noCompress "tflite"
29 | }
30 |
31 | lintOptions {
32 | checkReleaseBuilds false
33 | // Or, if you prefer, you can continue to check for errors in release builds,
34 | // but continue the build even when errors are found:
35 | abortOnError false
36 | }
37 | }
38 |
39 | dependencies {
40 | implementation fileTree(dir: 'libs', include: ['*.jar'])
41 | implementation project(":models")
42 | implementation 'androidx.appcompat:appcompat:1.1.0'
43 |
44 | // Build off of nightly TensorFlow Lite Task Library
45 | implementation('org.tensorflow:tensorflow-lite-task-vision:0.0.0-nightly') { changing = true }
46 | implementation('org.tensorflow:tensorflow-lite-metadata:0.0.0-nightly') { changing = true }
47 | }
48 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/proguard-rules.pro:
--------------------------------------------------------------------------------
1 | # Add project specific ProGuard rules here.
2 | # You can control the set of applied configuration files using the
3 | # proguardFiles setting in build.gradle.
4 | #
5 | # For more details, see
6 | # http://developer.android.com/guide/developing/tools/proguard.html
7 |
8 | # If your project uses WebView with JS, uncomment the following
9 | # and specify the fully qualified class name to the JavaScript interface
10 | # class:
11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview {
12 | # public *;
13 | #}
14 |
15 | # Uncomment this to preserve the line number information for
16 | # debugging stack traces.
17 | #-keepattributes SourceFile,LineNumberTable
18 |
19 | # If you keep the line number information, uncomment this to
20 | # hide the original source file name.
21 | #-renamesourcefileattribute SourceFile
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.tflite;
17 |
18 | import android.app.Activity;
19 | import java.io.IOException;
20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device;
21 |
22 | /** This TensorFlowLite classifier works with the float EfficientNet model. */
23 | public class ClassifierFloatEfficientNet extends Classifier {
24 |
25 | /**
26 | * Initializes a {@code ClassifierFloatEfficientNet}.
27 | *
28 | * @param device a {@link Device} object to configure the hardware accelerator
29 | * @param numThreads the number of threads during the inference
30 | * @throws IOException if the model is not loaded correctly
31 | */
32 | public ClassifierFloatEfficientNet(Activity activity, Device device, int numThreads)
33 | throws IOException {
34 | super(activity, device, numThreads);
35 | }
36 |
37 | @Override
38 | protected String getModelPath() {
39 | // See build.gradle for where to obtain this file.
40 | // It should be downloaded automatically into the
41 | // assets directory as part of the build.
42 | //return "efficientnet-lite0-fp32.tflite";
43 | return "model.tflite";
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.tflite;
17 |
18 | import android.app.Activity;
19 | import java.io.IOException;
20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device;
21 |
22 | /** This TensorFlowLite classifier works with the float MobileNet model. */
23 | public class ClassifierFloatMobileNet extends Classifier {
24 | /**
25 | * Initializes a {@code ClassifierFloatMobileNet}.
26 | *
27 | * @param device a {@link Device} object to configure the hardware accelerator
28 | * @param numThreads the number of threads during the inference
29 | * @throws IOException if the model is not loaded correctly
30 | */
31 | public ClassifierFloatMobileNet(Activity activity, Device device, int numThreads)
32 | throws IOException {
33 | super(activity, device, numThreads);
34 | }
35 |
36 | @Override
37 | protected String getModelPath() {
38 | // See build.gradle for where to obtain this file.
39 | // It should be downloaded automatically into the
40 | // assets directory as part of the build.
41 | return "mobilenet_v1_1.0_224.tflite";
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.tflite;
17 |
18 | import android.app.Activity;
19 | import java.io.IOException;
20 |
21 | /** This TensorFlow Lite classifier works with the quantized EfficientNet model. */
22 | public class ClassifierQuantizedEfficientNet extends Classifier {
23 |
24 | /**
25 | * Initializes a {@code ClassifierQuantizedEfficientNet}.
26 | *
27 | * @param device a {@link Device} object to configure the hardware accelerator
28 | * @param numThreads the number of threads during the inference
29 | * @throws IOException if the model is not loaded correctly
30 | */
31 | public ClassifierQuantizedEfficientNet(Activity activity, Device device, int numThreads)
32 | throws IOException {
33 | super(activity, device, numThreads);
34 | }
35 |
36 | @Override
37 | protected String getModelPath() {
38 | // See build.gradle for where to obtain this file.
39 | // It should be downloaded automatically into the
40 | // assets directory as part of the build.
41 | return "efficientnet-lite0-int8.tflite";
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | ==============================================================================*/
15 |
16 | package org.tensorflow.lite.examples.classification.tflite;
17 |
18 | import android.app.Activity;
19 | import java.io.IOException;
20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device;
21 |
22 | /** This TensorFlow Lite classifier works with the quantized MobileNet model. */
23 | public class ClassifierQuantizedMobileNet extends Classifier {
24 |
25 | /**
26 | * Initializes a {@code ClassifierQuantizedMobileNet}.
27 | *
28 | * @param device a {@link Device} object to configure the hardware accelerator
29 | * @param numThreads the number of threads during the inference
30 | * @throws IOException if the model is not loaded correctly
31 | */
32 | public ClassifierQuantizedMobileNet(Activity activity, Device device, int numThreads)
33 | throws IOException {
34 | super(activity, device, numThreads);
35 | }
36 |
37 | @Override
38 | protected String getModelPath() {
40 | // See build.gradle for where to obtain this file.
41 | // It should be downloaded automatically into the
42 | // assets directory as part of the build.
42 | return "mobilenet_v1_1.0_224_quant.tflite";
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'com.android.library'
2 | apply plugin: 'de.undercouch.download'
3 |
4 | android {
5 | compileSdkVersion 28
6 | buildToolsVersion "28.0.0"
7 |
8 | defaultConfig {
9 | minSdkVersion 21
10 | targetSdkVersion 28
11 | versionCode 1
12 | versionName "1.0"
13 |
14 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
15 |
16 | }
17 |
18 | buildTypes {
19 | release {
20 | minifyEnabled false
21 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
22 | }
23 | }
24 |
25 | aaptOptions {
26 | noCompress "tflite"
27 | }
28 |
29 | lintOptions {
30 | checkReleaseBuilds false
31 | // Or, if you prefer, you can continue to check for errors in release builds,
32 | // but continue the build even when errors are found:
33 | abortOnError false
34 | }
35 | }
36 |
37 | // Download default models; if you wish to use your own models then
38 | // place them in the "assets" directory and comment out this line.
39 | project.ext.ASSET_DIR = projectDir.toString() + '/src/main/assets'
40 | apply from:'download.gradle'
41 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/download.gradle:
--------------------------------------------------------------------------------
1 | def modelFloatDownloadUrl = "https://github.com/isl-org/MiDaS/releases/download/v2_1/model_opt.tflite"
2 | def modelFloatFile = "model_opt.tflite"
3 |
4 | task downloadModelFloat(type: Download) {
5 | src "${modelFloatDownloadUrl}"
6 | dest project.ext.ASSET_DIR + "/${modelFloatFile}"
7 | overwrite false
8 | }
9 |
10 | preBuild.dependsOn downloadModelFloat
11 |
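
The download task above fetches model_opt.tflite from the MiDaS v2_1 release into the module's assets directory and, because of overwrite false, skips the download when the file is already present. A rough Python equivalent for use outside Gradle; the asset directory path is illustrative:

import os
import urllib.request

MODEL_URL = "https://github.com/isl-org/MiDaS/releases/download/v2_1/model_opt.tflite"
ASSET_DIR = "src/main/assets"  # mirrors project.ext.ASSET_DIR in models/build.gradle

def download_model_float(asset_dir: str = ASSET_DIR) -> str:
    """Fetch model_opt.tflite unless it is already present (like `overwrite false`)."""
    os.makedirs(asset_dir, exist_ok=True)
    dest = os.path.join(asset_dir, os.path.basename(MODEL_URL))
    if not os.path.exists(dest):
        urllib.request.urlretrieve(MODEL_URL, dest)
    return dest
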
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/proguard-rules.pro:
--------------------------------------------------------------------------------
1 | # Add project specific ProGuard rules here.
2 | # You can control the set of applied configuration files using the
3 | # proguardFiles setting in build.gradle.
4 | #
5 | # For more details, see
6 | # http://developer.android.com/guide/developing/tools/proguard.html
7 |
8 | # If your project uses WebView with JS, uncomment the following
9 | # and specify the fully qualified class name to the JavaScript interface
10 | # class:
11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview {
12 | # public *;
13 | #}
14 |
15 | # Uncomment this to preserve the line number information for
16 | # debugging stack traces.
17 | #-keepattributes SourceFile,LineNumberTable
18 |
19 | # If you keep the line number information, uncomment this to
20 | # hide the original source file name.
21 | #-renamesourcefileattribute SourceFile
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/src/main/assets/run_tflite.py:
--------------------------------------------------------------------------------
1 | # Flex ops are included in the nightly build of the TensorFlow Python package, so TFLite models containing Flex ops can be run with the same Python API as normal TFLite models. The nightly build can be installed with the command below.
2 | # Flex ops will be added to the TensorFlow Python package and the tflite_runtime package from version 2.3 for Linux and 2.4 for other environments.
3 | # https://www.tensorflow.org/lite/guide/ops_select#running_the_model
4 |
5 | # You must use: tf-nightly
6 | # pip install tf-nightly
7 |
8 | import os
9 | import glob
10 | import cv2
11 | import numpy as np
12 |
13 | import tensorflow as tf
14 |
15 | width=256
16 | height=256
17 | model_name="model.tflite"
18 | #model_name="model_quant.tflite"
19 | image_name="dog.jpg"
20 |
21 | # input
22 | img = cv2.imread(image_name)
23 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
24 |
25 | mean=[0.485, 0.456, 0.406]
26 | std=[0.229, 0.224, 0.225]
27 | img = (img - mean) / std
28 |
29 | img_resized = tf.image.resize(img, [width,height], method='bicubic', preserve_aspect_ratio=False)
30 | #img_resized = tf.transpose(img_resized, [2, 0, 1])
31 | img_input = img_resized.numpy()
32 | reshape_img = img_input.reshape(1,width,height,3)
33 | tensor = tf.convert_to_tensor(reshape_img, dtype=tf.float32)
34 |
35 | # load model
36 | print("Load model...")
37 | interpreter = tf.lite.Interpreter(model_path=model_name)
38 | print("Allocate tensor...")
39 | interpreter.allocate_tensors()
40 | print("Get input/output details...")
41 | input_details = interpreter.get_input_details()
42 | output_details = interpreter.get_output_details()
43 | print("Get input shape...")
44 | input_shape = input_details[0]['shape']
45 | print(input_shape)
46 | print(input_details)
47 | print(output_details)
48 | #input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
49 | print("Set input tensor...")
50 | interpreter.set_tensor(input_details[0]['index'], tensor)
51 |
52 | print("invoke()...")
53 | interpreter.invoke()
54 |
55 | # The function `get_tensor()` returns a copy of the tensor data.
56 | # Use `tensor()` in order to get a pointer to the tensor.
57 | print("get output tensor...")
58 | output = interpreter.get_tensor(output_details[0]['index'])
59 | #output = np.squeeze(output)
60 | output = output.reshape(width, height)
61 | #print(output)
62 | prediction = np.array(output)
63 | print("reshape prediction...")
64 | prediction = prediction.reshape(width, height)
65 |
66 | # output file
67 | #prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
68 | print(" Write image to: output.png")
69 | depth_min = prediction.min()
70 | depth_max = prediction.max()
71 | img_out = (255 * (prediction - depth_min) / (depth_max - depth_min)).astype("uint8")
72 | print("save output image...")
73 | cv2.imwrite("output.png", img_out)
74 |
75 | print("finished")
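
The script above writes the depth map at the model's 256x256 resolution; the commented-out cv2.resize line hints at mapping it back to the input size. A small follow-up sketch that resizes the prediction to the original image and colorizes it for inspection (the colormap choice is an assumption, not part of the original script):

import cv2
import numpy as np

def save_depth_visualization(prediction: np.ndarray, original_bgr: np.ndarray,
                             path: str = "output_color.png") -> None:
    """Resize the 256x256 depth prediction to the input resolution and colorize it."""
    h, w = original_bgr.shape[:2]
    depth = cv2.resize(prediction, (w, h), interpolation=cv2.INTER_CUBIC)
    depth_min, depth_max = depth.min(), depth.max()
    depth_u8 = (255 * (depth - depth_min) / (depth_max - depth_min)).astype("uint8")
    cv2.imwrite(path, cv2.applyColorMap(depth_u8, cv2.COLORMAP_INFERNO))
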
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/settings.gradle:
--------------------------------------------------------------------------------
1 | rootProject.name = 'TFLite Image Classification Demo App'
2 | include ':app', ':lib_support', ':lib_task_api', ':models'
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore model file
2 | #*.tflite
3 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/project.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <Workspace
3 |    version = "1.0">
4 |    <FileRef
5 |       location = "self:">
6 |    </FileRef>
7 | </Workspace>
8 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3 | <plist version="1.0">
4 | <dict>
5 | 	<key>IDEDidComputeMac32BitWarning</key>
6 | 	<true/>
7 | </dict>
8 | </plist>
9 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcuserdata/admin.xcuserdatad/UserInterfaceState.xcuserstate:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcuserdata/admin.xcuserdatad/UserInterfaceState.xcuserstate
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/xcuserdata/admin.xcuserdatad/xcschemes/xcschememanagement.plist:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3 | <plist version="1.0">
4 | <dict>
5 | 	<key>SchemeUserState</key>
6 | 	<dict>
7 | 		<key>PoseNet.xcscheme_^#shared#^_</key>
8 | 		<dict>
9 | 			<key>orderHint</key>
10 | 			<integer>3</integer>
11 | 		</dict>
12 | 	</dict>
13 | </dict>
14 | </plist>
15 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/AppDelegate.swift:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | import UIKit
16 |
17 | @UIApplicationMain
18 | class AppDelegate: UIResponder, UIApplicationDelegate {
19 |
20 | var window: UIWindow?
21 |
22 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
23 | return true
24 | }
25 |
26 | func applicationWillResignActive(_ application: UIApplication) {
27 | }
28 |
29 | func applicationDidEnterBackground(_ application: UIApplication) {
30 | }
31 |
32 | func applicationWillEnterForeground(_ application: UIApplication) {
33 | }
34 |
35 | func applicationDidBecomeActive(_ application: UIApplication) {
36 | }
37 |
38 | func applicationWillTerminate(_ application: UIApplication) {
39 | }
40 | }
41 |
42 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Assets.xcassets/AppIcon.appiconset/Contents.json:
--------------------------------------------------------------------------------
1 | {"images":[{"size":"60x60","expected-size":"180","filename":"180.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"40x40","expected-size":"80","filename":"80.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"40x40","expected-size":"120","filename":"120.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"60x60","expected-size":"120","filename":"120.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"57x57","expected-size":"57","filename":"57.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"1x"},{"size":"29x29","expected-size":"58","filename":"58.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"29x29","expected-size":"29","filename":"29.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"1x"},{"size":"29x29","expected-size":"87","filename":"87.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"57x57","expected-size":"114","filename":"114.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"20x20","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"20x20","expected-size":"60","filename":"60.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"1024x1024","filename":"1024.png","expected-size":"1024","idiom":"ios-marketing","folder":"Assets.xcassets/AppIcon.appiconset/","scale":"1x"},{"size":"40x40","expected-size":"80","filename":"80.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"72x72","expected-size":"72","filename":"72.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"76x76","expected-size":"152","filename":"152.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"50x50","expected-size":"100","filename":"100.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"29x29","expected-size":"58","filename":"58.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"76x76","expected-size":"76","filename":"76.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"29x29","expected-size":"29","filename":"29.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"50x50","expected-size":"50","filename":"50.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"72x72","expected-size":"144","filename":"144.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"40x40","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"83.5x83.5","expected-size":"167","filename":"167.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"20x20","expected-size":"20","filename":"20.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"20x20","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"}]}
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Assets.xcassets/Contents.json:
--------------------------------------------------------------------------------
1 | {
2 | "info" : {
3 | "version" : 1,
4 | "author" : "xcode"
5 | }
6 | }
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Camera Feed/PreviewView.swift:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | import UIKit
16 | import AVFoundation
17 |
18 | /// The camera frame is displayed on this view.
19 | class PreviewView: UIView {
20 | var previewLayer: AVCaptureVideoPreviewLayer {
21 | guard let layer = layer as? AVCaptureVideoPreviewLayer else {
22 | fatalError("Layer expected is of type VideoPreviewLayer")
23 | }
24 | return layer
25 | }
26 |
27 | var session: AVCaptureSession? {
28 | get {
29 | return previewLayer.session
30 | }
31 | set {
32 | previewLayer.session = newValue
33 | }
34 | }
35 |
36 | override class var layerClass: AnyClass {
37 | return AVCaptureVideoPreviewLayer.self
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Cells/InfoCell.swift:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | import UIKit
16 |
17 | /// Table cell for inference result in bottom view.
18 | class InfoCell: UITableViewCell {
19 | @IBOutlet weak var fieldNameLabel: UILabel!
20 | @IBOutlet weak var infoLabel: UILabel!
21 | }
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Constants.swift:
--------------------------------------------------------------------------------
1 | // Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // =============================================================================
15 |
16 | enum Constants {
17 | // MARK: - Constants related to the image processing
18 | static let bgraPixel = (channels: 4, alphaComponent: 3, lastBgrComponent: 2)
19 | static let rgbPixelChannels = 3
20 | static let maxRGBValue: Float32 = 255.0
21 |
22 | // MARK: - Constants related to the model interpreter
23 | static let defaultThreadCount = 2
24 | static let defaultDelegate: Delegates = .CPU
25 | }
26 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Extensions/CGSizeExtension.swift:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // =============================================================================
15 |
16 | import Accelerate
17 | import Foundation
18 |
19 | extension CGSize {
20 | /// Returns a `CGAffineTransform` that resizes `self` to fit in the destination size, keeping the
21 | /// aspect ratio of `self`. The image is resized to be inscribed in the destination size and
22 | /// centered in the destination.
23 | ///
24 | /// - Parameter toFitIn: destination size to be filled.
25 | /// - Returns: `CGAffineTransform` to transform `self` image to `dest` image.
26 | func transformKeepAspect(toFitIn dest: CGSize) -> CGAffineTransform {
27 | let sourceRatio = self.height / self.width
28 | let destRatio = dest.height / dest.width
29 |
30 | // Calculates ratio `self` to `dest`.
31 | var ratio: CGFloat
32 | var x: CGFloat = 0
33 | var y: CGFloat = 0
34 | if sourceRatio > destRatio {
35 | // Source is taller than the destination. Resize to fit the destination height, then find
36 | // the horizontal starting point that centers the result.
37 | ratio = dest.height / self.height
38 | x = (dest.width - self.width * ratio) / 2
39 | } else {
40 | ratio = dest.width / self.width
41 | y = (dest.height - self.height * ratio) / 2
42 | }
43 | return CGAffineTransform(a: ratio, b: 0, c: 0, d: ratio, tx: x, ty: y)
44 | }
45 | }
46 |
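
transformKeepAspect picks the smaller scale factor so the source fits inside the destination and then offsets along the remaining axis to center it. The same arithmetic as a standalone Python sketch, returning the scale and x/y translation rather than a CGAffineTransform:

from typing import Tuple

def transform_keep_aspect(src_w: float, src_h: float,
                          dst_w: float, dst_h: float) -> Tuple[float, float, float]:
    """Return (ratio, tx, ty) that inscribes the source in the destination, centered."""
    source_ratio = src_h / src_w
    dest_ratio = dst_h / dst_w
    x = y = 0.0
    if source_ratio > dest_ratio:
        # Source is taller: fit to the destination height, center horizontally.
        ratio = dst_h / src_h
        x = (dst_w - src_w * ratio) / 2
    else:
        # Source is wider: fit to the destination width, center vertically.
        ratio = dst_w / src_w
        y = (dst_h - src_h * ratio) / 2
    return ratio, x, y
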
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Extensions/TFLiteExtension.swift:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | // =============================================================================
15 |
16 | import Accelerate
17 | import CoreImage
18 | import Foundation
19 | import TensorFlowLite
20 |
21 | // MARK: - Data
22 | extension Data {
23 | /// Creates a new buffer by copying the buffer pointer of the given array.
24 | ///
25 | /// - Warning: The given array's element type `T` must be trivial in that it can be copied bit
26 | /// for bit with no indirection or reference-counting operations; otherwise, reinterpreting
27 | /// data from the resulting buffer has undefined behavior.
28 | /// - Parameter array: An array with elements of type `T`.
29 | init<T>(copyingBufferOf array: [T]) {
30 | self = array.withUnsafeBufferPointer(Data.init)
31 | }
32 |
33 | /// Convert a Data instance to Array representation.
34 | func toArray<T>(type: T.Type) -> [T] where T: AdditiveArithmetic {
35 | var array = [T](repeating: T.zero, count: self.count / MemoryLayout<T>.stride)
36 | _ = array.withUnsafeMutableBytes { self.copyBytes(to: $0) }
37 | return array
38 | }
39 | }
40 |
41 | // MARK: - Wrappers
42 | /// Struct for handling multidimension `Data` in flat `Array`.
43 | struct FlatArray<Element: AdditiveArithmetic> {
44 | private var array: [Element]
45 | var dimensions: [Int]
46 |
47 | init(tensor: Tensor) {
48 | dimensions = tensor.shape.dimensions
49 | array = tensor.data.toArray(type: Element.self)
50 | }
51 |
52 | private func flatIndex(_ index: [Int]) -> Int {
53 | guard index.count == dimensions.count else {
54 | fatalError("Invalid index: got \(index.count) index(es) for \(dimensions.count) index(es).")
55 | }
56 |
57 | var result = 0
58 | for i in 0..<dimensions.count {
59 | guard dimensions[i] > index[i] else {
60 | fatalError("Invalid index: \(index[i]) is bigger than \(dimensions[i])")
61 | }
62 | result = dimensions[i] * result + index[i]
63 | }
64 | return result
65 | }
66 |
67 | subscript(_ index: Int...) -> Element {
68 | get {
69 | return array[flatIndex(index)]
70 | }
71 | set(newValue) {
72 | array[flatIndex(index)] = newValue
73 | }
74 | }
75 | }
76 |
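
FlatArray stores an N-dimensional tensor in a flat array and computes the row-major offset as result = dimensions[i] * result + index[i]. A compact Python rendering of that index calculation with the same bounds checks, useful for sanity-checking shapes:

from typing import Sequence

def flat_index(index: Sequence[int], dimensions: Sequence[int]) -> int:
    """Row-major offset of a multi-dimensional index, as in FlatArray.flatIndex."""
    if len(index) != len(dimensions):
        raise ValueError(f"got {len(index)} index(es) for {len(dimensions)} dimension(s)")
    result = 0
    for i, d in zip(index, dimensions):
        if i >= d:
            raise IndexError(f"{i} is bigger than {d}")
        result = d * result + i
    return result

# flat_index((1, 2), (3, 4)) == 1 * 4 + 2 == 6
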
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Info.plist:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3 | <plist version="1.0">
4 | <dict>
5 | 	<key>CFBundleDevelopmentRegion</key>
6 | 	<string>$(DEVELOPMENT_LANGUAGE)</string>
7 | 	<key>CFBundleExecutable</key>
8 | 	<string>$(EXECUTABLE_NAME)</string>
9 | 	<key>CFBundleIdentifier</key>
10 | 	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
11 | 	<key>CFBundleInfoDictionaryVersion</key>
12 | 	<string>6.0</string>
13 | 	<key>CFBundleName</key>
14 | 	<string>$(PRODUCT_NAME)</string>
15 | 	<key>CFBundlePackageType</key>
16 | 	<string>APPL</string>
17 | 	<key>CFBundleShortVersionString</key>
18 | 	<string>1.0</string>
19 | 	<key>CFBundleVersion</key>
20 | 	<string>1</string>
21 | 	<key>LSRequiresIPhoneOS</key>
22 | 	<true/>
23 | 	<key>NSCameraUsageDescription</key>
24 | 	<string>This app will use camera to continuously estimate the depth map.</string>
25 | 	<key>UILaunchStoryboardName</key>
26 | 	<string>LaunchScreen</string>
27 | 	<key>UIMainStoryboardFile</key>
28 | 	<string>Main</string>
29 | 	<key>UIRequiredDeviceCapabilities</key>
30 | 	<array>
31 | 		<string>armv7</string>
32 | 	</array>
33 | 	<key>UISupportedInterfaceOrientations</key>
34 | 	<array>
35 | 		<string>UIInterfaceOrientationPortrait</string>
36 | 	</array>
37 | 	<key>UISupportedInterfaceOrientations~ipad</key>
38 | 	<array>
39 | 		<string>UIInterfaceOrientationPortrait</string>
40 | 	</array>
41 | </dict>
42 | </plist>
43 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Views/OverlayView.swift:
--------------------------------------------------------------------------------
1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | import UIKit
16 |
17 | /// UIView for rendering inference output.
18 | class OverlayView: UIView {
19 |
20 | var dots = [CGPoint]()
21 | var lines = [Line]()
22 |
23 | override func draw(_ rect: CGRect) {
24 | for dot in dots {
25 | drawDot(of: dot)
26 | }
27 | for line in lines {
28 | drawLine(of: line)
29 | }
30 | }
31 |
32 | func drawDot(of dot: CGPoint) {
33 | let dotRect = CGRect(
34 | x: dot.x - Traits.dot.radius / 2, y: dot.y - Traits.dot.radius / 2,
35 | width: Traits.dot.radius, height: Traits.dot.radius)
36 | let dotPath = UIBezierPath(ovalIn: dotRect)
37 |
38 | Traits.dot.color.setFill()
39 | dotPath.fill()
40 | }
41 |
42 | func drawLine(of line: Line) {
43 | let linePath = UIBezierPath()
44 | linePath.move(to: CGPoint(x: line.from.x, y: line.from.y))
45 | linePath.addLine(to: CGPoint(x: line.to.x, y: line.to.y))
46 | linePath.close()
47 |
48 | linePath.lineWidth = Traits.line.width
49 | Traits.line.color.setStroke()
50 |
51 | linePath.stroke()
52 | }
53 |
54 | func clear() {
55 | self.dots = []
56 | self.lines = []
57 | }
58 | }
59 |
60 | private enum Traits {
61 | static let dot = (radius: CGFloat(5), color: UIColor.orange)
62 | static let line = (width: CGFloat(1.0), color: UIColor.orange)
63 | }
64 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Podfile:
--------------------------------------------------------------------------------
1 | # Uncomment the next line to define a global platform for your project
2 | platform :ios, '12.0'
3 |
4 | target 'Midas' do
5 | # Comment the next line if you're not using Swift and don't want to use dynamic frameworks
6 | use_frameworks!
7 |
8 | # Pods for Midas
9 | pod 'TensorFlowLiteSwift', '~> 0.0.1-nightly'
10 | pod 'TensorFlowLiteSwift/CoreML', '~> 0.0.1-nightly'
11 | pod 'TensorFlowLiteSwift/Metal', '~> 0.0.1-nightly'
12 | end
13 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/RunScripts/download_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Download TF Lite model from the internet if it does not exist.
3 |
4 | TFLITE_MODEL="model_opt.tflite"
5 | TFLITE_FILE="Midas/Model/${TFLITE_MODEL}"
6 | MODEL_SRC="https://github.com/isl-org/MiDaS/releases/download/v2/${TFLITE_MODEL}"
7 |
8 | if test -f "${TFLITE_FILE}"; then
9 | echo "INFO: TF Lite model already exists. Skip downloading and use the local model."
10 | else
11 | curl --create-dirs -o "${TFLITE_FILE}" -LJO "${MODEL_SRC}"
12 | echo "INFO: Downloaded TensorFlow Lite model to ${TFLITE_FILE}."
13 | fi
14 |
15 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Alexey
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh:
--------------------------------------------------------------------------------
1 | mkdir src
2 | catkin_make
3 | source devel/setup.bash
4 | echo $ROS_PACKAGE_PATH
5 | chmod +x ./devel/setup.bash
6 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh:
--------------------------------------------------------------------------------
1 | mkdir ~/.ros
2 | wget https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small-traced.pt
3 | cp ./model-small-traced.pt ~/.ros/model-small-traced.pt
4 |
5 |
6 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh:
--------------------------------------------------------------------------------
1 | #@title { display-mode: "code" }
2 |
3 | #from http://wiki.ros.org/indigo/Installation/Ubuntu
4 |
5 | #1.2 Setup sources.list
6 | sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list'
7 |
8 | # 1.3 Setup keys
9 | sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654
10 | sudo apt-key adv --keyserver 'hkp://ha.pool.sks-keyservers.net:80' --recv-key 421C365BD9FF1F717815A3895523BAEEB01FA116
11 |
12 | curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add -
13 |
14 | # 1.4 Installation
15 | sudo apt-get update
16 | sudo apt-get upgrade
17 |
18 | # Desktop-Full Install:
19 | sudo apt-get install ros-melodic-desktop-full
20 |
21 | printf "\nsource /opt/ros/melodic/setup.bash\n" >> ~/.bashrc
22 |
23 | # 1.5 Initialize rosdep
24 | sudo rosdep init
25 | rosdep update
26 |
27 |
28 | # 1.7 Getting rosinstall (python)
29 | sudo apt-get install python-rosinstall
30 | sudo apt-get install python-catkin-tools
31 | sudo apt-get install python-rospy
32 | sudo apt-get install python-rosdep
33 | sudo apt-get install python-roscd
34 | sudo apt-get install python-pip
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh:
--------------------------------------------------------------------------------
1 | #@title { display-mode: "code" }
2 |
3 | #from http://wiki.ros.org/indigo/Installation/Ubuntu
4 |
5 | #1.2 Setup sources.list
6 | sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list'
7 |
8 | # 1.3 Setup keys
9 | sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654
10 |
11 | curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add -
12 |
13 | # 1.4 Installation
14 | sudo apt-get update
15 | sudo apt-get upgrade
16 |
17 | # Desktop-Full Install:
18 | sudo apt-get install ros-noetic-desktop-full
19 |
20 | printf "\nsource /opt/ros/noetic/setup.bash\n" >> ~/.bashrc
21 |
22 | # 1.5 Initialize rosdep
23 | sudo rosdep init
24 | rosdep update
25 |
26 |
27 | # 1.7 Getting rosinstall (python)
28 | sudo apt-get install python3-rosinstall
29 | sudo apt-get install python3-catkin-tools
30 | sudo apt-get install python3-rospy
31 | sudo apt-get install python3-rosdep
32 | sudo apt-get install python3-roscd
33 | sudo apt-get install python3-pip
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh:
--------------------------------------------------------------------------------
1 | cd ~/catkin_ws/src
2 | catkin_create_pkg midas_cpp std_msgs roscpp cv_bridge sensor_msgs image_transport
3 | cd ~/catkin_ws
4 | catkin_make
5 |
6 | chmod +x ~/catkin_ws/devel/setup.bash
7 | printf "\nsource ~/catkin_ws/devel/setup.bash" >> ~/.bashrc
8 | source ~/catkin_ws/devel/setup.bash
9 |
10 |
11 | sudo rosdep init
12 | rosdep update
13 | #rospack depends1 midas_cpp
14 | roscd midas_cpp
15 | #cat package.xml
16 | #rospack depends midas_cpp
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh:
--------------------------------------------------------------------------------
1 | source ~/catkin_ws/devel/setup.bash
2 | roslaunch midas_cpp midas_cpp.launch model_name:="model-small-traced.pt" input_topic:="image_topic" output_topic:="midas_topic" out_orig_size:="true"
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | midas_cpp
4 | 0.1.0
5 | The midas_cpp package
6 |
7 | Alexey Bochkovskiy
8 | MIT
9 | https://github.com/isl-org/MiDaS/tree/master/ros
10 |
11 |
12 |
13 |
14 |
15 |
16 | TODO
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 | catkin
52 | cv_bridge
53 | image_transport
54 | roscpp
55 | rospy
56 | sensor_msgs
57 | std_msgs
58 | cv_bridge
59 | image_transport
60 | roscpp
61 | rospy
62 | sensor_msgs
63 | std_msgs
64 | cv_bridge
65 | image_transport
66 | roscpp
67 | rospy
68 | sensor_msgs
69 | std_msgs
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from __future__ import print_function
3 |
4 | import roslib
5 | #roslib.load_manifest('my_package')
6 | import sys
7 | import rospy
8 | import cv2
9 | import numpy as np
10 | from std_msgs.msg import String
11 | from sensor_msgs.msg import Image
12 | from cv_bridge import CvBridge, CvBridgeError
13 |
14 | class video_show:
15 |
16 | def __init__(self):
17 | self.show_output = rospy.get_param('~show_output', True)
18 | self.save_output = rospy.get_param('~save_output', False)
19 | self.output_video_file = rospy.get_param('~output_video_file','result.mp4')
20 | # rospy.loginfo(f"Listener - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}")
21 |
22 | self.bridge = CvBridge()
23 | self.image_sub = rospy.Subscriber("midas_topic", Image, self.callback)
24 |
25 | def callback(self, data):
26 | try:
27 | cv_image = self.bridge.imgmsg_to_cv2(data)
28 | except CvBridgeError as e:
29 | print(e)
30 | return
31 |
32 | if cv_image.size == 0:
33 | return
34 |
35 | rospy.loginfo("Listener: Received new frame")
36 | cv_image = cv_image.astype("uint8")
37 |
38 | if self.show_output==True:
39 | cv2.imshow("video_show", cv_image)
40 | cv2.waitKey(10)
41 |
42 |         if self.save_output==True:
43 |             if not getattr(self, "video_writer_init", False):  # create the writer once, on the first saved frame
44 |                 fourcc = cv2.VideoWriter_fourcc(*'XVID')
45 |                 self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0]))
46 |                 self.video_writer_init = True
47 |             self.out.write(cv_image)
48 |
49 |
50 |
51 | def main(args):
52 | rospy.init_node('listener', anonymous=True)
53 | ic = video_show()
54 | try:
55 | rospy.spin()
56 | except KeyboardInterrupt:
57 | print("Shutting down")
58 | cv2.destroyAllWindows()
59 |
60 | if __name__ == '__main__':
61 | main(sys.argv)
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from __future__ import print_function
3 |
4 | import roslib
5 | #roslib.load_manifest('my_package')
6 | import sys
7 | import rospy
8 | import cv2
9 | import numpy as np
10 | from std_msgs.msg import String
11 | from sensor_msgs.msg import Image
12 | from cv_bridge import CvBridge, CvBridgeError
13 |
14 | class video_show:
15 |
16 | def __init__(self):
17 | self.show_output = rospy.get_param('~show_output', True)
18 | self.save_output = rospy.get_param('~save_output', False)
19 | self.output_video_file = rospy.get_param('~output_video_file','result.mp4')
20 | # rospy.loginfo(f"Listener original - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}")
21 |
22 | self.bridge = CvBridge()
23 | self.image_sub = rospy.Subscriber("image_topic", Image, self.callback)
24 |
25 | def callback(self, data):
26 | try:
27 | cv_image = self.bridge.imgmsg_to_cv2(data)
28 | except CvBridgeError as e:
29 | print(e)
30 | return
31 |
32 | if cv_image.size == 0:
33 | return
34 |
35 | rospy.loginfo("Listener_original: Received new frame")
36 | cv_image = cv_image.astype("uint8")
37 |
38 | if self.show_output==True:
39 | cv2.imshow("video_show_orig", cv_image)
40 | cv2.waitKey(10)
41 |
42 |         if self.save_output==True:
43 |             if not getattr(self, "video_writer_init", False):  # create the writer once, on the first saved frame
44 |                 fourcc = cv2.VideoWriter_fourcc(*'XVID')
45 |                 self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0]))
46 |                 self.video_writer_init = True
47 |             self.out.write(cv_image)
48 |
49 |
50 |
51 | def main(args):
52 | rospy.init_node('listener_original', anonymous=True)
53 | ic = video_show()
54 | try:
55 | rospy.spin()
56 | except KeyboardInterrupt:
57 | print("Shutting down")
58 | cv2.destroyAllWindows()
59 |
60 | if __name__ == '__main__':
61 | main(sys.argv)
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 |
4 | import roslib
5 | #roslib.load_manifest('my_package')
6 | import sys
7 | import rospy
8 | import cv2
9 | from std_msgs.msg import String
10 | from sensor_msgs.msg import Image
11 | from cv_bridge import CvBridge, CvBridgeError
12 |
13 |
14 | def talker():
15 | rospy.init_node('talker', anonymous=True)
16 |
17 | use_camera = rospy.get_param('~use_camera', False)
18 | input_video_file = rospy.get_param('~input_video_file','test.mp4')
19 | # rospy.loginfo(f"Talker - params: use_camera={use_camera}, input_video_file={input_video_file}")
20 |
21 | # rospy.loginfo("Talker: Trying to open a video stream")
22 | if use_camera == True:
23 | cap = cv2.VideoCapture(0)
24 | else:
25 | cap = cv2.VideoCapture(input_video_file)
26 |
27 | pub = rospy.Publisher('image_topic', Image, queue_size=1)
28 | rate = rospy.Rate(30) # 30hz
29 | bridge = CvBridge()
30 |
31 | while not rospy.is_shutdown():
32 | ret, cv_image = cap.read()
33 | if ret==False:
34 | print("Talker: Video is over")
35 | rospy.loginfo("Video is over")
36 | return
37 |
38 | try:
39 | image = bridge.cv2_to_imgmsg(cv_image, "bgr8")
40 | except CvBridgeError as e:
41 |             rospy.logerr("Talker: cv2image conversion failed: %s", e)
42 | print(e)
43 | continue
44 |
45 | rospy.loginfo("Talker: Publishing frame")
46 | pub.publish(image)
47 | rate.sleep()
48 |
49 | if __name__ == '__main__':
50 | try:
51 | talker()
52 | except rospy.ROSInterruptException:
53 | pass
54 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh:
--------------------------------------------------------------------------------
1 | # place any test.mp4 file next to this file
2 |
3 | # roscore
4 | # rosnode kill -a
5 |
6 | source ~/catkin_ws/devel/setup.bash
7 |
8 | roscore &
9 | P1=$!
10 | rosrun midas_cpp talker.py &
11 | P2=$!
12 | rosrun midas_cpp listener_original.py &
13 | P3=$!
14 | rosrun midas_cpp listener.py &
15 | P4=$!
16 | wait $P1 $P2 $P3 $P4
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py:
--------------------------------------------------------------------------------
1 | """Compute depth maps for images in the input folder.
2 | """
3 | import os
4 | import ntpath
5 | import glob
6 | import torch
7 | import utils
8 | import cv2
9 | import numpy as np
10 | from torchvision.transforms import Compose, Normalize
11 | from torchvision import transforms
12 |
13 | from shutil import copyfile
14 | import fileinput
15 | import sys
16 | sys.path.append(os.getcwd() + '/..')
17 |
18 | def modify_file():
19 | modify_filename = '../midas/blocks.py'
20 | copyfile(modify_filename, modify_filename+'.bak')
21 |
22 | with open(modify_filename, 'r') as file :
23 | filedata = file.read()
24 |
25 | filedata = filedata.replace('align_corners=True', 'align_corners=False')
26 | filedata = filedata.replace('import torch.nn as nn', 'import torch.nn as nn\nimport torchvision.models as models')
27 | filedata = filedata.replace('torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")', 'models.resnext101_32x8d()')
28 |
29 | with open(modify_filename, 'w') as file:
30 | file.write(filedata)
31 |
32 | def restore_file():
33 | modify_filename = '../midas/blocks.py'
34 | copyfile(modify_filename+'.bak', modify_filename)
35 |
36 | modify_file()
37 |
38 | from midas.midas_net import MidasNet
39 | from midas.transforms import Resize, NormalizeImage, PrepareForNet
40 |
41 | restore_file()
42 |
43 |
44 | class MidasNet_preprocessing(MidasNet):
45 | """Network for monocular depth estimation.
46 | """
47 | def forward(self, x):
48 | """Forward pass.
49 |
50 | Args:
51 | x (tensor): input data (image)
52 |
53 | Returns:
54 | tensor: depth
55 | """
56 |
57 | mean = torch.tensor([0.485, 0.456, 0.406])
58 | std = torch.tensor([0.229, 0.224, 0.225])
59 | x.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])
60 |
61 | return MidasNet.forward(self, x)
62 |
63 |
64 | def run(model_path):
65 | """Run MonoDepthNN to compute depth maps.
66 |
67 | Args:
68 | model_path (str): path to saved model
69 | """
70 | print("initialize")
71 |
72 | # select device
73 |
74 | # load network
75 | #model = MidasNet(model_path, non_negative=True)
76 | model = MidasNet_preprocessing(model_path, non_negative=True)
77 |
78 | model.eval()
79 |
80 | print("start processing")
81 |
82 | # input
83 | img_input = np.zeros((3, 384, 384), np.float32)
84 |
85 | # compute
86 | with torch.no_grad():
87 | sample = torch.from_numpy(img_input).unsqueeze(0)
88 | prediction = model.forward(sample)
89 | prediction = (
90 | torch.nn.functional.interpolate(
91 | prediction.unsqueeze(1),
92 |                 size=img_input.shape[1:],  # (height, width) of the CHW dummy input
93 | mode="bicubic",
94 | align_corners=False,
95 | )
96 | .squeeze()
97 | .cpu()
98 | .numpy()
99 | )
100 |
101 | torch.onnx.export(model, sample, ntpath.basename(model_path).rsplit('.', 1)[0]+'.onnx', opset_version=9)
102 |
103 | print("finished")
104 |
105 |
106 | if __name__ == "__main__":
107 | # set paths
108 | # MODEL_PATH = "model.pt"
109 | MODEL_PATH = "../model-f6b98070.pt"
110 |
111 | # compute depth maps
112 | run(MODEL_PATH)
113 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py:
--------------------------------------------------------------------------------
1 | """Compute depth maps for images in the input folder.
2 | """
3 | import os
4 | import glob
5 | import utils
6 | import cv2
7 | import sys
8 | import numpy as np
9 | import argparse
10 |
11 | import onnx
12 | import onnxruntime as rt
13 |
14 | from transforms import Resize, NormalizeImage, PrepareForNet
15 |
16 |
17 | def run(input_path, output_path, model_path, model_type="large"):
18 | """Run MonoDepthNN to compute depth maps.
19 |
20 | Args:
21 | input_path (str): path to input folder
22 | output_path (str): path to output folder
23 | model_path (str): path to saved model
24 | """
25 | print("initialize")
26 |
27 | # select device
28 | device = "CUDA:0"
29 | #device = "CPU"
30 | print("device: %s" % device)
31 |
32 | # network resolution
33 | if model_type == "large":
34 | net_w, net_h = 384, 384
35 | elif model_type == "small":
36 | net_w, net_h = 256, 256
37 | else:
38 | print(f"model_type '{model_type}' not implemented, use: --model_type large")
39 | assert False
40 |
41 | # load network
42 | print("loading model...")
43 | model = rt.InferenceSession(model_path)
44 | input_name = model.get_inputs()[0].name
45 | output_name = model.get_outputs()[0].name
46 |
47 | resize_image = Resize(
48 | net_w,
49 | net_h,
50 | resize_target=None,
51 | keep_aspect_ratio=False,
52 | ensure_multiple_of=32,
53 | resize_method="upper_bound",
54 | image_interpolation_method=cv2.INTER_CUBIC,
55 | )
56 |
57 | def compose2(f1, f2):
58 | return lambda x: f2(f1(x))
59 |
60 | transform = compose2(resize_image, PrepareForNet())
61 |
62 | # get input
63 | img_names = glob.glob(os.path.join(input_path, "*"))
64 | num_images = len(img_names)
65 |
66 | # create output folder
67 | os.makedirs(output_path, exist_ok=True)
68 |
69 | print("start processing")
70 |
71 | for ind, img_name in enumerate(img_names):
72 |
73 | print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))
74 |
75 | # input
76 | img = utils.read_image(img_name)
77 | img_input = transform({"image": img})["image"]
78 |
79 | # compute
80 | output = model.run([output_name], {input_name: img_input.reshape(1, 3, net_h, net_w).astype(np.float32)})[0]
81 | prediction = np.array(output).reshape(net_h, net_w)
82 | prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
83 |
84 | # output
85 | filename = os.path.join(
86 | output_path, os.path.splitext(os.path.basename(img_name))[0]
87 | )
88 | utils.write_depth(filename, prediction, bits=2)
89 |
90 | print("finished")
91 |
92 |
93 | if __name__ == "__main__":
94 | parser = argparse.ArgumentParser()
95 |
96 | parser.add_argument('-i', '--input_path',
97 | default='input',
98 | help='folder with input images'
99 | )
100 |
101 | parser.add_argument('-o', '--output_path',
102 | default='output',
103 | help='folder for output images'
104 | )
105 |
106 | parser.add_argument('-m', '--model_weights',
107 | default='model-f6b98070.onnx',
108 | help='path to the trained weights of model'
109 | )
110 |
111 | parser.add_argument('-t', '--model_type',
112 | default='large',
113 | help='model type: large or small'
114 | )
115 |
116 | args = parser.parse_args()
117 |
118 | # compute depth maps
119 | run(args.input_path, args.output_path, args.model_weights, args.model_type)
120 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | import cv2
4 |
5 |
6 | def write_pfm(path, image, scale=1):
7 | """Write pfm file.
8 | Args:
9 |         path (str): path to file
10 | image (array): data
11 | scale (int, optional): Scale. Defaults to 1.
12 | """
13 |
14 | with open(path, "wb") as file:
15 | color = None
16 |
17 | if image.dtype.name != "float32":
18 | raise Exception("Image dtype must be float32.")
19 |
20 | image = np.flipud(image)
21 |
22 | if len(image.shape) == 3 and image.shape[2] == 3: # color image
23 | color = True
24 | elif (
25 | len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1
26 | ): # greyscale
27 | color = False
28 | else:
29 | raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")
30 |
31 |         file.write(("PF\n" if color else "Pf\n").encode())
32 | file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
33 |
34 | endian = image.dtype.byteorder
35 |
36 | if endian == "<" or endian == "=" and sys.byteorder == "little":
37 | scale = -scale
38 |
39 | file.write("%f\n".encode() % scale)
40 |
41 | image.tofile(file)
42 |
43 | def read_image(path):
44 | """Read image and output RGB image (0-1).
45 | Args:
46 | path (str): path to file
47 | Returns:
48 | array: RGB image (0-1)
49 | """
50 | img = cv2.imread(path)
51 |
52 | if img.ndim == 2:
53 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
54 |
55 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
56 |
57 | return img
58 |
59 | def write_depth(path, depth, bits=1):
60 | """Write depth map to pfm and png file.
61 | Args:
62 | path (str): filepath without extension
63 | depth (array): depth
64 | """
65 | write_pfm(path + ".pfm", depth.astype(np.float32))
66 |
67 | depth_min = depth.min()
68 | depth_max = depth.max()
69 |
70 | max_val = (2**(8*bits))-1
71 |
72 | if depth_max - depth_min > np.finfo("float").eps:
73 | out = max_val * (depth - depth_min) / (depth_max - depth_min)
74 | else:
75 |         out = np.zeros(depth.shape, dtype=depth.dtype)
76 |
77 | if bits == 1:
78 | cv2.imwrite(path + ".png", out.astype("uint8"))
79 | elif bits == 2:
80 | cv2.imwrite(path + ".png", out.astype("uint16"))
81 |
82 | return
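
A quick self-contained sketch of these helpers; the file names are made up, and it assumes the script is run from the tf/ directory so that `import utils` resolves to this file:

import numpy as np
import utils  # this module (tf/utils.py)

# synthetic depth ramp standing in for a network prediction
depth = np.tile(np.linspace(0.0, 10.0, 640, dtype=np.float32), (480, 1))
utils.write_depth("ramp", depth, bits=2)   # writes ramp.pfm plus a 16-bit ramp.png
img = utils.read_image("ramp.png")         # read back as an RGB float array in [0, 1]
print(img.shape)                           # (480, 640, 3)
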
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/builder.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | from importlib import import_module
26 | from .depth_model import DepthModel
27 |
28 | def build_model(config) -> DepthModel:
29 | """Builds a model from a config. The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface.
30 | This function should be used to construct models for training and evaluation.
31 |
32 | Args:
33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder.
34 |
35 | Returns:
36 | torch.nn.Module: Model corresponding to name and version as specified in config
37 | """
38 | module_name = f"zoedepth.models.{config.model}"
39 | try:
40 | module = import_module(module_name)
41 | except ModuleNotFoundError as e:
42 | # print the original error message
43 | print(e)
44 | raise ValueError(
45 | f"Model {config.model} not found. Refer above error for details.") from e
46 | try:
47 | get_version = getattr(module, "get_version")
48 | except AttributeError as e:
49 | raise ValueError(
50 | f"Model {config.model} has no get_version function.") from e
51 | return get_version(config.version_name).build_from_config(config)
52 |
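
A usage sketch of build_model, assuming the config helpers referenced in the docstring (zoedepth/utils/config.py, not included in this section) expose a get_config(model_name, mode) function as in the upstream ZoeDepth repository; building the model will also fetch the MiDaS backbone via torch.hub on first use:

import torch
from zoedepth.utils.config import get_config     # assumed helper; the docstring says configs are built in utils/config.py
from zoedepth.models.builder import build_model

config = get_config("zoedepth", "infer")          # merges config_zoedepth.json with its "infer" section
model = build_model(config).eval()                # imports zoedepth.models.zoedepth and builds version "v1"

with torch.no_grad():
    out = model(torch.rand(1, 3, 384, 512))       # dummy RGB batch at the configured img_size
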
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/model_io.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import torch
26 |
27 | def load_state_dict(model, state_dict):
28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict.
29 |
30 | DataParallel prefixes state_dict keys with 'module.' when saving.
31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed.
32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added.
33 | """
34 | state_dict = state_dict.get('model', state_dict)
35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.'
36 |
37 | do_prefix = isinstance(
38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel))
39 | state = {}
40 | for k, v in state_dict.items():
41 | if k.startswith('module.') and not do_prefix:
42 | k = k[7:]
43 |
44 | if not k.startswith('module.') and do_prefix:
45 | k = 'module.' + k
46 |
47 | state[k] = v
48 |
49 | model.load_state_dict(state)
50 | print("Loaded successfully")
51 | return model
52 |
53 |
54 | def load_wts(model, checkpoint_path):
55 | ckpt = torch.load(checkpoint_path, map_location='cpu')
56 | return load_state_dict(model, ckpt)
57 |
58 |
59 | def load_state_dict_from_url(model, url, **kwargs):
60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs)
61 | return load_state_dict(model, state_dict)
62 |
63 |
64 | def load_state_from_resource(model, resource: str):
65 | """Loads weights to the model from a given resource. A resource can be of following types:
66 | 1. URL. Prefixed with "url::"
67 | e.g. url::http(s)://url.resource.com/ckpt.pt
68 |
69 | 2. Local path. Prefixed with "local::"
70 | e.g. local::/path/to/ckpt.pt
71 |
72 |
73 | Args:
74 | model (torch.nn.Module): Model
75 | resource (str): resource string
76 |
77 | Returns:
78 | torch.nn.Module: Model with loaded weights
79 | """
80 | print(f"Using pretrained resource {resource}")
81 |
82 | if resource.startswith('url::'):
83 | url = resource.split('url::')[1]
84 | return load_state_dict_from_url(model, url, progress=True)
85 |
86 | elif resource.startswith('local::'):
87 | path = resource.split('local::')[1]
88 | return load_wts(model, path)
89 |
90 | else:
91 | raise ValueError("Invalid resource type, only url:: and local:: are supported")
92 |
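
A small runnable illustration of the 'module.' prefix handling, plus the two resource formats from the docstring (the URL is the one referenced by config_zoedepth_kitti.json below; the local path is a placeholder):

import torch
from zoedepth.models.model_io import load_state_dict

# DataParallel prefixes every key with 'module.'; load_state_dict strips it for a bare model
model = torch.nn.Linear(4, 2)
wrapped = torch.nn.DataParallel(torch.nn.Linear(4, 2))
model = load_state_dict(model, wrapped.state_dict())

# released weights would go through load_state_from_resource instead, e.g.:
# load_state_from_resource(model, "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt")
# load_state_from_resource(model, "local::/path/to/ckpt.pt")
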
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/zoedepth/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | from .zoedepth_v1 import ZoeDepth
26 |
27 | all_versions = {
28 | "v1": ZoeDepth,
29 | }
30 |
31 | get_version = lambda v : all_versions[v]
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": {
3 | "name": "ZoeDepth",
4 | "version_name": "v1",
5 | "n_bins": 64,
6 | "bin_embedding_dim": 128,
7 | "bin_centers_type": "softplus",
8 | "n_attractors":[16, 8, 4, 1],
9 | "attractor_alpha": 1000,
10 | "attractor_gamma": 2,
11 | "attractor_kind" : "mean",
12 | "attractor_type" : "inv",
13 | "midas_model_type" : "DPT_BEiT_L_384",
14 | "min_temp": 0.0212,
15 | "max_temp": 50.0,
16 | "output_distribution": "logbinomial",
17 | "memory_efficient": true,
18 | "inverse_midas": false,
19 | "img_size": [384, 512]
20 | },
21 |
22 | "train": {
23 | "train_midas": true,
24 | "use_pretrained_midas": true,
25 | "trainer": "zoedepth",
26 | "epochs": 5,
27 | "bs": 16,
28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01},
29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
30 | "same_lr": false,
31 | "w_si": 1,
32 | "w_domain": 0.2,
33 | "w_reg": 0,
34 | "w_grad": 0,
35 | "avoid_boundary": false,
36 | "random_crop": false,
37 | "input_width": 640,
38 | "input_height": 480,
39 | "midas_lr_factor": 1,
40 | "encoder_lr_factor":10,
41 | "pos_enc_lr_factor":10,
42 | "freeze_midas_bn": true
43 |
44 | },
45 |
46 | "infer":{
47 | "train_midas": false,
48 | "use_pretrained_midas": false,
49 | "pretrained_resource" : null,
50 | "force_keep_ar": true
51 | },
52 |
53 | "eval":{
54 | "train_midas": false,
55 | "use_pretrained_midas": false,
56 | "pretrained_resource" : null
57 | }
58 | }
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": {
3 | "bin_centers_type": "normed",
4 | "img_size": [384, 768]
5 | },
6 |
7 | "train": {
8 | },
9 |
10 | "infer":{
11 | "train_midas": false,
12 | "use_pretrained_midas": false,
13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt",
14 | "force_keep_ar": true
15 | },
16 |
17 | "eval":{
18 | "train_midas": false,
19 | "use_pretrained_midas": false,
20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt"
21 | }
22 | }
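
This KITTI file only lists the fields it changes relative to config_zoedepth.json above; the project's config loader (zoedepth/utils/config.py, not included in this section) layers it on top of the base file. A toy sketch of that layering using a plain recursive dict merge (illustration only, not the actual loader):

import json

def overlay(base: dict, override: dict) -> dict:
    """Recursively apply `override` on top of `base`."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = overlay(merged[key], value)
        else:
            merged[key] = value
    return merged

with open("config_zoedepth.json") as f, open("config_zoedepth_kitti.json") as g:
    cfg = overlay(json.load(f), json.load(g))

print(cfg["model"]["bin_centers_type"])     # "normed" (overridden); other model keys come from the base file
print(cfg["infer"]["pretrained_resource"])  # the ZoeD_M12_K.pt URL from this KITTI file
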
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | from .zoedepth_nk_v1 import ZoeDepthNK
26 |
27 | all_versions = {
28 | "v1": ZoeDepthNK,
29 | }
30 |
31 | get_version = lambda v : all_versions[v]
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": {
3 | "name": "ZoeDepthNK",
4 | "version_name": "v1",
5 | "bin_conf" : [
6 | {
7 | "name": "nyu",
8 | "n_bins": 64,
9 | "min_depth": 1e-3,
10 | "max_depth": 10.0
11 | },
12 | {
13 | "name": "kitti",
14 | "n_bins": 64,
15 | "min_depth": 1e-3,
16 | "max_depth": 80.0
17 | }
18 | ],
19 | "bin_embedding_dim": 128,
20 | "bin_centers_type": "softplus",
21 | "n_attractors":[16, 8, 4, 1],
22 | "attractor_alpha": 1000,
23 | "attractor_gamma": 2,
24 | "attractor_kind" : "mean",
25 | "attractor_type" : "inv",
26 | "min_temp": 0.0212,
27 | "max_temp": 50.0,
28 | "memory_efficient": true,
29 | "midas_model_type" : "DPT_BEiT_L_384",
30 | "img_size": [384, 512]
31 | },
32 |
33 | "train": {
34 | "train_midas": true,
35 | "use_pretrained_midas": true,
36 | "trainer": "zoedepth_nk",
37 | "epochs": 5,
38 | "bs": 16,
39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01},
40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true},
41 | "same_lr": false,
42 | "w_si": 1,
43 | "w_domain": 100,
44 | "avoid_boundary": false,
45 | "random_crop": false,
46 | "input_width": 640,
47 | "input_height": 480,
48 | "w_grad": 0,
49 | "w_reg": 0,
50 | "midas_lr_factor": 10,
51 | "encoder_lr_factor":10,
52 | "pos_enc_lr_factor":10
53 | },
54 |
55 | "infer": {
56 | "train_midas": false,
57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
58 | "use_pretrained_midas": false,
59 | "force_keep_ar": true
60 | },
61 |
62 | "eval": {
63 | "train_midas": false,
64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt",
65 | "use_pretrained_midas": false
66 | }
67 | }
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/trainers/builder.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | from importlib import import_module
26 |
27 |
28 | def get_trainer(config):
29 | """Builds and returns a trainer based on the config.
30 |
31 | Args:
32 | config (dict): the config dict (typically constructed using utils.config.get_config)
33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module
34 |
35 | Raises:
36 | ValueError: If the specified trainer does not exist under trainers/ folder
37 |
38 | Returns:
39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object
40 | """
41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format(
42 | config)
43 | try:
44 | Trainer = getattr(import_module(
45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer')
46 | except ModuleNotFoundError as e:
47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e
48 | return Trainer
49 |
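
A usage sketch, assuming the same get_config helper as in the models builder above; the constructor call is shown only as a comment because the Trainer signature is not part of this section:

from zoedepth.trainers.builder import get_trainer
from zoedepth.utils.config import get_config   # assumed helper

config = get_config("zoedepth", "train")       # config.trainer is "zoedepth" in config_zoedepth.json
TrainerCls = get_trainer(config)               # imports zoedepth.trainers.zoedepth_trainer and returns its Trainer class

# hypothetical instantiation following the upstream training entry point:
# trainer = TrainerCls(config, model, train_loader, test_loader, device="cuda")
# trainer.train()
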
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 |
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/utils/arg_utils.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | def infer_type(x): # hacky way to infer type from string args
4 | if not isinstance(x, str):
5 | return x
6 |
7 | try:
8 | x = int(x)
9 | return x
10 | except ValueError:
11 | pass
12 |
13 | try:
14 | x = float(x)
15 | return x
16 | except ValueError:
17 | pass
18 |
19 | return x
20 |
21 |
22 | def parse_unknown(unknown_args):
23 | clean = []
24 | for a in unknown_args:
25 | if "=" in a:
26 | k, v = a.split("=")
27 | clean.extend([k, v])
28 | else:
29 | clean.append(a)
30 |
31 | keys = clean[::2]
32 | values = clean[1::2]
33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)}
34 |
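
A short sketch of how these helpers turn unrecognised CLI arguments into typed overrides (the flag names are made up):

import argparse
from zoedepth.utils.arg_utils import parse_unknown

parser = argparse.ArgumentParser()
parser.add_argument("--model", default="zoedepth")
args, unknown = parser.parse_known_args(["--model", "zoedepth", "--bs=8", "--lr", "3e-4"])

overrides = parse_unknown(unknown)
print(overrides)  # {'bs': 8, 'lr': 0.0003} -- ints and floats are inferred, everything else stays a string
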
--------------------------------------------------------------------------------
/src/flux/annotator/zoe/zoedepth/utils/geometry.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | # Copyright (c) 2022 Intelligent Systems Lab Org
4 |
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
6 | # of this software and associated documentation files (the "Software"), to deal
7 | # in the Software without restriction, including without limitation the rights
8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | # copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 |
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 |
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | # File author: Shariq Farooq Bhat
24 |
25 | import numpy as np
26 |
27 | def get_intrinsics(H,W):
28 | """
29 | Intrinsics for a pinhole camera model.
30 | Assume fov of 55 degrees and central principal point.
31 | """
32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0)
33 | cx = 0.5 * W
34 | cy = 0.5 * H
35 | return np.array([[f, 0, cx],
36 | [0, f, cy],
37 | [0, 0, 1]])
38 |
39 | def depth_to_points(depth, R=None, t=None):
40 |
41 | K = get_intrinsics(depth.shape[1], depth.shape[2])
42 | Kinv = np.linalg.inv(K)
43 | if R is None:
44 | R = np.eye(3)
45 | if t is None:
46 | t = np.zeros(3)
47 |
48 | # M converts from your coordinate to PyTorch3D's coordinate system
49 | M = np.eye(3)
50 | M[0, 0] = -1.0
51 | M[1, 1] = -1.0
52 |
53 | height, width = depth.shape[1:3]
54 |
55 | x = np.arange(width)
56 | y = np.arange(height)
57 | coord = np.stack(np.meshgrid(x, y), -1)
58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1
59 | coord = coord.astype(np.float32)
60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device)
61 | coord = coord[None] # bs, h, w, 3
62 |
63 | D = depth[:, :, :, None, None]
64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape )
65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None]
66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's
67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1
68 |     # from the reference to the target viewpoint
69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None]
70 | # pts3D_2 = pts3D_1
71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w
72 | return pts3D_2[:, :, :, :3, 0][0]
73 |
74 |
75 | def create_triangles(h, w, mask=None):
76 | """
77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68
78 | Creates mesh triangle indices from a given pixel grid size.
79 | This function is not and need not be differentiable as triangle indices are
80 | fixed.
81 | Args:
82 | h: (int) denoting the height of the image.
83 | w: (int) denoting the width of the image.
84 | Returns:
85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3)
86 | """
87 | x, y = np.meshgrid(range(w - 1), range(h - 1))
88 | tl = y * w + x
89 | tr = y * w + x + 1
90 | bl = (y + 1) * w + x
91 | br = (y + 1) * w + x + 1
92 | triangles = np.array([tl, bl, tr, br, tr, bl])
93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape(
94 | ((w - 1) * (h - 1) * 2, 3))
95 | if mask is not None:
96 | mask = mask.reshape(-1)
97 | triangles = triangles[mask[triangles].all(1)]
98 | return triangles
99 |
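
A shape-level sketch, assuming the annotator's zoe directory is on the import path: depth_to_points takes a (1, H, W) depth map and returns an (H, W, 3) point map, and create_triangles builds the matching mesh faces:

import numpy as np
from zoedepth.utils.geometry import depth_to_points, create_triangles

h, w = 480, 640
depth = np.full((1, h, w), 2.0, dtype=np.float32)   # dummy metric depth, 2 m everywhere

points = depth_to_points(depth)                     # (480, 640, 3), back-projected with the 55-degree pinhole intrinsics
triangles = create_triangles(h, w)                  # (2*(w-1)*(h-1), 3) vertex indices into points.reshape(-1, 3)
print(points.shape, triangles.shape)
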
--------------------------------------------------------------------------------
/src/flux/math.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from einops import rearrange
3 | from torch import Tensor
4 |
5 |
6 | def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
7 | q, k = apply_rope(q, k, pe)
8 |
9 | x = torch.nn.functional.scaled_dot_product_attention(q, k, v)
10 | x = rearrange(x, "B H L D -> B L (H D)")
11 |
12 | return x
13 |
14 |
15 | def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
16 | assert dim % 2 == 0
17 | scale = torch.arange(0, dim, 2, dtype=torch.float64, device=pos.device) / dim
18 | omega = 1.0 / (theta**scale)
19 | out = torch.einsum("...n,d->...nd", pos, omega)
20 | out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
21 | out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
22 | return out.float()
23 |
24 |
25 | def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
26 | xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
27 | xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
28 | xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
29 | xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
30 | return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
31 |
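
A shape-level sketch of how these functions fit together. In the full model the positional table is built per axis by an embedder module; a single axis is used here just to show the tensor shapes, and the import assumes src/ is on the path:

import torch
from flux.math import attention, rope

B, H, L, D = 1, 4, 16, 64                          # batch, heads, tokens, head dim (must be even)
q, k, v = (torch.randn(B, H, L, D) for _ in range(3))

pos = torch.arange(L, dtype=torch.float64)[None]   # (B, L) token positions
pe = rope(pos, dim=D, theta=10_000).unsqueeze(1)   # (B, 1, L, D/2, 2, 2); broadcasts over heads in apply_rope

out = attention(q, k, v, pe)                       # (B, L, H*D)
print(out.shape)                                   # torch.Size([1, 16, 256])
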
--------------------------------------------------------------------------------
/src/flux/modules/conditioner.py:
--------------------------------------------------------------------------------
1 | from torch import Tensor, nn
2 | from transformers import (CLIPTextModel, CLIPTokenizer, T5EncoderModel,
3 | T5Tokenizer)
4 |
5 |
6 | class HFEmbedder(nn.Module):
7 | def __init__(self, version: str, max_length: int, **hf_kwargs):
8 | super().__init__()
9 | self.is_clip = version.startswith("openai")
10 | self.max_length = max_length
11 | self.output_key = "pooler_output" if self.is_clip else "last_hidden_state"
12 |
13 | if self.is_clip:
14 | self.tokenizer: CLIPTokenizer = CLIPTokenizer.from_pretrained(version, max_length=max_length)
15 | self.hf_module: CLIPTextModel = CLIPTextModel.from_pretrained(version, **hf_kwargs)
16 | else:
17 | self.tokenizer: T5Tokenizer = T5Tokenizer.from_pretrained(version, max_length=max_length)
18 | self.hf_module: T5EncoderModel = T5EncoderModel.from_pretrained(version, **hf_kwargs)
19 |
20 | self.hf_module = self.hf_module.eval().requires_grad_(False)
21 |
22 | def forward(self, text: list[str]) -> Tensor:
23 | batch_encoding = self.tokenizer(
24 | text,
25 | truncation=True,
26 | max_length=self.max_length,
27 | return_length=False,
28 | return_overflowing_tokens=False,
29 | padding="max_length",
30 | return_tensors="pt",
31 | )
32 |
33 | outputs = self.hf_module(
34 | input_ids=batch_encoding["input_ids"].to(self.hf_module.device),
35 | attention_mask=None,
36 | output_hidden_states=False,
37 | )
38 | return outputs[self.output_key]
39 |
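
A usage sketch; the checkpoint names, max_length values and dtype are illustrative rather than the ones wired into this repo, and downloading the weights needs network access:

import torch
from flux.modules.conditioner import HFEmbedder   # assumes src/ is on the import path

clip = HFEmbedder("openai/clip-vit-large-patch14", max_length=77, torch_dtype=torch.bfloat16)
t5 = HFEmbedder("google/t5-v1_1-xxl", max_length=512, torch_dtype=torch.bfloat16)

prompts = ["a statue in a park at golden hour"]
pooled = clip(prompts)   # pooler_output, shape (1, 768)
txt = t5(prompts)        # last_hidden_state, shape (1, 512, 4096)
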
--------------------------------------------------------------------------------
/train_configs/test_canny_controlnet.yaml:
--------------------------------------------------------------------------------
1 | model_name: "flux-dev"
2 | data_config:
3 |   train_batch_size: 4
4 |   num_workers: 4
5 |   img_size: 512
6 |   img_dir: images/
7 | report_to: wandb
8 | train_batch_size: 3
9 | output_dir: saves_canny/
10 | max_train_steps: 100000
11 | learning_rate: 2e-5
12 | lr_scheduler: constant
13 | lr_warmup_steps: 10
14 | adam_beta1: 0.9
15 | adam_beta2: 0.999
16 | adam_weight_decay: 0.01
17 | adam_epsilon: 1e-8
18 | max_grad_norm: 1.0
19 | logging_dir: logs
20 | mixed_precision: "bf16"
21 | checkpointing_steps: 2500
22 | checkpoints_total_limit: 10
23 | tracker_project_name: canny_training
24 | resume_from_checkpoint: latest
25 | gradient_accumulation_steps: 2
26 |
--------------------------------------------------------------------------------
/train_configs/test_finetune.yaml:
--------------------------------------------------------------------------------
1 | model_name: "flux-dev"
2 | data_config:
3 |   train_batch_size: 1
4 |   num_workers: 4
5 |   img_size: 512
6 |   img_dir: images/
7 | report_to: wandb
8 | train_batch_size: 1
9 | output_dir: saves/
10 | max_train_steps: 100000
11 | learning_rate: 1e-5
12 | lr_scheduler: constant
13 | lr_warmup_steps: 10
14 | adam_beta1: 0.9
15 | adam_beta2: 0.999
16 | adam_weight_decay: 0.01
17 | adam_epsilon: 1e-8
18 | max_grad_norm: 1.0
19 | logging_dir: logs
20 | mixed_precision: "bf16"
21 | checkpointing_steps: 2500
22 | checkpoints_total_limit: 10
23 | tracker_project_name: finetune_test
24 | resume_from_checkpoint: latest
25 | gradient_accumulation_steps: 2
26 |
--------------------------------------------------------------------------------
/train_configs/test_lora.yaml:
--------------------------------------------------------------------------------
1 | model_name: "flux-dev"
2 | data_config:
3 |   train_batch_size: 1
4 |   num_workers: 4
5 |   img_size: 512
6 |   img_dir: images/
7 |   random_ratio: true # support multi crop preprocessing
8 | report_to: wandb
9 | train_batch_size: 1
10 | output_dir: lora/
11 | max_train_steps: 100000
12 | learning_rate: 1e-5
13 | lr_scheduler: constant
14 | lr_warmup_steps: 10
15 | adam_beta1: 0.9
16 | adam_beta2: 0.999
17 | adam_weight_decay: 0.01
18 | adam_epsilon: 1e-8
19 | max_grad_norm: 1.0
20 | logging_dir: logs
21 | mixed_precision: "bf16"
22 | checkpointing_steps: 2500
23 | checkpoints_total_limit: 10
24 | tracker_project_name: lora_test
25 | resume_from_checkpoint: latest
26 | gradient_accumulation_steps: 2
27 | rank: 16
28 | single_blocks: "1,2,3,4"
29 | double_blocks: null
30 | disable_sampling: false
31 | sample_every: 250 # sample every this many steps
32 | sample_width: 1024
33 | sample_height: 1024
34 | sample_steps: 20
35 | sample_prompts:
36 | - "woman with red hair, playing chess at the park, bomb going off in the background"
37 | - "a woman holding a coffee cup, in a beanie, sitting at a cafe"
38 |
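
These training YAMLs mix a nested data_config block (consumed by the dataset/dataloader code) with flat trainer options. A minimal sketch of reading one, assuming OmegaConf (whether the training scripts use OmegaConf or plain PyYAML is not shown in this section):

from omegaconf import OmegaConf

cfg = OmegaConf.load("train_configs/test_lora.yaml")
print(cfg.model_name)               # "flux-dev"
print(cfg.data_config.img_size)     # 512
print(cfg.rank, cfg.single_blocks)  # 16 and "1,2,3,4"
print(cfg.sample_prompts[0])        # first sampling prompt
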
--------------------------------------------------------------------------------