├── .gitignore ├── LICENSE ├── README.md ├── assets ├── example_images │ └── statue.jpg └── readme │ ├── dark │ ├── follow-cta-rev2.png │ └── header-rev1.png │ ├── examples │ ├── canny_example_1.png │ ├── canny_result1.png │ ├── canny_result2.png │ ├── depth_example_1.png │ ├── depth_example_2.png │ ├── depth_example_3.png │ ├── depth_result1.png │ ├── depth_result2.png │ ├── furry1.png │ ├── furry2.png │ ├── furry3.png │ ├── furry4.png │ ├── hed_example_1.png │ ├── hed_example_2.png │ ├── hed_result1.png │ ├── picture-0-rev1.png │ ├── picture-1-rev1.png │ ├── picture-2-rev1.png │ ├── picture-3-rev1.png │ ├── picture-4-rev1.png │ ├── picture-5-rev1.png │ ├── picture-6-rev1.png │ ├── picture-7-rev1.png │ ├── result_12.png │ ├── result_13.png │ ├── result_14.png │ ├── result_15.png │ ├── result_18.png │ ├── result_19.png │ ├── result_21.png │ ├── result_22.png │ ├── result_23.png │ └── result_24.png │ └── light │ ├── controlnet-canny-header-rev1.png │ ├── flux-controlnet-collections.png │ ├── flux-lora-collection-rev1.png │ ├── follow-cta-rev2.png │ ├── header-rev1.png │ ├── join-our-discord-rev1.png │ └── lora-photorealism-header-rev1.png ├── cog.yaml ├── gradio_demo.py ├── image_datasets ├── canny_dataset.py └── dataset.py ├── main.py ├── models_licence └── LICENSE-FLUX1-dev ├── predict.py ├── requirements.txt ├── src └── flux │ ├── __init__.py │ ├── __main__.py │ ├── annotator │ ├── canny │ │ └── __init__.py │ ├── ckpts │ │ └── ckpts.txt │ ├── dwpose │ │ ├── __init__.py │ │ ├── onnxdet.py │ │ ├── onnxpose.py │ │ ├── util.py │ │ └── wholebody.py │ ├── hed │ │ └── __init__.py │ ├── midas │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── api.py │ │ ├── midas │ │ │ ├── __init__.py │ │ │ ├── base_model.py │ │ │ ├── blocks.py │ │ │ ├── dpt_depth.py │ │ │ ├── midas_net.py │ │ │ ├── midas_net_custom.py │ │ │ ├── transforms.py │ │ │ └── vit.py │ │ └── utils.py │ ├── mlsd │ │ ├── LICENSE │ │ ├── __init__.py │ │ ├── models │ │ │ ├── mbv2_mlsd_large.py │ │ │ └── mbv2_mlsd_tiny.py │ │ └── utils.py │ ├── tile │ │ ├── __init__.py │ │ └── guided_filter.py │ ├── util.py │ └── zoe │ │ ├── LICENSE │ │ ├── __init__.py │ │ └── zoedepth │ │ ├── data │ │ ├── __init__.py │ │ ├── data_mono.py │ │ ├── ddad.py │ │ ├── diml_indoor_test.py │ │ ├── diml_outdoor_test.py │ │ ├── diode.py │ │ ├── hypersim.py │ │ ├── ibims.py │ │ ├── preprocess.py │ │ ├── sun_rgbd_loader.py │ │ ├── transforms.py │ │ ├── vkitti.py │ │ └── vkitti2.py │ │ ├── models │ │ ├── __init__.py │ │ ├── base_models │ │ │ ├── __init__.py │ │ │ ├── midas.py │ │ │ └── midas_repo │ │ │ │ ├── .gitignore │ │ │ │ ├── Dockerfile │ │ │ │ ├── LICENSE │ │ │ │ ├── README.md │ │ │ │ ├── environment.yaml │ │ │ │ ├── hubconf.py │ │ │ │ ├── input │ │ │ │ └── .placeholder │ │ │ │ ├── midas │ │ │ │ ├── backbones │ │ │ │ │ ├── beit.py │ │ │ │ │ ├── levit.py │ │ │ │ │ ├── next_vit.py │ │ │ │ │ ├── swin.py │ │ │ │ │ ├── swin2.py │ │ │ │ │ ├── swin_common.py │ │ │ │ │ ├── utils.py │ │ │ │ │ └── vit.py │ │ │ │ ├── base_model.py │ │ │ │ ├── blocks.py │ │ │ │ ├── dpt_depth.py │ │ │ │ ├── midas_net.py │ │ │ │ ├── midas_net_custom.py │ │ │ │ ├── model_loader.py │ │ │ │ └── transforms.py │ │ │ │ ├── mobile │ │ │ │ ├── README.md │ │ │ │ ├── android │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── EXPLORE_THE_CODE.md │ │ │ │ │ ├── LICENSE │ │ │ │ │ ├── README.md │ │ │ │ │ ├── app │ │ │ │ │ │ ├── .gitignore │ │ │ │ │ │ ├── build.gradle │ │ │ │ │ │ ├── proguard-rules.pro │ │ │ │ │ │ └── src │ │ │ │ │ │ │ ├── androidTest │ │ │ │ │ │ │ ├── assets │ │ │ │ │ │ │ │ ├── 
fox-mobilenet_v1_1.0_224_support.txt │ │ │ │ │ │ │ │ └── fox-mobilenet_v1_1.0_224_task_api.txt │ │ │ │ │ │ │ └── java │ │ │ │ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ │ │ │ │ └── org │ │ │ │ │ │ │ │ └── tensorflow │ │ │ │ │ │ │ │ └── lite │ │ │ │ │ │ │ │ └── examples │ │ │ │ │ │ │ │ └── classification │ │ │ │ │ │ │ │ └── ClassifierTest.java │ │ │ │ │ │ │ └── main │ │ │ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ │ │ │ ├── java │ │ │ │ │ │ │ └── org │ │ │ │ │ │ │ │ └── tensorflow │ │ │ │ │ │ │ │ └── lite │ │ │ │ │ │ │ │ └── examples │ │ │ │ │ │ │ │ └── classification │ │ │ │ │ │ │ │ ├── CameraActivity.java │ │ │ │ │ │ │ │ ├── CameraConnectionFragment.java │ │ │ │ │ │ │ │ ├── ClassifierActivity.java │ │ │ │ │ │ │ │ ├── LegacyCameraConnectionFragment.java │ │ │ │ │ │ │ │ └── customview │ │ │ │ │ │ │ │ ├── AutoFitTextureView.java │ │ │ │ │ │ │ │ ├── OverlayView.java │ │ │ │ │ │ │ │ ├── RecognitionScoreView.java │ │ │ │ │ │ │ │ └── ResultsView.java │ │ │ │ │ │ │ └── res │ │ │ │ │ │ │ ├── drawable-v24 │ │ │ │ │ │ │ └── ic_launcher_foreground.xml │ │ │ │ │ │ │ ├── drawable │ │ │ │ │ │ │ ├── bottom_sheet_bg.xml │ │ │ │ │ │ │ ├── ic_baseline_add.xml │ │ │ │ │ │ │ ├── ic_baseline_remove.xml │ │ │ │ │ │ │ ├── ic_launcher_background.xml │ │ │ │ │ │ │ └── rectangle.xml │ │ │ │ │ │ │ ├── layout │ │ │ │ │ │ │ ├── tfe_ic_activity_camera.xml │ │ │ │ │ │ │ ├── tfe_ic_camera_connection_fragment.xml │ │ │ │ │ │ │ └── tfe_ic_layout_bottom_sheet.xml │ │ │ │ │ │ │ ├── mipmap-anydpi-v26 │ │ │ │ │ │ │ ├── ic_launcher.xml │ │ │ │ │ │ │ └── ic_launcher_round.xml │ │ │ │ │ │ │ └── values │ │ │ │ │ │ │ ├── colors.xml │ │ │ │ │ │ │ ├── dimens.xml │ │ │ │ │ │ │ ├── strings.xml │ │ │ │ │ │ │ └── styles.xml │ │ │ │ │ ├── build.gradle │ │ │ │ │ ├── gradle.properties │ │ │ │ │ ├── gradle │ │ │ │ │ │ └── wrapper │ │ │ │ │ │ │ ├── gradle-wrapper.jar │ │ │ │ │ │ │ └── gradle-wrapper.properties │ │ │ │ │ ├── gradlew │ │ │ │ │ ├── gradlew.bat │ │ │ │ │ ├── lib_support │ │ │ │ │ │ ├── build.gradle │ │ │ │ │ │ ├── proguard-rules.pro │ │ │ │ │ │ └── src │ │ │ │ │ │ │ └── main │ │ │ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ │ │ │ └── java │ │ │ │ │ │ │ └── org │ │ │ │ │ │ │ └── tensorflow │ │ │ │ │ │ │ └── lite │ │ │ │ │ │ │ └── examples │ │ │ │ │ │ │ └── classification │ │ │ │ │ │ │ └── tflite │ │ │ │ │ │ │ ├── Classifier.java │ │ │ │ │ │ │ ├── ClassifierFloatEfficientNet.java │ │ │ │ │ │ │ ├── ClassifierFloatMobileNet.java │ │ │ │ │ │ │ ├── ClassifierQuantizedEfficientNet.java │ │ │ │ │ │ │ └── ClassifierQuantizedMobileNet.java │ │ │ │ │ ├── lib_task_api │ │ │ │ │ │ ├── build.gradle │ │ │ │ │ │ ├── proguard-rules.pro │ │ │ │ │ │ └── src │ │ │ │ │ │ │ └── main │ │ │ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ │ │ │ └── java │ │ │ │ │ │ │ └── org │ │ │ │ │ │ │ └── tensorflow │ │ │ │ │ │ │ └── lite │ │ │ │ │ │ │ └── examples │ │ │ │ │ │ │ └── classification │ │ │ │ │ │ │ └── tflite │ │ │ │ │ │ │ ├── Classifier.java │ │ │ │ │ │ │ ├── ClassifierFloatEfficientNet.java │ │ │ │ │ │ │ ├── ClassifierFloatMobileNet.java │ │ │ │ │ │ │ ├── ClassifierQuantizedEfficientNet.java │ │ │ │ │ │ │ └── ClassifierQuantizedMobileNet.java │ │ │ │ │ ├── models │ │ │ │ │ │ ├── build.gradle │ │ │ │ │ │ ├── download.gradle │ │ │ │ │ │ ├── proguard-rules.pro │ │ │ │ │ │ └── src │ │ │ │ │ │ │ └── main │ │ │ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ │ │ │ └── assets │ │ │ │ │ │ │ ├── labels.txt │ │ │ │ │ │ │ ├── labels_without_background.txt │ │ │ │ │ │ │ └── run_tflite.py │ │ │ │ │ └── settings.gradle │ │ │ │ └── ios │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── LICENSE │ │ │ │ │ 
├── Midas.xcodeproj │ │ │ │ │ ├── project.pbxproj │ │ │ │ │ ├── project.xcworkspace │ │ │ │ │ │ ├── contents.xcworkspacedata │ │ │ │ │ │ ├── xcshareddata │ │ │ │ │ │ │ └── IDEWorkspaceChecks.plist │ │ │ │ │ │ └── xcuserdata │ │ │ │ │ │ │ └── admin.xcuserdatad │ │ │ │ │ │ │ └── UserInterfaceState.xcuserstate │ │ │ │ │ └── xcuserdata │ │ │ │ │ │ └── admin.xcuserdatad │ │ │ │ │ │ └── xcschemes │ │ │ │ │ │ └── xcschememanagement.plist │ │ │ │ │ ├── Midas │ │ │ │ │ ├── AppDelegate.swift │ │ │ │ │ ├── Assets.xcassets │ │ │ │ │ │ ├── AppIcon.appiconset │ │ │ │ │ │ │ └── Contents.json │ │ │ │ │ │ └── Contents.json │ │ │ │ │ ├── Camera Feed │ │ │ │ │ │ ├── CameraFeedManager.swift │ │ │ │ │ │ └── PreviewView.swift │ │ │ │ │ ├── Cells │ │ │ │ │ │ └── InfoCell.swift │ │ │ │ │ ├── Constants.swift │ │ │ │ │ ├── Extensions │ │ │ │ │ │ ├── CGSizeExtension.swift │ │ │ │ │ │ ├── CVPixelBufferExtension.swift │ │ │ │ │ │ └── TFLiteExtension.swift │ │ │ │ │ ├── Info.plist │ │ │ │ │ ├── ModelDataHandler │ │ │ │ │ │ └── ModelDataHandler.swift │ │ │ │ │ ├── Storyboards │ │ │ │ │ │ └── Base.lproj │ │ │ │ │ │ │ ├── Launch Screen.storyboard │ │ │ │ │ │ │ └── Main.storyboard │ │ │ │ │ ├── ViewControllers │ │ │ │ │ │ └── ViewController.swift │ │ │ │ │ └── Views │ │ │ │ │ │ └── OverlayView.swift │ │ │ │ │ ├── Podfile │ │ │ │ │ ├── README.md │ │ │ │ │ └── RunScripts │ │ │ │ │ └── download_models.sh │ │ │ │ ├── output │ │ │ │ └── .placeholder │ │ │ │ ├── ros │ │ │ │ ├── LICENSE │ │ │ │ ├── README.md │ │ │ │ ├── additions │ │ │ │ │ ├── do_catkin_make.sh │ │ │ │ │ ├── downloads.sh │ │ │ │ │ ├── install_ros_melodic_ubuntu_17_18.sh │ │ │ │ │ ├── install_ros_noetic_ubuntu_20.sh │ │ │ │ │ └── make_package_cpp.sh │ │ │ │ ├── launch_midas_cpp.sh │ │ │ │ ├── midas_cpp │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ ├── launch │ │ │ │ │ │ ├── midas_cpp.launch │ │ │ │ │ │ └── midas_talker_listener.launch │ │ │ │ │ ├── package.xml │ │ │ │ │ ├── scripts │ │ │ │ │ │ ├── listener.py │ │ │ │ │ │ ├── listener_original.py │ │ │ │ │ │ └── talker.py │ │ │ │ │ └── src │ │ │ │ │ │ └── main.cpp │ │ │ │ └── run_talker_listener_test.sh │ │ │ │ ├── run.py │ │ │ │ ├── tf │ │ │ │ ├── README.md │ │ │ │ ├── input │ │ │ │ │ └── .placeholder │ │ │ │ ├── make_onnx_model.py │ │ │ │ ├── output │ │ │ │ │ └── .placeholder │ │ │ │ ├── run_onnx.py │ │ │ │ ├── run_pb.py │ │ │ │ ├── transforms.py │ │ │ │ └── utils.py │ │ │ │ └── utils.py │ │ ├── builder.py │ │ ├── depth_model.py │ │ ├── layers │ │ │ ├── attractor.py │ │ │ ├── dist_layers.py │ │ │ ├── localbins_layers.py │ │ │ └── patch_transformer.py │ │ ├── model_io.py │ │ ├── zoedepth │ │ │ ├── __init__.py │ │ │ ├── config_zoedepth.json │ │ │ ├── config_zoedepth_kitti.json │ │ │ └── zoedepth_v1.py │ │ └── zoedepth_nk │ │ │ ├── __init__.py │ │ │ ├── config_zoedepth_nk.json │ │ │ └── zoedepth_nk_v1.py │ │ ├── trainers │ │ ├── base_trainer.py │ │ ├── builder.py │ │ ├── loss.py │ │ ├── zoedepth_nk_trainer.py │ │ └── zoedepth_trainer.py │ │ └── utils │ │ ├── __init__.py │ │ ├── arg_utils.py │ │ ├── config.py │ │ ├── easydict │ │ └── __init__.py │ │ ├── geometry.py │ │ └── misc.py │ ├── api.py │ ├── cli.py │ ├── controlnet.py │ ├── math.py │ ├── model.py │ ├── modules │ ├── autoencoder.py │ ├── conditioner.py │ └── layers.py │ ├── sampling.py │ ├── util.py │ └── xflux_pipeline.py ├── train_configs ├── test_canny_controlnet.yaml ├── test_finetune.yaml └── test_lora.yaml ├── train_flux_deepspeed.py ├── train_flux_deepspeed_controlnet.py └── train_flux_lora_deepspeed.py /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | Makefile 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | weights/ 25 | 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache/ 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 113 | .pdm.toml 114 | .pdm-python 115 | .pdm-build/ 116 | 117 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 118 | __pypackages__/ 119 | 120 | # Celery stuff 121 | celerybeat-schedule 122 | celerybeat.pid 123 | 124 | # SageMath parsed files 125 | *.sage.py 126 | 127 | # Environments 128 | .env 129 | .venv 130 | env/ 131 | venv/ 132 | ENV/ 133 | env.bak/ 134 | venv.bak/ 135 | 136 | # Spyder project settings 137 | .spyderproject 138 | .spyproject 139 | 140 | # Rope project settings 141 | .ropeproject 142 | 143 | # mkdocs documentation 144 | /site 145 | 146 | # mypy 147 | .mypy_cache/ 148 | .dmypy.json 149 | dmypy.json 150 | 151 | # Pyre type checker 152 | .pyre/ 153 | 154 | # pytype static type analyzer 155 | .pytype/ 156 | 157 | # Cython debug symbols 158 | cython_debug/ 159 | 160 | # PyCharm 161 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 162 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 163 | # and can be added to the global gitignore or merged into this file. For a more nuclear 164 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 165 | #.idea/ 166 | 167 | .DS_Store 168 | -------------------------------------------------------------------------------- /assets/example_images/statue.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/example_images/statue.jpg -------------------------------------------------------------------------------- /assets/readme/dark/follow-cta-rev2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/dark/follow-cta-rev2.png -------------------------------------------------------------------------------- /assets/readme/dark/header-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/dark/header-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/canny_example_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/canny_example_1.png -------------------------------------------------------------------------------- /assets/readme/examples/canny_result1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/canny_result1.png -------------------------------------------------------------------------------- /assets/readme/examples/canny_result2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/canny_result2.png -------------------------------------------------------------------------------- /assets/readme/examples/depth_example_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_example_1.png 
-------------------------------------------------------------------------------- /assets/readme/examples/depth_example_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_example_2.png -------------------------------------------------------------------------------- /assets/readme/examples/depth_example_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_example_3.png -------------------------------------------------------------------------------- /assets/readme/examples/depth_result1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_result1.png -------------------------------------------------------------------------------- /assets/readme/examples/depth_result2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/depth_result2.png -------------------------------------------------------------------------------- /assets/readme/examples/furry1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/furry1.png -------------------------------------------------------------------------------- /assets/readme/examples/furry2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/furry2.png -------------------------------------------------------------------------------- /assets/readme/examples/furry3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/furry3.png -------------------------------------------------------------------------------- /assets/readme/examples/furry4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/furry4.png -------------------------------------------------------------------------------- /assets/readme/examples/hed_example_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/hed_example_1.png -------------------------------------------------------------------------------- /assets/readme/examples/hed_example_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/hed_example_2.png -------------------------------------------------------------------------------- /assets/readme/examples/hed_result1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/hed_result1.png -------------------------------------------------------------------------------- /assets/readme/examples/picture-0-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-0-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/picture-1-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-1-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/picture-2-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-2-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/picture-3-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-3-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/picture-4-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-4-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/picture-5-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-5-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/picture-6-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-6-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/picture-7-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/picture-7-rev1.png -------------------------------------------------------------------------------- /assets/readme/examples/result_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_12.png -------------------------------------------------------------------------------- /assets/readme/examples/result_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_13.png 
-------------------------------------------------------------------------------- /assets/readme/examples/result_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_14.png -------------------------------------------------------------------------------- /assets/readme/examples/result_15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_15.png -------------------------------------------------------------------------------- /assets/readme/examples/result_18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_18.png -------------------------------------------------------------------------------- /assets/readme/examples/result_19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_19.png -------------------------------------------------------------------------------- /assets/readme/examples/result_21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_21.png -------------------------------------------------------------------------------- /assets/readme/examples/result_22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_22.png -------------------------------------------------------------------------------- /assets/readme/examples/result_23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_23.png -------------------------------------------------------------------------------- /assets/readme/examples/result_24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/examples/result_24.png -------------------------------------------------------------------------------- /assets/readme/light/controlnet-canny-header-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/controlnet-canny-header-rev1.png -------------------------------------------------------------------------------- /assets/readme/light/flux-controlnet-collections.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/flux-controlnet-collections.png -------------------------------------------------------------------------------- /assets/readme/light/flux-lora-collection-rev1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/flux-lora-collection-rev1.png -------------------------------------------------------------------------------- /assets/readme/light/follow-cta-rev2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/follow-cta-rev2.png -------------------------------------------------------------------------------- /assets/readme/light/header-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/header-rev1.png -------------------------------------------------------------------------------- /assets/readme/light/join-our-discord-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/join-our-discord-rev1.png -------------------------------------------------------------------------------- /assets/readme/light/lora-photorealism-header-rev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/assets/readme/light/lora-photorealism-header-rev1.png -------------------------------------------------------------------------------- /cog.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for Cog ⚙️ 2 | # Reference: https://cog.run/yaml 3 | 4 | build: 5 | gpu: true 6 | cuda: "12.1" 7 | python_version: "3.11" 8 | python_packages: 9 | - "accelerate==0.30.1" 10 | - "deepspeed==0.14.4" 11 | - "einops==0.8.0" 12 | - "transformers==4.43.3" 13 | - "huggingface-hub==0.24.5" 14 | - "einops==0.8.0" 15 | - "pandas==2.2.2" 16 | - "opencv-python==4.10.0.84" 17 | - "pillow==10.4.0" 18 | - "optimum-quanto==0.2.4" 19 | - "sentencepiece==0.2.0" 20 | run: 21 | - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.8.2/pget_linux_x86_64" && chmod +x /usr/local/bin/pget 22 | 23 | # predict.py defines how predictions are run on your model 24 | predict: "predict.py:Predictor" 25 | -------------------------------------------------------------------------------- /image_datasets/canny_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from PIL import Image 5 | import torch 6 | from torch.utils.data import Dataset, DataLoader 7 | import json 8 | import random 9 | import cv2 10 | 11 | 12 | def canny_processor(image, low_threshold=100, high_threshold=200): 13 | image = np.array(image) 14 | image = cv2.Canny(image, low_threshold, high_threshold) 15 | image = image[:, :, None] 16 | image = np.concatenate([image, image, image], axis=2) 17 | canny_image = Image.fromarray(image) 18 | return canny_image 19 | 20 | 21 | def c_crop(image): 22 | width, height = image.size 23 | new_size = min(width, height) 24 | left = (width - new_size) / 2 25 | top = (height - new_size) / 2 26 | right = (width + new_size) / 2 27 | bottom = (height + new_size) / 2 28 | return image.crop((left, top, right, bottom)) 29 | 30 | class 
CustomImageDataset(Dataset): 31 | def __init__(self, img_dir, img_size=512): 32 | self.images = [os.path.join(img_dir, i) for i in os.listdir(img_dir) if '.jpg' in i or '.png' in i] 33 | self.images.sort() 34 | self.img_size = img_size 35 | 36 | def __len__(self): 37 | return len(self.images) 38 | 39 | def __getitem__(self, idx): 40 | try: 41 | img = Image.open(self.images[idx]) 42 | img = c_crop(img) 43 | img = img.resize((self.img_size, self.img_size)) 44 | hint = canny_processor(img) 45 | img = torch.from_numpy((np.array(img) / 127.5) - 1) 46 | img = img.permute(2, 0, 1) 47 | hint = torch.from_numpy((np.array(hint) / 127.5) - 1) 48 | hint = hint.permute(2, 0, 1) 49 | json_path = self.images[idx].split('.')[0] + '.json' 50 | prompt = json.load(open(json_path))['caption'] 51 | return img, hint, prompt 52 | except Exception as e: 53 | print(e) 54 | return self.__getitem__(random.randint(0, len(self.images) - 1)) 55 | 56 | 57 | def loader(train_batch_size, num_workers, **args): 58 | dataset = CustomImageDataset(**args) 59 | return DataLoader(dataset, batch_size=train_batch_size, num_workers=num_workers, shuffle=True) 60 | -------------------------------------------------------------------------------- /image_datasets/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | from PIL import Image 5 | import torch 6 | from torch.utils.data import Dataset, DataLoader 7 | import json 8 | import random 9 | 10 | def image_resize(img, max_size=512): 11 | w, h = img.size 12 | if w >= h: 13 | new_w = max_size 14 | new_h = int((max_size / w) * h) 15 | else: 16 | new_h = max_size 17 | new_w = int((max_size / h) * w) 18 | return img.resize((new_w, new_h)) 19 | 20 | def c_crop(image): 21 | width, height = image.size 22 | new_size = min(width, height) 23 | left = (width - new_size) / 2 24 | top = (height - new_size) / 2 25 | right = (width + new_size) / 2 26 | bottom = (height + new_size) / 2 27 | return image.crop((left, top, right, bottom)) 28 | 29 | def crop_to_aspect_ratio(image, ratio="16:9"): 30 | width, height = image.size 31 | ratio_map = { 32 | "16:9": (16, 9), 33 | "4:3": (4, 3), 34 | "1:1": (1, 1) 35 | } 36 | target_w, target_h = ratio_map[ratio] 37 | target_ratio_value = target_w / target_h 38 | 39 | current_ratio = width / height 40 | 41 | if current_ratio > target_ratio_value: 42 | new_width = int(height * target_ratio_value) 43 | offset = (width - new_width) // 2 44 | crop_box = (offset, 0, offset + new_width, height) 45 | else: 46 | new_height = int(width / target_ratio_value) 47 | offset = (height - new_height) // 2 48 | crop_box = (0, offset, width, offset + new_height) 49 | 50 | cropped_img = image.crop(crop_box) 51 | return cropped_img 52 | 53 | 54 | class CustomImageDataset(Dataset): 55 | def __init__(self, img_dir, img_size=512, caption_type='json', random_ratio=False): 56 | self.images = [os.path.join(img_dir, i) for i in os.listdir(img_dir) if '.jpg' in i or '.png' in i] 57 | self.images.sort() 58 | self.img_size = img_size 59 | self.caption_type = caption_type 60 | self.random_ratio = random_ratio 61 | 62 | def __len__(self): 63 | return len(self.images) 64 | 65 | def __getitem__(self, idx): 66 | try: 67 | img = Image.open(self.images[idx]).convert('RGB') 68 | if self.random_ratio: 69 | ratio = random.choice(["16:9", "default", "1:1", "4:3"]) 70 | if ratio != "default": 71 | img = crop_to_aspect_ratio(img, ratio) 72 | img = image_resize(img, self.img_size) 73 | w, h = img.size 74 | 
new_w = (w // 32) * 32 75 | new_h = (h // 32) * 32 76 | img = img.resize((new_w, new_h)) 77 | img = torch.from_numpy((np.array(img) / 127.5) - 1) 78 | img = img.permute(2, 0, 1) 79 | json_path = self.images[idx].split('.')[0] + '.' + self.caption_type 80 | if self.caption_type == "json": 81 | prompt = json.load(open(json_path))['caption'] 82 | else: 83 | prompt = open(json_path).read() 84 | return img, prompt 85 | except Exception as e: 86 | print(e) 87 | return self.__getitem__(random.randint(0, len(self.images) - 1)) 88 | 89 | 90 | def loader(train_batch_size, num_workers, **args): 91 | dataset = CustomImageDataset(**args) 92 | return DataLoader(dataset, batch_size=train_batch_size, num_workers=num_workers, shuffle=True) 93 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.30.1 2 | deepspeed==0.14.4 3 | einops==0.8.0 4 | transformers==4.43.3 5 | huggingface-hub==0.24.5 6 | optimum-quanto 7 | datasets 8 | omegaconf 9 | diffusers 10 | sentencepiece 11 | opencv-python 12 | matplotlib 13 | onnxruntime 14 | torchvision 15 | timm 16 | -------------------------------------------------------------------------------- /src/flux/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from ._version import version as __version__ # type: ignore 3 | from ._version import version_tuple 4 | except ImportError: 5 | __version__ = "unknown (no version information available)" 6 | version_tuple = (0, 0, "unknown", "noinfo") 7 | 8 | from pathlib import Path 9 | 10 | PACKAGE = __package__.replace("_", "-") 11 | PACKAGE_ROOT = Path(__file__).parent 12 | -------------------------------------------------------------------------------- /src/flux/__main__.py: -------------------------------------------------------------------------------- 1 | from .cli import app 2 | 3 | if __name__ == "__main__": 4 | app() 5 | -------------------------------------------------------------------------------- /src/flux/annotator/canny/__init__.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | 4 | class CannyDetector: 5 | def __call__(self, img, low_threshold, high_threshold): 6 | return cv2.Canny(img, low_threshold, high_threshold) 7 | -------------------------------------------------------------------------------- /src/flux/annotator/ckpts/ckpts.txt: -------------------------------------------------------------------------------- 1 | Weights here. -------------------------------------------------------------------------------- /src/flux/annotator/dwpose/__init__.py: -------------------------------------------------------------------------------- 1 | # Openpose 2 | # Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose 3 | # 2nd Edited by https://github.com/Hzzone/pytorch-openpose 4 | # 3rd Edited by ControlNet 5 | # 4th Edited by ControlNet (added face and correct hands) 6 | 7 | import os 8 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 9 | 10 | import torch 11 | import numpy as np 12 | from . 
import util 13 | from .wholebody import Wholebody 14 | 15 | def draw_pose(pose, H, W): 16 | bodies = pose['bodies'] 17 | faces = pose['faces'] 18 | hands = pose['hands'] 19 | candidate = bodies['candidate'] 20 | subset = bodies['subset'] 21 | canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8) 22 | 23 | canvas = util.draw_bodypose(canvas, candidate, subset) 24 | 25 | canvas = util.draw_handpose(canvas, hands) 26 | 27 | canvas = util.draw_facepose(canvas, faces) 28 | 29 | return canvas 30 | 31 | 32 | class DWposeDetector: 33 | def __init__(self, device): 34 | 35 | self.pose_estimation = Wholebody(device) 36 | 37 | def __call__(self, oriImg): 38 | oriImg = oriImg.copy() 39 | H, W, C = oriImg.shape 40 | with torch.no_grad(): 41 | candidate, subset = self.pose_estimation(oriImg) 42 | nums, keys, locs = candidate.shape 43 | candidate[..., 0] /= float(W) 44 | candidate[..., 1] /= float(H) 45 | body = candidate[:,:18].copy() 46 | body = body.reshape(nums*18, locs) 47 | score = subset[:,:18] 48 | for i in range(len(score)): 49 | for j in range(len(score[i])): 50 | if score[i][j] > 0.3: 51 | score[i][j] = int(18*i+j) 52 | else: 53 | score[i][j] = -1 54 | 55 | un_visible = subset<0.3 56 | candidate[un_visible] = -1 57 | 58 | foot = candidate[:,18:24] 59 | 60 | faces = candidate[:,24:92] 61 | 62 | hands = candidate[:,92:113] 63 | hands = np.vstack([hands, candidate[:,113:]]) 64 | 65 | bodies = dict(candidate=body, subset=score) 66 | pose = dict(bodies=bodies, hands=hands, faces=faces) 67 | 68 | return draw_pose(pose, H, W) 69 | -------------------------------------------------------------------------------- /src/flux/annotator/dwpose/wholebody.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | import onnxruntime as ort 5 | from huggingface_hub import hf_hub_download 6 | from .onnxdet import inference_detector 7 | from .onnxpose import inference_pose 8 | 9 | 10 | class Wholebody: 11 | def __init__(self, device="cuda:0"): 12 | providers = ['CPUExecutionProvider'] if device == 'cpu' else ['CUDAExecutionProvider'] 13 | onnx_det = hf_hub_download("yzd-v/DWPose", "yolox_l.onnx") 14 | onnx_pose = hf_hub_download("yzd-v/DWPose", "dw-ll_ucoco_384.onnx") 15 | 16 | self.session_det = ort.InferenceSession(path_or_bytes=onnx_det, providers=providers) 17 | self.session_pose = ort.InferenceSession(path_or_bytes=onnx_pose, providers=providers) 18 | 19 | def __call__(self, oriImg): 20 | det_result = inference_detector(self.session_det, oriImg) 21 | keypoints, scores = inference_pose(self.session_pose, det_result, oriImg) 22 | 23 | keypoints_info = np.concatenate( 24 | (keypoints, scores[..., None]), axis=-1) 25 | # compute neck joint 26 | neck = np.mean(keypoints_info[:, [5, 6]], axis=1) 27 | # neck score when visualizing pred 28 | neck[:, 2:4] = np.logical_and( 29 | keypoints_info[:, 5, 2:4] > 0.3, 30 | keypoints_info[:, 6, 2:4] > 0.3).astype(int) 31 | new_keypoints_info = np.insert( 32 | keypoints_info, 17, neck, axis=1) 33 | mmpose_idx = [ 34 | 17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3 35 | ] 36 | openpose_idx = [ 37 | 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17 38 | ] 39 | new_keypoints_info[:, openpose_idx] = \ 40 | new_keypoints_info[:, mmpose_idx] 41 | keypoints_info = new_keypoints_info 42 | 43 | keypoints, scores = keypoints_info[ 44 | ..., :2], keypoints_info[..., 2] 45 | 46 | return keypoints, scores 47 | 48 | 49 | -------------------------------------------------------------------------------- 
/src/flux/annotator/midas/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/flux/annotator/midas/__init__.py: -------------------------------------------------------------------------------- 1 | # Midas Depth Estimation 2 | # From https://github.com/isl-org/MiDaS 3 | # MIT LICENSE 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | 9 | from einops import rearrange 10 | from .api import MiDaSInference 11 | 12 | 13 | class MidasDetector: 14 | def __init__(self): 15 | self.model = MiDaSInference(model_type="dpt_hybrid").cuda() 16 | 17 | def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1): 18 | assert input_image.ndim == 3 19 | image_depth = input_image 20 | with torch.no_grad(): 21 | image_depth = torch.from_numpy(image_depth).float().cuda() 22 | image_depth = image_depth / 127.5 - 1.0 23 | image_depth = rearrange(image_depth, 'h w c -> 1 c h w') 24 | depth = self.model(image_depth)[0] 25 | 26 | depth_pt = depth.clone() 27 | depth_pt -= torch.min(depth_pt) 28 | depth_pt /= torch.max(depth_pt) 29 | depth_pt = depth_pt.cpu().numpy() 30 | depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8) 31 | 32 | depth_np = depth.cpu().numpy() 33 | x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3) 34 | y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3) 35 | z = np.ones_like(x) * a 36 | x[depth_pt < bg_th] = 0 37 | y[depth_pt < bg_th] = 0 38 | normal = np.stack([x, y, z], axis=2) 39 | normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5 40 | normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8) 41 | 42 | return depth_image, normal_image 43 | -------------------------------------------------------------------------------- /src/flux/annotator/midas/midas/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/midas/midas/__init__.py -------------------------------------------------------------------------------- /src/flux/annotator/midas/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, 
path): 6 | """Load model from file. 7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /src/flux/annotator/midas/midas/dpt_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .base_model import BaseModel 6 | from .blocks import ( 7 | FeatureFusionBlock, 8 | FeatureFusionBlock_custom, 9 | Interpolate, 10 | _make_encoder, 11 | forward_vit, 12 | ) 13 | 14 | 15 | def _make_fusion_block(features, use_bn): 16 | return FeatureFusionBlock_custom( 17 | features, 18 | nn.ReLU(False), 19 | deconv=False, 20 | bn=use_bn, 21 | expand=False, 22 | align_corners=True, 23 | ) 24 | 25 | 26 | class DPT(BaseModel): 27 | def __init__( 28 | self, 29 | head, 30 | features=256, 31 | backbone="vitb_rn50_384", 32 | readout="project", 33 | channels_last=False, 34 | use_bn=False, 35 | ): 36 | 37 | super(DPT, self).__init__() 38 | 39 | self.channels_last = channels_last 40 | 41 | hooks = { 42 | "vitb_rn50_384": [0, 1, 8, 11], 43 | "vitb16_384": [2, 5, 8, 11], 44 | "vitl16_384": [5, 11, 17, 23], 45 | } 46 | 47 | # Instantiate backbone and reassemble blocks 48 | self.pretrained, self.scratch = _make_encoder( 49 | backbone, 50 | features, 51 | False, # Set to true of you want to train from scratch, uses ImageNet weights 52 | groups=1, 53 | expand=False, 54 | exportable=False, 55 | hooks=hooks[backbone], 56 | use_readout=readout, 57 | ) 58 | 59 | self.scratch.refinenet1 = _make_fusion_block(features, use_bn) 60 | self.scratch.refinenet2 = _make_fusion_block(features, use_bn) 61 | self.scratch.refinenet3 = _make_fusion_block(features, use_bn) 62 | self.scratch.refinenet4 = _make_fusion_block(features, use_bn) 63 | 64 | self.scratch.output_conv = head 65 | 66 | 67 | def forward(self, x): 68 | if self.channels_last == True: 69 | x.contiguous(memory_format=torch.channels_last) 70 | 71 | layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x) 72 | 73 | layer_1_rn = self.scratch.layer1_rn(layer_1) 74 | layer_2_rn = self.scratch.layer2_rn(layer_2) 75 | layer_3_rn = self.scratch.layer3_rn(layer_3) 76 | layer_4_rn = self.scratch.layer4_rn(layer_4) 77 | 78 | path_4 = self.scratch.refinenet4(layer_4_rn) 79 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 80 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 81 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 82 | 83 | out = self.scratch.output_conv(path_1) 84 | 85 | return out 86 | 87 | 88 | class DPTDepthModel(DPT): 89 | def __init__(self, path=None, non_negative=True, **kwargs): 90 | features = kwargs["features"] if "features" in kwargs else 256 91 | 92 | head = nn.Sequential( 93 | nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1), 94 | Interpolate(scale_factor=2, mode="bilinear", align_corners=True), 95 | nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1), 96 | nn.ReLU(True), 97 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 98 | nn.ReLU(True) if non_negative else nn.Identity(), 99 | nn.Identity(), 100 | ) 101 | 102 | super().__init__(head, **kwargs) 103 | 104 | if path is not None: 105 | self.load(path) 106 | 107 | def forward(self, x): 108 | return super().forward(x).squeeze(dim=1) 109 | 110 | 
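The DPTDepthModel above is the architecture that MiDaSInference in annotator/midas/api.py wraps for the dpt_hybrid depth annotator. A minimal sketch of driving it directly, assuming the package is importable as `flux` and timm is installed; the random tensor is only a stand-in for a normalized 384x384 RGB batch, and with path=None the weights stay untrained:

import torch
from flux.annotator.midas.midas.dpt_depth import DPTDepthModel

# backbone must be one of the keys in DPT's hooks table (vitb_rn50_384 is the dpt_hybrid backbone).
model = DPTDepthModel(path=None, backbone="vitb_rn50_384", non_negative=True).eval()

with torch.no_grad():
    x = torch.randn(1, 3, 384, 384)   # stand-in for a normalized RGB batch
    depth = model(x)                  # DPTDepthModel.forward squeezes the output to (1, 384, 384)

print(depth.shape)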
-------------------------------------------------------------------------------- /src/flux/annotator/midas/midas/midas_net.py: -------------------------------------------------------------------------------- 1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets. 2 | This file contains code that is adapted from 3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .base_model import BaseModel 9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder 10 | 11 | 12 | class MidasNet(BaseModel): 13 | """Network for monocular depth estimation. 14 | """ 15 | 16 | def __init__(self, path=None, features=256, non_negative=True): 17 | """Init. 18 | 19 | Args: 20 | path (str, optional): Path to saved model. Defaults to None. 21 | features (int, optional): Number of features. Defaults to 256. 22 | backbone (str, optional): Backbone network for encoder. Defaults to resnet50 23 | """ 24 | print("Loading weights: ", path) 25 | 26 | super(MidasNet, self).__init__() 27 | 28 | use_pretrained = False if path is None else True 29 | 30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) 31 | 32 | self.scratch.refinenet4 = FeatureFusionBlock(features) 33 | self.scratch.refinenet3 = FeatureFusionBlock(features) 34 | self.scratch.refinenet2 = FeatureFusionBlock(features) 35 | self.scratch.refinenet1 = FeatureFusionBlock(features) 36 | 37 | self.scratch.output_conv = nn.Sequential( 38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), 39 | Interpolate(scale_factor=2, mode="bilinear"), 40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), 41 | nn.ReLU(True), 42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 43 | nn.ReLU(True) if non_negative else nn.Identity(), 44 | ) 45 | 46 | if path: 47 | self.load(path) 48 | 49 | def forward(self, x): 50 | """Forward pass. 
51 | 52 | Args: 53 | x (tensor): input data (image) 54 | 55 | Returns: 56 | tensor: depth 57 | """ 58 | 59 | layer_1 = self.pretrained.layer1(x) 60 | layer_2 = self.pretrained.layer2(layer_1) 61 | layer_3 = self.pretrained.layer3(layer_2) 62 | layer_4 = self.pretrained.layer4(layer_3) 63 | 64 | layer_1_rn = self.scratch.layer1_rn(layer_1) 65 | layer_2_rn = self.scratch.layer2_rn(layer_2) 66 | layer_3_rn = self.scratch.layer3_rn(layer_3) 67 | layer_4_rn = self.scratch.layer4_rn(layer_4) 68 | 69 | path_4 = self.scratch.refinenet4(layer_4_rn) 70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 73 | 74 | out = self.scratch.output_conv(path_1) 75 | 76 | return torch.squeeze(out, dim=1) 77 | -------------------------------------------------------------------------------- /src/flux/annotator/mlsd/__init__.py: -------------------------------------------------------------------------------- 1 | # MLSD Line Detection 2 | # From https://github.com/navervision/mlsd 3 | # Apache-2.0 license 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | import os 9 | 10 | from einops import rearrange 11 | from huggingface_hub import hf_hub_download 12 | from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny 13 | from .models.mbv2_mlsd_large import MobileV2_MLSD_Large 14 | from .utils import pred_lines 15 | 16 | from ...annotator.util import annotator_ckpts_path 17 | 18 | 19 | class MLSDdetector: 20 | def __init__(self): 21 | model_path = os.path.join(annotator_ckpts_path, "mlsd_large_512_fp32.pth") 22 | if not os.path.exists(model_path): 23 | model_path = hf_hub_download("lllyasviel/Annotators", "mlsd_large_512_fp32.pth") 24 | model = MobileV2_MLSD_Large() 25 | model.load_state_dict(torch.load(model_path), strict=True) 26 | self.model = model.cuda().eval() 27 | 28 | def __call__(self, input_image, thr_v, thr_d): 29 | assert input_image.ndim == 3 30 | img = input_image 31 | img_output = np.zeros_like(img) 32 | try: 33 | with torch.no_grad(): 34 | lines = pred_lines(img, self.model, [img.shape[0], img.shape[1]], thr_v, thr_d) 35 | for line in lines: 36 | x_start, y_start, x_end, y_end = [int(val) for val in line] 37 | cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1) 38 | except Exception as e: 39 | pass 40 | return img_output[:, :, 0] 41 | -------------------------------------------------------------------------------- /src/flux/annotator/tile/__init__.py: -------------------------------------------------------------------------------- 1 | import random 2 | import cv2 3 | from .guided_filter import FastGuidedFilter 4 | 5 | 6 | class TileDetector: 7 | # https://huggingface.co/xinsir/controlnet-tile-sdxl-1.0 8 | def __init__(self): 9 | pass 10 | 11 | def __call__(self, image): 12 | blur_strength = random.sample([i / 10. for i in range(10, 201, 2)], k=1)[0] 13 | radius = random.sample([i for i in range(1, 40, 2)], k=1)[0] 14 | eps = random.sample([i / 1000. for i in range(1, 101, 2)], k=1)[0] 15 | scale_factor = random.sample([i / 10. 
for i in range(10, 181, 5)], k=1)[0] 16 | 17 | ksize = int(blur_strength) 18 | if ksize % 2 == 0: 19 | ksize += 1 20 | 21 | if random.random() > 0.5: 22 | image = cv2.GaussianBlur(image, (ksize, ksize), blur_strength / 2) 23 | if random.random() > 0.5: 24 | filter = FastGuidedFilter(image, radius, eps, scale_factor) 25 | image = filter.filter(image) 26 | return image 27 | -------------------------------------------------------------------------------- /src/flux/annotator/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | 5 | 6 | annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts') 7 | 8 | 9 | def HWC3(x): 10 | assert x.dtype == np.uint8 11 | if x.ndim == 2: 12 | x = x[:, :, None] 13 | assert x.ndim == 3 14 | H, W, C = x.shape 15 | assert C == 1 or C == 3 or C == 4 16 | if C == 3: 17 | return x 18 | if C == 1: 19 | return np.concatenate([x, x, x], axis=2) 20 | if C == 4: 21 | color = x[:, :, 0:3].astype(np.float32) 22 | alpha = x[:, :, 3:4].astype(np.float32) / 255.0 23 | y = color * alpha + 255.0 * (1.0 - alpha) 24 | y = y.clip(0, 255).astype(np.uint8) 25 | return y 26 | 27 | 28 | def resize_image(input_image, resolution): 29 | H, W, C = input_image.shape 30 | H = float(H) 31 | W = float(W) 32 | k = float(resolution) / min(H, W) 33 | H *= k 34 | W *= k 35 | H = int(np.round(H / 64.0)) * 64 36 | W = int(np.round(W / 64.0)) * 64 37 | img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA) 38 | return img 39 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
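The HWC3 and resize_image helpers in annotator/util.py shown above are the usual preprocessing step in front of the detectors. A minimal sketch, assuming the package is importable as `flux`; the random array stands in for a loaded RGB image:

import numpy as np
from flux.annotator.util import HWC3, resize_image
from flux.annotator.canny import CannyDetector

img = np.random.randint(0, 255, (600, 400, 3), dtype=np.uint8)  # stand-in for an RGB image
img = resize_image(HWC3(img), 512)        # force 3 channels, snap both sides to multiples of 64
edges = CannyDetector()(img, 100, 200)    # single-channel Canny edge map
hint = HWC3(edges)                        # back to 3 channels for use as a control image
print(img.shape, hint.shape)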
-------------------------------------------------------------------------------- /src/flux/annotator/zoe/__init__.py: -------------------------------------------------------------------------------- 1 | # ZoeDepth 2 | # https://github.com/isl-org/ZoeDepth 3 | 4 | import os 5 | import cv2 6 | import numpy as np 7 | import torch 8 | 9 | from einops import rearrange 10 | from .zoedepth.models.zoedepth.zoedepth_v1 import ZoeDepth 11 | from .zoedepth.utils.config import get_config 12 | from ...annotator.util import annotator_ckpts_path 13 | from huggingface_hub import hf_hub_download 14 | 15 | 16 | class ZoeDetector: 17 | def __init__(self): 18 | model_path = os.path.join(annotator_ckpts_path, "ZoeD_M12_N.pt") 19 | if not os.path.exists(model_path): 20 | model_path = hf_hub_download("lllyasviel/Annotators", "ZoeD_M12_N.pt") 21 | conf = get_config("zoedepth", "infer") 22 | model = ZoeDepth.build_from_config(conf) 23 | model.load_state_dict(torch.load(model_path)['model'], strict=False) 24 | model = model.cuda() 25 | model.device = 'cuda' 26 | model.eval() 27 | self.model = model 28 | 29 | def __call__(self, input_image): 30 | assert input_image.ndim == 3 31 | image_depth = input_image 32 | with torch.no_grad(): 33 | image_depth = torch.from_numpy(image_depth).float().cuda() 34 | image_depth = image_depth / 255.0 35 | image_depth = rearrange(image_depth, 'h w c -> 1 c h w') 36 | depth = self.model.infer(image_depth) 37 | 38 | depth = depth[0, 0].cpu().numpy() 39 | 40 | vmin = np.percentile(depth, 2) 41 | vmax = np.percentile(depth, 85) 42 | 43 | depth -= vmin 44 | depth /= vmax - vmin 45 | depth = 1.0 - depth 46 | depth_image = (depth * 255.0).clip(0, 255).astype(np.uint8) 47 | 48 | return depth_image 49 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/data/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/data/ibims.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import os 26 | 27 | import numpy as np 28 | import torch 29 | from PIL import Image 30 | from torch.utils.data import DataLoader, Dataset 31 | from torchvision import transforms as T 32 | 33 | 34 | class iBims(Dataset): 35 | def __init__(self, config): 36 | root_folder = config.ibims_root 37 | with open(os.path.join(root_folder, "imagelist.txt"), 'r') as f: 38 | imglist = f.read().split() 39 | 40 | samples = [] 41 | for basename in imglist: 42 | img_path = os.path.join(root_folder, 'rgb', basename + ".png") 43 | depth_path = os.path.join(root_folder, 'depth', basename + ".png") 44 | valid_mask_path = os.path.join( 45 | root_folder, 'mask_invalid', basename+".png") 46 | transp_mask_path = os.path.join( 47 | root_folder, 'mask_transp', basename+".png") 48 | 49 | samples.append( 50 | (img_path, depth_path, valid_mask_path, transp_mask_path)) 51 | 52 | self.samples = samples 53 | # self.normalize = T.Normalize( 54 | # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 55 | self.normalize = lambda x : x 56 | 57 | def __getitem__(self, idx): 58 | img_path, depth_path, valid_mask_path, transp_mask_path = self.samples[idx] 59 | 60 | img = np.asarray(Image.open(img_path), dtype=np.float32) / 255.0 61 | depth = np.asarray(Image.open(depth_path), 62 | dtype=np.uint16).astype('float')*50.0/65535 63 | 64 | mask_valid = np.asarray(Image.open(valid_mask_path)) 65 | mask_transp = np.asarray(Image.open(transp_mask_path)) 66 | 67 | # depth = depth * mask_valid * mask_transp 68 | depth = np.where(mask_valid * mask_transp, depth, -1) 69 | 70 | img = torch.from_numpy(img).permute(2, 0, 1) 71 | img = self.normalize(img) 72 | depth = torch.from_numpy(depth).unsqueeze(0) 73 | return dict(image=img, depth=depth, image_path=img_path, depth_path=depth_path, dataset='ibims') 74 | 75 | def __len__(self): 76 | return len(self.samples) 77 | 78 | 79 | def get_ibims_loader(config, batch_size=1, **kwargs): 80 | dataloader = DataLoader(iBims(config), batch_size=batch_size, **kwargs) 81 | return dataloader 82 | 
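A short sketch of driving the `iBims` evaluation loader defined directly above. The config object and dataset location are placeholder assumptions; in the ZoeDepth code the config normally comes from `zoedepth.utils.config` and `ibims_root` must point at a local iBims-1 download:

```python
from types import SimpleNamespace

# Assumed import path (repository root on PYTHONPATH).
from src.flux.annotator.zoe.zoedepth.data.ibims import get_ibims_loader

# Placeholder config exposing just the attribute iBims() reads.
config = SimpleNamespace(ibims_root="/data/ibims1")

loader = get_ibims_loader(config, batch_size=1)

for sample in loader:
    img = sample["image"]    # float tensor, (1, 3, H, W), values in [0, 1]
    depth = sample["depth"]  # float tensor, (1, 1, H, W); invalid pixels are -1
    print(sample["dataset"], tuple(img.shape), tuple(depth.shape))
    break
```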
-------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | *.png 107 | *.pfm 108 | *.jpg 109 | *.jpeg 110 | *.pt -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/Dockerfile: -------------------------------------------------------------------------------- 1 | # enables cuda support in docker 2 | FROM nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04 3 | 4 | # install python 3.6, pip and requirements for opencv-python 5 | # (see https://github.com/NVIDIA/nvidia-docker/issues/864) 6 | RUN apt-get update && apt-get -y install \ 7 | python3 \ 8 | python3-pip \ 9 | libsm6 \ 10 | libxext6 \ 11 | libxrender-dev \ 12 | curl \ 13 | && rm -rf /var/lib/apt/lists/* 14 | 15 | # install python dependencies 16 | RUN pip3 install --upgrade pip 17 | RUN pip3 install torch~=1.8 torchvision opencv-python-headless~=3.4 timm 18 | 19 | # copy inference code 20 | WORKDIR /opt/MiDaS 21 | COPY ./midas ./midas 22 | COPY ./*.py ./ 23 | 24 | # download model weights so the docker image can be used offline 25 | RUN cd weights && {curl -OL https://github.com/isl-org/MiDaS/releases/download/v3/dpt_hybrid_384.pt; cd -; } 26 | RUN python3 run.py --model_type dpt_hybrid; exit 0 27 | 28 | # entrypoint (dont forget to mount input and output directories) 29 | CMD python3 run.py --model_type dpt_hybrid 30 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/LICENSE: -------------------------------------------------------------------------------- 
1 | MIT License 2 | 3 | Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/environment.yaml: -------------------------------------------------------------------------------- 1 | name: midas-py310 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - nvidia::cudatoolkit=11.7 7 | - python=3.10.8 8 | - pytorch::pytorch=1.13.0 9 | - torchvision=0.14.0 10 | - pip=22.3.1 11 | - numpy=1.23.4 12 | - pip: 13 | - opencv-python==4.6.0.66 14 | - imutils==0.5.4 15 | - timm==0.6.12 16 | - einops==0.6.0 -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/input/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/input/.placeholder -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/levit.py: -------------------------------------------------------------------------------- 1 | import timm 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | from .utils import activations, get_activation, Transpose 7 | 8 | 9 | def forward_levit(pretrained, x): 10 | pretrained.model.forward_features(x) 11 | 12 | layer_1 = pretrained.activations["1"] 13 | layer_2 = pretrained.activations["2"] 14 | layer_3 = pretrained.activations["3"] 15 | 16 | layer_1 = pretrained.act_postprocess1(layer_1) 17 | layer_2 = pretrained.act_postprocess2(layer_2) 18 | layer_3 = pretrained.act_postprocess3(layer_3) 19 | 20 | return layer_1, layer_2, layer_3 21 | 22 | 23 | def _make_levit_backbone( 24 | model, 25 | hooks=[3, 11, 21], 26 | patch_grid=[14, 14] 27 | ): 28 | pretrained = nn.Module() 29 | 30 | pretrained.model = model 31 | pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) 32 | pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) 33 | pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) 34 | 35 | pretrained.activations = activations 36 | 37 | patch_grid_size = np.array(patch_grid, dtype=int) 38 | 39 | 
pretrained.act_postprocess1 = nn.Sequential( 40 | Transpose(1, 2), 41 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) 42 | ) 43 | pretrained.act_postprocess2 = nn.Sequential( 44 | Transpose(1, 2), 45 | nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 2).astype(int)).tolist())) 46 | ) 47 | pretrained.act_postprocess3 = nn.Sequential( 48 | Transpose(1, 2), 49 | nn.Unflatten(2, torch.Size((np.ceil(patch_grid_size / 4).astype(int)).tolist())) 50 | ) 51 | 52 | return pretrained 53 | 54 | 55 | class ConvTransposeNorm(nn.Sequential): 56 | """ 57 | Modification of 58 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: ConvNorm 59 | such that ConvTranspose2d is used instead of Conv2d. 60 | """ 61 | 62 | def __init__( 63 | self, in_chs, out_chs, kernel_size=1, stride=1, pad=0, dilation=1, 64 | groups=1, bn_weight_init=1): 65 | super().__init__() 66 | self.add_module('c', 67 | nn.ConvTranspose2d(in_chs, out_chs, kernel_size, stride, pad, dilation, groups, bias=False)) 68 | self.add_module('bn', nn.BatchNorm2d(out_chs)) 69 | 70 | nn.init.constant_(self.bn.weight, bn_weight_init) 71 | 72 | @torch.no_grad() 73 | def fuse(self): 74 | c, bn = self._modules.values() 75 | w = bn.weight / (bn.running_var + bn.eps) ** 0.5 76 | w = c.weight * w[:, None, None, None] 77 | b = bn.bias - bn.running_mean * bn.weight / (bn.running_var + bn.eps) ** 0.5 78 | m = nn.ConvTranspose2d( 79 | w.size(1), w.size(0), w.shape[2:], stride=self.c.stride, 80 | padding=self.c.padding, dilation=self.c.dilation, groups=self.c.groups) 81 | m.weight.data.copy_(w) 82 | m.bias.data.copy_(b) 83 | return m 84 | 85 | 86 | def stem_b4_transpose(in_chs, out_chs, activation): 87 | """ 88 | Modification of 89 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/levit.py: stem_b16 90 | such that ConvTranspose2d is used instead of Conv2d and stem is also reduced to the half. 
91 | """ 92 | return nn.Sequential( 93 | ConvTransposeNorm(in_chs, out_chs, 3, 2, 1), 94 | activation(), 95 | ConvTransposeNorm(out_chs, out_chs // 2, 3, 2, 1), 96 | activation()) 97 | 98 | 99 | def _make_pretrained_levit_384(pretrained, hooks=None): 100 | model = timm.create_model("levit_384", pretrained=pretrained) 101 | 102 | hooks = [3, 11, 21] if hooks == None else hooks 103 | return _make_levit_backbone( 104 | model, 105 | hooks=hooks 106 | ) 107 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | import torch.nn as nn 4 | 5 | from pathlib import Path 6 | from .utils import activations, forward_default, get_activation 7 | 8 | from ..external.next_vit.classification.nextvit import * 9 | 10 | 11 | def forward_next_vit(pretrained, x): 12 | return forward_default(pretrained, x, "forward") 13 | 14 | 15 | def _make_next_vit_backbone( 16 | model, 17 | hooks=[2, 6, 36, 39], 18 | ): 19 | pretrained = nn.Module() 20 | 21 | pretrained.model = model 22 | pretrained.model.features[hooks[0]].register_forward_hook(get_activation("1")) 23 | pretrained.model.features[hooks[1]].register_forward_hook(get_activation("2")) 24 | pretrained.model.features[hooks[2]].register_forward_hook(get_activation("3")) 25 | pretrained.model.features[hooks[3]].register_forward_hook(get_activation("4")) 26 | 27 | pretrained.activations = activations 28 | 29 | return pretrained 30 | 31 | 32 | def _make_pretrained_next_vit_large_6m(hooks=None): 33 | model = timm.create_model("nextvit_large") 34 | 35 | hooks = [2, 6, 36, 39] if hooks == None else hooks 36 | return _make_next_vit_backbone( 37 | model, 38 | hooks=hooks, 39 | ) 40 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | from .swin_common import _make_swin_backbone 4 | 5 | 6 | def _make_pretrained_swinl12_384(pretrained, hooks=None): 7 | model = timm.create_model("swin_large_patch4_window12_384", pretrained=pretrained) 8 | 9 | hooks = [1, 1, 17, 1] if hooks == None else hooks 10 | return _make_swin_backbone( 11 | model, 12 | hooks=hooks 13 | ) 14 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py: -------------------------------------------------------------------------------- 1 | import timm 2 | 3 | from .swin_common import _make_swin_backbone 4 | 5 | 6 | def _make_pretrained_swin2l24_384(pretrained, hooks=None): 7 | model = timm.create_model("swinv2_large_window12to24_192to384_22kft1k", pretrained=pretrained) 8 | 9 | hooks = [1, 1, 17, 1] if hooks == None else hooks 10 | return _make_swin_backbone( 11 | model, 12 | hooks=hooks 13 | ) 14 | 15 | 16 | def _make_pretrained_swin2b24_384(pretrained, hooks=None): 17 | model = timm.create_model("swinv2_base_window12to24_192to384_22kft1k", pretrained=pretrained) 18 | 19 | hooks = [1, 1, 17, 1] if hooks == None else hooks 20 | return _make_swin_backbone( 21 | model, 22 | hooks=hooks 23 | ) 24 | 25 | 26 | def _make_pretrained_swin2t16_256(pretrained, hooks=None): 27 | model = timm.create_model("swinv2_tiny_window16_256", pretrained=pretrained) 28 | 29 | hooks = 
[1, 1, 5, 1] if hooks == None else hooks 30 | return _make_swin_backbone( 31 | model, 32 | hooks=hooks, 33 | patch_grid=[64, 64] 34 | ) 35 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import torch.nn as nn 4 | import numpy as np 5 | 6 | from .utils import activations, forward_default, get_activation, Transpose 7 | 8 | 9 | def forward_swin(pretrained, x): 10 | return forward_default(pretrained, x) 11 | 12 | 13 | def _make_swin_backbone( 14 | model, 15 | hooks=[1, 1, 17, 1], 16 | patch_grid=[96, 96] 17 | ): 18 | pretrained = nn.Module() 19 | 20 | pretrained.model = model 21 | pretrained.model.layers[0].blocks[hooks[0]].register_forward_hook(get_activation("1")) 22 | pretrained.model.layers[1].blocks[hooks[1]].register_forward_hook(get_activation("2")) 23 | pretrained.model.layers[2].blocks[hooks[2]].register_forward_hook(get_activation("3")) 24 | pretrained.model.layers[3].blocks[hooks[3]].register_forward_hook(get_activation("4")) 25 | 26 | pretrained.activations = activations 27 | 28 | if hasattr(model, "patch_grid"): 29 | used_patch_grid = model.patch_grid 30 | else: 31 | used_patch_grid = patch_grid 32 | 33 | patch_grid_size = np.array(used_patch_grid, dtype=int) 34 | 35 | pretrained.act_postprocess1 = nn.Sequential( 36 | Transpose(1, 2), 37 | nn.Unflatten(2, torch.Size(patch_grid_size.tolist())) 38 | ) 39 | pretrained.act_postprocess2 = nn.Sequential( 40 | Transpose(1, 2), 41 | nn.Unflatten(2, torch.Size((patch_grid_size // 2).tolist())) 42 | ) 43 | pretrained.act_postprocess3 = nn.Sequential( 44 | Transpose(1, 2), 45 | nn.Unflatten(2, torch.Size((patch_grid_size // 4).tolist())) 46 | ) 47 | pretrained.act_postprocess4 = nn.Sequential( 48 | Transpose(1, 2), 49 | nn.Unflatten(2, torch.Size((patch_grid_size // 8).tolist())) 50 | ) 51 | 52 | return pretrained 53 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/base_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BaseModel(torch.nn.Module): 5 | def load(self, path): 6 | """Load model from file. 7 | 8 | Args: 9 | path (str): file path 10 | """ 11 | parameters = torch.load(path, map_location=torch.device('cpu')) 12 | 13 | if "optimizer" in parameters: 14 | parameters = parameters["model"] 15 | 16 | self.load_state_dict(parameters) 17 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net.py: -------------------------------------------------------------------------------- 1 | """MidashNet: Network for monocular depth estimation trained by mixing several datasets. 2 | This file contains code that is adapted from 3 | https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .base_model import BaseModel 9 | from .blocks import FeatureFusionBlock, Interpolate, _make_encoder 10 | 11 | 12 | class MidasNet(BaseModel): 13 | """Network for monocular depth estimation. 14 | """ 15 | 16 | def __init__(self, path=None, features=256, non_negative=True): 17 | """Init. 
18 | 19 | Args: 20 | path (str, optional): Path to saved model. Defaults to None. 21 | features (int, optional): Number of features. Defaults to 256. 22 | backbone (str, optional): Backbone network for encoder. Defaults to resnet50 23 | """ 24 | print("Loading weights: ", path) 25 | 26 | super(MidasNet, self).__init__() 27 | 28 | use_pretrained = False if path is None else True 29 | 30 | self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) 31 | 32 | self.scratch.refinenet4 = FeatureFusionBlock(features) 33 | self.scratch.refinenet3 = FeatureFusionBlock(features) 34 | self.scratch.refinenet2 = FeatureFusionBlock(features) 35 | self.scratch.refinenet1 = FeatureFusionBlock(features) 36 | 37 | self.scratch.output_conv = nn.Sequential( 38 | nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), 39 | Interpolate(scale_factor=2, mode="bilinear"), 40 | nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), 41 | nn.ReLU(True), 42 | nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), 43 | nn.ReLU(True) if non_negative else nn.Identity(), 44 | ) 45 | 46 | if path: 47 | self.load(path) 48 | 49 | def forward(self, x): 50 | """Forward pass. 51 | 52 | Args: 53 | x (tensor): input data (image) 54 | 55 | Returns: 56 | tensor: depth 57 | """ 58 | 59 | layer_1 = self.pretrained.layer1(x) 60 | layer_2 = self.pretrained.layer2(layer_1) 61 | layer_3 = self.pretrained.layer3(layer_2) 62 | layer_4 = self.pretrained.layer4(layer_3) 63 | 64 | layer_1_rn = self.scratch.layer1_rn(layer_1) 65 | layer_2_rn = self.scratch.layer2_rn(layer_2) 66 | layer_3_rn = self.scratch.layer3_rn(layer_3) 67 | layer_4_rn = self.scratch.layer4_rn(layer_4) 68 | 69 | path_4 = self.scratch.refinenet4(layer_4_rn) 70 | path_3 = self.scratch.refinenet3(path_4, layer_3_rn) 71 | path_2 = self.scratch.refinenet2(path_3, layer_2_rn) 72 | path_1 = self.scratch.refinenet1(path_2, layer_1_rn) 73 | 74 | out = self.scratch.output_conv(path_1) 75 | 76 | return torch.squeeze(out, dim=1) 77 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/README.md: -------------------------------------------------------------------------------- 1 | ## Mobile version of MiDaS for iOS / Android - Monocular Depth Estimation 2 | 3 | ### Accuracy 4 | 5 | * Old small model - ResNet50 default-decoder 384x384 6 | * New small model - EfficientNet-Lite3 small-decoder 256x256 7 | 8 | **Zero-shot error** (the lower - the better): 9 | 10 | | Model | DIW WHDR | Eth3d AbsRel | Sintel AbsRel | Kitti δ>1.25 | NyuDepthV2 δ>1.25 | TUM δ>1.25 | 11 | |---|---|---|---|---|---|---| 12 | | Old small model 384x384 | **0.1248** | 0.1550 | **0.3300** | **21.81** | 15.73 | 17.00 | 13 | | New small model 256x256 | 0.1344 | **0.1344** | 0.3370 | 29.27 | **13.43** | **14.53** | 14 | | Relative improvement, % | -8 % | **+13 %** | -2 % | -34 % | **+15 %** | **+15 %** | 15 | 16 | None of Train/Valid/Test subsets of datasets (DIW, Eth3d, Sintel, Kitti, NyuDepthV2, TUM) were not involved in Training or Fine Tuning. 
17 | 18 | ### Inference speed (FPS) on iOS / Android 19 | 20 | **Frames Per Second** (the higher - the better): 21 | 22 | | Model | iPhone CPU | iPhone GPU | iPhone NPU | OnePlus8 CPU | OnePlus8 GPU | OnePlus8 NNAPI | 23 | |---|---|---|---|---|---|---| 24 | | Old small model 384x384 | 0.6 | N/A | N/A | 0.45 | 0.50 | 0.50 | 25 | | New small model 256x256 | 8 | 22 | **30** | 6 | **22** | 4 | 26 | | SpeedUp, X times | **12.8x** | - | - | **13.2x** | **44x** | **8x** | 27 | 28 | N/A - run-time error (no data available) 29 | 30 | 31 | #### Models: 32 | 33 | * Old small model - ResNet50 default-decoder 1x384x384x3, batch=1 FP32 (converters: Pytorch -> ONNX - [onnx_tf](https://github.com/onnx/onnx-tensorflow) -> (saved model) PB -> TFlite) 34 | 35 | (Trained on datasets: RedWeb, MegaDepth, WSVD, 3D Movies, DIML indoor) 36 | 37 | * New small model - EfficientNet-Lite3 small-decoder 1x256x256x3, batch=1 FP32 (custom converter: Pytorch -> TFlite) 38 | 39 | (Trained on datasets: RedWeb, MegaDepth, WSVD, 3D Movies, DIML indoor, HRWSI, IRS, TartanAir, BlendedMVS, ApolloScape) 40 | 41 | #### Frameworks for training and conversions: 42 | ``` 43 | pip install torch==1.6.0 torchvision==0.7.0 44 | pip install tf-nightly-gpu==2.5.0.dev20201031 tensorflow-addons==0.11.2 numpy==1.18.0 45 | git clone --depth 1 --branch v1.6.0 https://github.com/onnx/onnx-tensorflow 46 | ``` 47 | 48 | #### SoC - OS - Library: 49 | 50 | * iPhone 11 (A13 Bionic) - iOS 13.7 - TensorFlowLiteSwift 0.0.1-nightly 51 | * OnePlus 8 (Snapdragon 865) - Andoird 10 - org.tensorflow:tensorflow-lite-task-vision:0.0.0-nightly 52 | 53 | 54 | ### Citation 55 | 56 | This repository contains code to compute depth from a single image. It accompanies our [paper](https://arxiv.org/abs/1907.01341v3): 57 | 58 | >Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer 59 | René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun 60 | 61 | Please cite our paper if you use this code or any of the models: 62 | ``` 63 | @article{Ranftl2020, 64 | author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun}, 65 | title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer}, 66 | journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, 67 | year = {2020}, 68 | } 69 | ``` 70 | 71 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .gradle 3 | /local.properties 4 | /.idea/libraries 5 | /.idea/modules.xml 6 | /.idea/workspace.xml 7 | .DS_Store 8 | /build 9 | /captures 10 | .externalNativeBuild 11 | 12 | /.gradle/ 13 | /.idea/ -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Alexey 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit 
persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/README.md: -------------------------------------------------------------------------------- 1 | # MiDaS on Android smartphone by using TensorFlow-lite (TFLite) 2 | 3 | 4 | * Either use Android Studio for compilation. 5 | 6 | * Or use ready to install apk-file: 7 | * Or use URL: https://i.diawi.com/CVb8a9 8 | * Or use QR-code: 9 | 10 | Scan QR-code or open URL -> Press `Install application` -> Press `Download` and wait for download -> Open -> Install -> Open -> Press: Allow MiDaS to take photo and video from the camera While using the APP 11 | 12 | ![CVb8a9](https://user-images.githubusercontent.com/4096485/97727213-38552500-1ae1-11eb-8b76-4ea11216f76d.png) 13 | 14 | ---- 15 | 16 | To use another model, you should convert it to `model_opt.tflite` and place it to the directory: `models\src\main\assets` 17 | 18 | 19 | ---- 20 | 21 | Original repository: https://github.com/isl-org/MiDaS 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | 3 | /build/ -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.application' 2 | 3 | android { 4 | compileSdkVersion 28 5 | defaultConfig { 6 | applicationId "org.tensorflow.lite.examples.classification" 7 | minSdkVersion 21 8 | targetSdkVersion 28 9 | versionCode 1 10 | versionName "1.0" 11 | 12 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" 13 | } 14 | buildTypes { 15 | release { 16 | minifyEnabled false 17 | proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' 18 | } 19 | } 20 | aaptOptions { 21 | noCompress "tflite" 22 | } 23 | compileOptions { 24 | sourceCompatibility = '1.8' 25 | targetCompatibility = '1.8' 26 | } 27 | lintOptions { 28 | abortOnError false 29 | } 30 | flavorDimensions "tfliteInference" 31 | productFlavors { 32 | // The TFLite inference is built using the TFLite Support library. 33 | support { 34 | dimension "tfliteInference" 35 | } 36 | // The TFLite inference is built using the TFLite Task library. 
37 | taskApi { 38 | dimension "tfliteInference" 39 | } 40 | } 41 | 42 | } 43 | 44 | dependencies { 45 | implementation fileTree(dir: 'libs', include: ['*.jar']) 46 | supportImplementation project(":lib_support") 47 | taskApiImplementation project(":lib_task_api") 48 | implementation 'androidx.appcompat:appcompat:1.0.0' 49 | implementation 'androidx.coordinatorlayout:coordinatorlayout:1.0.0' 50 | implementation 'com.google.android.material:material:1.0.0' 51 | 52 | androidTestImplementation 'androidx.test.ext:junit:1.1.1' 53 | androidTestImplementation 'com.google.truth:truth:1.0.1' 54 | androidTestImplementation 'androidx.test:runner:1.2.0' 55 | androidTestImplementation 'androidx.test:rules:1.1.0' 56 | } 57 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # You can control the set of applied configuration files using the 3 | # proguardFiles setting in build.gradle. 4 | # 5 | # For more details, see 6 | # http://developer.android.com/guide/developing/tools/proguard.html 7 | 8 | # If your project uses WebView with JS, uncomment the following 9 | # and specify the fully qualified class name to the JavaScript interface 10 | # class: 11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 12 | # public *; 13 | #} 14 | 15 | # Uncomment this to preserve the line number information for 16 | # debugging stack traces. 17 | #-keepattributes SourceFile,LineNumberTable 18 | 19 | # If you keep the line number information, uncomment this to 20 | # hide the original source file name. 
21 | #-renamesourcefileattribute SourceFile 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_support.txt: -------------------------------------------------------------------------------- 1 | red_fox 0.79403335 2 | kit_fox 0.16753247 3 | grey_fox 0.03619214 4 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_task_api.txt: -------------------------------------------------------------------------------- 1 | red_fox 0.85 2 | kit_fox 0.13 3 | grey_fox 0.02 4 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/androidTest/java/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 18 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/AutoFitTextureView.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.tensorflow.lite.examples.classification.customview; 18 | 19 | import android.content.Context; 20 | import android.util.AttributeSet; 21 | import android.view.TextureView; 22 | 23 | /** A {@link TextureView} that can be adjusted to a specified aspect ratio. */ 24 | public class AutoFitTextureView extends TextureView { 25 | private int ratioWidth = 0; 26 | private int ratioHeight = 0; 27 | 28 | public AutoFitTextureView(final Context context) { 29 | this(context, null); 30 | } 31 | 32 | public AutoFitTextureView(final Context context, final AttributeSet attrs) { 33 | this(context, attrs, 0); 34 | } 35 | 36 | public AutoFitTextureView(final Context context, final AttributeSet attrs, final int defStyle) { 37 | super(context, attrs, defStyle); 38 | } 39 | 40 | /** 41 | * Sets the aspect ratio for this view. The size of the view will be measured based on the ratio 42 | * calculated from the parameters. Note that the actual sizes of parameters don't matter, that is, 43 | * calling setAspectRatio(2, 3) and setAspectRatio(4, 6) make the same result. 
44 | * 45 | * @param width Relative horizontal size 46 | * @param height Relative vertical size 47 | */ 48 | public void setAspectRatio(final int width, final int height) { 49 | if (width < 0 || height < 0) { 50 | throw new IllegalArgumentException("Size cannot be negative."); 51 | } 52 | ratioWidth = width; 53 | ratioHeight = height; 54 | requestLayout(); 55 | } 56 | 57 | @Override 58 | protected void onMeasure(final int widthMeasureSpec, final int heightMeasureSpec) { 59 | super.onMeasure(widthMeasureSpec, heightMeasureSpec); 60 | final int width = MeasureSpec.getSize(widthMeasureSpec); 61 | final int height = MeasureSpec.getSize(heightMeasureSpec); 62 | if (0 == ratioWidth || 0 == ratioHeight) { 63 | setMeasuredDimension(width, height); 64 | } else { 65 | if (width < height * ratioWidth / ratioHeight) { 66 | setMeasuredDimension(width, width * ratioHeight / ratioWidth); 67 | } else { 68 | setMeasuredDimension(height * ratioWidth / ratioHeight, height); 69 | } 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/OverlayView.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.customview; 17 | 18 | import android.content.Context; 19 | import android.graphics.Canvas; 20 | import android.util.AttributeSet; 21 | import android.view.View; 22 | import java.util.LinkedList; 23 | import java.util.List; 24 | 25 | /** A simple View providing a render callback to other classes. */ 26 | public class OverlayView extends View { 27 | private final List callbacks = new LinkedList(); 28 | 29 | public OverlayView(final Context context, final AttributeSet attrs) { 30 | super(context, attrs); 31 | } 32 | 33 | public void addCallback(final DrawCallback callback) { 34 | callbacks.add(callback); 35 | } 36 | 37 | @Override 38 | public synchronized void draw(final Canvas canvas) { 39 | for (final DrawCallback callback : callbacks) { 40 | callback.drawCallback(canvas); 41 | } 42 | } 43 | 44 | /** Interface defining the callback for client classes. */ 45 | public interface DrawCallback { 46 | public void drawCallback(final Canvas canvas); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/RecognitionScoreView.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.customview; 17 | 18 | import android.content.Context; 19 | import android.graphics.Canvas; 20 | import android.graphics.Paint; 21 | import android.util.AttributeSet; 22 | import android.util.TypedValue; 23 | import android.view.View; 24 | import java.util.List; 25 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Recognition; 26 | 27 | public class RecognitionScoreView extends View implements ResultsView { 28 | private static final float TEXT_SIZE_DIP = 16; 29 | private final float textSizePx; 30 | private final Paint fgPaint; 31 | private final Paint bgPaint; 32 | private List results; 33 | 34 | public RecognitionScoreView(final Context context, final AttributeSet set) { 35 | super(context, set); 36 | 37 | textSizePx = 38 | TypedValue.applyDimension( 39 | TypedValue.COMPLEX_UNIT_DIP, TEXT_SIZE_DIP, getResources().getDisplayMetrics()); 40 | fgPaint = new Paint(); 41 | fgPaint.setTextSize(textSizePx); 42 | 43 | bgPaint = new Paint(); 44 | bgPaint.setColor(0xcc4285f4); 45 | } 46 | 47 | @Override 48 | public void setResults(final List results) { 49 | this.results = results; 50 | postInvalidate(); 51 | } 52 | 53 | @Override 54 | public void onDraw(final Canvas canvas) { 55 | final int x = 10; 56 | int y = (int) (fgPaint.getTextSize() * 1.5f); 57 | 58 | canvas.drawPaint(bgPaint); 59 | 60 | if (results != null) { 61 | for (final Recognition recog : results) { 62 | canvas.drawText(recog.getTitle() + ": " + recog.getConfidence(), x, y, fgPaint); 63 | y += (int) (fgPaint.getTextSize() * 1.5f); 64 | } 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/ResultsView.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.customview; 17 | 18 | import java.util.List; 19 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Recognition; 20 | 21 | public interface ResultsView { 22 | public void setResults(final List results); 23 | } 24 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml: -------------------------------------------------------------------------------- 1 | 7 | 12 | 13 | 19 | 22 | 25 | 26 | 27 | 28 | 34 | 35 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable/bottom_sheet_bg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable/ic_baseline_add.xml: -------------------------------------------------------------------------------- 1 | 6 | 9 | 10 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable/ic_baseline_remove.xml: -------------------------------------------------------------------------------- 1 | 6 | 9 | 10 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/drawable/rectangle.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 7 | 12 | 13 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/layout/tfe_ic_activity_camera.xml: -------------------------------------------------------------------------------- 1 | 16 | 21 | 22 | 27 | 28 | 29 | 36 | 37 | 38 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 56 | 57 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/layout/tfe_ic_camera_connection_fragment.xml: -------------------------------------------------------------------------------- 1 | 16 | 19 | 20 | 25 | 26 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/values/colors.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | #ffa800 4 | #ff6f00 5 | #425066 6 | 7 | #66000000 8 | 9 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/values/dimens.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15dp 4 | 8dp 5 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | Midas 3 | This device doesn\'t support Camera2 API. 4 | GPU does not yet supported quantized models. 5 | Model: 6 | 7 | Float_EfficientNet 8 | 13 | 14 | 15 | Device: 16 | 17 | GPU 18 | CPU 19 | NNAPI 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/app/src/main/res/values/styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/build.gradle: -------------------------------------------------------------------------------- 1 | // Top-level build file where you can add configuration options common to all sub-projects/modules. 2 | 3 | buildscript { 4 | 5 | repositories { 6 | google() 7 | jcenter() 8 | } 9 | dependencies { 10 | classpath 'com.android.tools.build:gradle:4.0.0' 11 | classpath 'de.undercouch:gradle-download-task:4.0.2' 12 | // NOTE: Do not place your application dependencies here; they belong 13 | // in the individual module build.gradle files 14 | } 15 | } 16 | 17 | allprojects { 18 | repositories { 19 | google() 20 | jcenter() 21 | } 22 | } 23 | 24 | task clean(type: Delete) { 25 | delete rootProject.buildDir 26 | } 27 | 28 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradle.properties: -------------------------------------------------------------------------------- 1 | # Project-wide Gradle settings. 2 | # IDE (e.g. Android Studio) users: 3 | # Gradle settings configured through the IDE *will override* 4 | # any settings specified in this file. 5 | # For more details on how to configure your build environment visit 6 | # http://www.gradle.org/docs/current/userguide/build_environment.html 7 | # Specifies the JVM arguments used for the daemon process. 8 | # The setting is particularly useful for tweaking memory settings. 9 | org.gradle.jvmargs=-Xmx1536m 10 | # When configured, Gradle will run in incubating parallel mode. 11 | # This option should only be used with decoupled projects. 
More details, visit 12 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects 13 | # org.gradle.parallel=true 14 | android.useAndroidX=true 15 | android.enableJetifier=true 16 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.1.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 33 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 34 | 35 | @rem Find java.exe 36 | if defined JAVA_HOME goto findJavaFromJavaHome 37 | 38 | set JAVA_EXE=java.exe 39 | %JAVA_EXE% -version >NUL 2>&1 40 | if "%ERRORLEVEL%" == "0" goto init 41 | 42 | echo. 43 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 44 | echo. 45 | echo Please set the JAVA_HOME variable in your environment to match the 46 | echo location of your Java installation. 47 | 48 | goto fail 49 | 50 | :findJavaFromJavaHome 51 | set JAVA_HOME=%JAVA_HOME:"=% 52 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 53 | 54 | if exist "%JAVA_EXE%" goto init 55 | 56 | echo. 57 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 58 | echo. 
59 | echo Please set the JAVA_HOME variable in your environment to match the 60 | echo location of your Java installation. 61 | 62 | goto fail 63 | 64 | :init 65 | @rem Get command-line arguments, handling Windows variants 66 | 67 | if not "%OS%" == "Windows_NT" goto win9xME_args 68 | 69 | :win9xME_args 70 | @rem Slurp the command line arguments. 71 | set CMD_LINE_ARGS= 72 | set _SKIP=2 73 | 74 | :win9xME_args_slurp 75 | if "x%~1" == "x" goto execute 76 | 77 | set CMD_LINE_ARGS=%* 78 | 79 | :execute 80 | @rem Setup the command line 81 | 82 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 83 | 84 | @rem Execute Gradle 85 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 86 | 87 | :end 88 | @rem End local scope for the variables with windows NT shell 89 | if "%ERRORLEVEL%"=="0" goto mainEnd 90 | 91 | :fail 92 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 93 | rem the _cmd.exe /c_ return code! 94 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 95 | exit /b 1 96 | 97 | :mainEnd 98 | if "%OS%"=="Windows_NT" endlocal 99 | 100 | :omega 101 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.library' 2 | 3 | android { 4 | compileSdkVersion 28 5 | buildToolsVersion "28.0.0" 6 | 7 | defaultConfig { 8 | minSdkVersion 21 9 | targetSdkVersion 28 10 | versionCode 1 11 | versionName "1.0" 12 | 13 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" 14 | 15 | } 16 | 17 | buildTypes { 18 | release { 19 | minifyEnabled false 20 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' 21 | } 22 | } 23 | 24 | aaptOptions { 25 | noCompress "tflite" 26 | } 27 | 28 | lintOptions { 29 | checkReleaseBuilds false 30 | // Or, if you prefer, you can continue to check for errors in release builds, 31 | // but continue the build even when errors are found: 32 | abortOnError false 33 | } 34 | } 35 | 36 | dependencies { 37 | implementation fileTree(dir: 'libs', include: ['*.jar']) 38 | implementation project(":models") 39 | implementation 'androidx.appcompat:appcompat:1.1.0' 40 | 41 | // Build off of nightly TensorFlow Lite 42 | implementation('org.tensorflow:tensorflow-lite:0.0.0-nightly') { changing = true } 43 | implementation('org.tensorflow:tensorflow-lite-gpu:0.0.0-nightly') { changing = true } 44 | implementation('org.tensorflow:tensorflow-lite-support:0.0.0-nightly') { changing = true } 45 | // Use local TensorFlow library 46 | // implementation 'org.tensorflow:tensorflow-lite-local:0.0.0' 47 | } 48 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # You can control the set of applied configuration files using the 3 | # proguardFiles setting in build.gradle. 
4 | # 5 | # For more details, see 6 | # http://developer.android.com/guide/developing/tools/proguard.html 7 | 8 | # If your project uses WebView with JS, uncomment the following 9 | # and specify the fully qualified class name to the JavaScript interface 10 | # class: 11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 12 | # public *; 13 | #} 14 | 15 | # Uncomment this to preserve the line number information for 16 | # debugging stack traces. 17 | #-keepattributes SourceFile,LineNumberTable 18 | 19 | # If you keep the line number information, uncomment this to 20 | # hide the original source file name. 21 | #-renamesourcefileattribute SourceFile 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.tflite; 17 | 18 | import android.app.Activity; 19 | import java.io.IOException; 20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; 21 | import org.tensorflow.lite.support.common.TensorOperator; 22 | import org.tensorflow.lite.support.common.ops.NormalizeOp; 23 | 24 | /** This TensorFlowLite classifier works with the float EfficientNet model. */ 25 | public class ClassifierFloatEfficientNet extends Classifier { 26 | 27 | private static final float IMAGE_MEAN = 115.0f; //127.0f; 28 | private static final float IMAGE_STD = 58.0f; //128.0f; 29 | 30 | /** 31 | * Float model does not need dequantization in the post-processing. Setting mean and std as 0.0f 32 | * and 1.0f, repectively, to bypass the normalization. 33 | */ 34 | private static final float PROBABILITY_MEAN = 0.0f; 35 | 36 | private static final float PROBABILITY_STD = 1.0f; 37 | 38 | /** 39 | * Initializes a {@code ClassifierFloatMobileNet}. 40 | * 41 | * @param activity 42 | */ 43 | public ClassifierFloatEfficientNet(Activity activity, Device device, int numThreads) 44 | throws IOException { 45 | super(activity, device, numThreads); 46 | } 47 | 48 | @Override 49 | protected String getModelPath() { 50 | // you can download this file from 51 | // see build.gradle for where to obtain this file. It should be auto 52 | // downloaded into assets. 
53 | //return "efficientnet-lite0-fp32.tflite"; 54 | return "model_opt.tflite"; 55 | } 56 | 57 | @Override 58 | protected String getLabelPath() { 59 | return "labels_without_background.txt"; 60 | } 61 | 62 | @Override 63 | protected TensorOperator getPreprocessNormalizeOp() { 64 | return new NormalizeOp(IMAGE_MEAN, IMAGE_STD); 65 | } 66 | 67 | @Override 68 | protected TensorOperator getPostprocessNormalizeOp() { 69 | return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.tflite; 17 | 18 | import android.app.Activity; 19 | import java.io.IOException; 20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; 21 | import org.tensorflow.lite.support.common.TensorOperator; 22 | import org.tensorflow.lite.support.common.ops.NormalizeOp; 23 | 24 | /** This TensorFlowLite classifier works with the float MobileNet model. */ 25 | public class ClassifierFloatMobileNet extends Classifier { 26 | 27 | /** Float MobileNet requires additional normalization of the used input. */ 28 | private static final float IMAGE_MEAN = 127.5f; 29 | 30 | private static final float IMAGE_STD = 127.5f; 31 | 32 | /** 33 | * Float model does not need dequantization in the post-processing. Setting mean and std as 0.0f 34 | * and 1.0f, repectively, to bypass the normalization. 35 | */ 36 | private static final float PROBABILITY_MEAN = 0.0f; 37 | 38 | private static final float PROBABILITY_STD = 1.0f; 39 | 40 | /** 41 | * Initializes a {@code ClassifierFloatMobileNet}. 42 | * 43 | * @param activity 44 | */ 45 | public ClassifierFloatMobileNet(Activity activity, Device device, int numThreads) 46 | throws IOException { 47 | super(activity, device, numThreads); 48 | } 49 | 50 | @Override 51 | protected String getModelPath() { 52 | // you can download this file from 53 | // see build.gradle for where to obtain this file. It should be auto 54 | // downloaded into assets. 
55 | return "model_0.tflite"; 56 | } 57 | 58 | @Override 59 | protected String getLabelPath() { 60 | return "labels.txt"; 61 | } 62 | 63 | @Override 64 | protected TensorOperator getPreprocessNormalizeOp() { 65 | return new NormalizeOp(IMAGE_MEAN, IMAGE_STD); 66 | } 67 | 68 | @Override 69 | protected TensorOperator getPostprocessNormalizeOp() { 70 | return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.tflite; 17 | 18 | import android.app.Activity; 19 | import java.io.IOException; 20 | import org.tensorflow.lite.support.common.TensorOperator; 21 | import org.tensorflow.lite.support.common.ops.NormalizeOp; 22 | 23 | /** This TensorFlow Lite classifier works with the quantized EfficientNet model. */ 24 | public class ClassifierQuantizedEfficientNet extends Classifier { 25 | 26 | /** 27 | * The quantized model does not require normalization, thus set mean as 0.0f, and std as 1.0f to 28 | * bypass the normalization. 29 | */ 30 | private static final float IMAGE_MEAN = 0.0f; 31 | 32 | private static final float IMAGE_STD = 1.0f; 33 | 34 | /** Quantized MobileNet requires additional dequantization to the output probability. */ 35 | private static final float PROBABILITY_MEAN = 0.0f; 36 | 37 | private static final float PROBABILITY_STD = 255.0f; 38 | 39 | /** 40 | * Initializes a {@code ClassifierQuantizedMobileNet}. 41 | * 42 | * @param activity 43 | */ 44 | public ClassifierQuantizedEfficientNet(Activity activity, Device device, int numThreads) 45 | throws IOException { 46 | super(activity, device, numThreads); 47 | } 48 | 49 | @Override 50 | protected String getModelPath() { 51 | // you can download this file from 52 | // see build.gradle for where to obtain this file. It should be auto 53 | // downloaded into assets. 
54 | return "model_quant.tflite"; 55 | } 56 | 57 | @Override 58 | protected String getLabelPath() { 59 | return "labels_without_background.txt"; 60 | } 61 | 62 | @Override 63 | protected TensorOperator getPreprocessNormalizeOp() { 64 | return new NormalizeOp(IMAGE_MEAN, IMAGE_STD); 65 | } 66 | 67 | @Override 68 | protected TensorOperator getPostprocessNormalizeOp() { 69 | return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.tflite; 17 | 18 | import android.app.Activity; 19 | import java.io.IOException; 20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; 21 | import org.tensorflow.lite.support.common.TensorOperator; 22 | import org.tensorflow.lite.support.common.ops.NormalizeOp; 23 | 24 | /** This TensorFlow Lite classifier works with the quantized MobileNet model. */ 25 | public class ClassifierQuantizedMobileNet extends Classifier { 26 | 27 | /** 28 | * The quantized model does not require normalization, thus set mean as 0.0f, and std as 1.0f to 29 | * bypass the normalization. 30 | */ 31 | private static final float IMAGE_MEAN = 0.0f; 32 | 33 | private static final float IMAGE_STD = 1.0f; 34 | 35 | /** Quantized MobileNet requires additional dequantization to the output probability. */ 36 | private static final float PROBABILITY_MEAN = 0.0f; 37 | 38 | private static final float PROBABILITY_STD = 255.0f; 39 | 40 | /** 41 | * Initializes a {@code ClassifierQuantizedMobileNet}. 42 | * 43 | * @param activity 44 | */ 45 | public ClassifierQuantizedMobileNet(Activity activity, Device device, int numThreads) 46 | throws IOException { 47 | super(activity, device, numThreads); 48 | } 49 | 50 | @Override 51 | protected String getModelPath() { 52 | // you can download this file from 53 | // see build.gradle for where to obtain this file. It should be auto 54 | // downloaded into assets. 
55 | return "model_quant_0.tflite"; 56 | } 57 | 58 | @Override 59 | protected String getLabelPath() { 60 | return "labels.txt"; 61 | } 62 | 63 | @Override 64 | protected TensorOperator getPreprocessNormalizeOp() { 65 | return new NormalizeOp(IMAGE_MEAN, IMAGE_STD); 66 | } 67 | 68 | @Override 69 | protected TensorOperator getPostprocessNormalizeOp() { 70 | return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.library' 2 | 3 | android { 4 | compileSdkVersion 28 5 | buildToolsVersion "28.0.0" 6 | 7 | defaultConfig { 8 | minSdkVersion 21 9 | targetSdkVersion 28 10 | versionCode 1 11 | versionName "1.0" 12 | 13 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" 14 | 15 | } 16 | 17 | buildTypes { 18 | release { 19 | minifyEnabled false 20 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' 21 | } 22 | } 23 | compileOptions { 24 | sourceCompatibility = '1.8' 25 | targetCompatibility = '1.8' 26 | } 27 | aaptOptions { 28 | noCompress "tflite" 29 | } 30 | 31 | lintOptions { 32 | checkReleaseBuilds false 33 | // Or, if you prefer, you can continue to check for errors in release builds, 34 | // but continue the build even when errors are found: 35 | abortOnError false 36 | } 37 | } 38 | 39 | dependencies { 40 | implementation fileTree(dir: 'libs', include: ['*.jar']) 41 | implementation project(":models") 42 | implementation 'androidx.appcompat:appcompat:1.1.0' 43 | 44 | // Build off of nightly TensorFlow Lite Task Library 45 | implementation('org.tensorflow:tensorflow-lite-task-vision:0.0.0-nightly') { changing = true } 46 | implementation('org.tensorflow:tensorflow-lite-metadata:0.0.0-nightly') { changing = true } 47 | } 48 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # You can control the set of applied configuration files using the 3 | # proguardFiles setting in build.gradle. 4 | # 5 | # For more details, see 6 | # http://developer.android.com/guide/developing/tools/proguard.html 7 | 8 | # If your project uses WebView with JS, uncomment the following 9 | # and specify the fully qualified class name to the JavaScript interface 10 | # class: 11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 12 | # public *; 13 | #} 14 | 15 | # Uncomment this to preserve the line number information for 16 | # debugging stack traces. 17 | #-keepattributes SourceFile,LineNumberTable 18 | 19 | # If you keep the line number information, uncomment this to 20 | # hide the original source file name. 
21 | #-renamesourcefileattribute SourceFile 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.tflite; 17 | 18 | import android.app.Activity; 19 | import java.io.IOException; 20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; 21 | 22 | /** This TensorFlowLite classifier works with the float EfficientNet model. */ 23 | public class ClassifierFloatEfficientNet extends Classifier { 24 | 25 | /** 26 | * Initializes a {@code ClassifierFloatMobileNet}. 27 | * 28 | * @param device a {@link Device} object to configure the hardware accelerator 29 | * @param numThreads the number of threads during the inference 30 | * @throws IOException if the model is not loaded correctly 31 | */ 32 | public ClassifierFloatEfficientNet(Activity activity, Device device, int numThreads) 33 | throws IOException { 34 | super(activity, device, numThreads); 35 | } 36 | 37 | @Override 38 | protected String getModelPath() { 39 | // you can download this file from 40 | // see build.gradle for where to obtain this file. It should be auto 41 | // downloaded into assets. 42 | //return "efficientnet-lite0-fp32.tflite"; 43 | return "model.tflite"; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.tflite; 17 | 18 | import android.app.Activity; 19 | import java.io.IOException; 20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; 21 | 22 | /** This TensorFlowLite classifier works with the float MobileNet model. */ 23 | public class ClassifierFloatMobileNet extends Classifier { 24 | /** 25 | * Initializes a {@code ClassifierFloatMobileNet}. 26 | * 27 | * @param device a {@link Device} object to configure the hardware accelerator 28 | * @param numThreads the number of threads during the inference 29 | * @throws IOException if the model is not loaded correctly 30 | */ 31 | public ClassifierFloatMobileNet(Activity activity, Device device, int numThreads) 32 | throws IOException { 33 | super(activity, device, numThreads); 34 | } 35 | 36 | @Override 37 | protected String getModelPath() { 38 | // you can download this file from 39 | // see build.gradle for where to obtain this file. It should be auto 40 | // downloaded into assets. 41 | return "mobilenet_v1_1.0_224.tflite"; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.tflite; 17 | 18 | import android.app.Activity; 19 | import java.io.IOException; 20 | 21 | /** This TensorFlow Lite classifier works with the quantized EfficientNet model. */ 22 | public class ClassifierQuantizedEfficientNet extends Classifier { 23 | 24 | /** 25 | * Initializes a {@code ClassifierQuantizedMobileNet}. 26 | * 27 | * @param device a {@link Device} object to configure the hardware accelerator 28 | * @param numThreads the number of threads during the inference 29 | * @throws IOException if the model is not loaded correctly 30 | */ 31 | public ClassifierQuantizedEfficientNet(Activity activity, Device device, int numThreads) 32 | throws IOException { 33 | super(activity, device, numThreads); 34 | } 35 | 36 | @Override 37 | protected String getModelPath() { 38 | // you can download this file from 39 | // see build.gradle for where to obtain this file. It should be auto 40 | // downloaded into assets. 
41 | return "efficientnet-lite0-int8.tflite"; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | ==============================================================================*/ 15 | 16 | package org.tensorflow.lite.examples.classification.tflite; 17 | 18 | import android.app.Activity; 19 | import java.io.IOException; 20 | import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; 21 | 22 | /** This TensorFlow Lite classifier works with the quantized MobileNet model. */ 23 | public class ClassifierQuantizedMobileNet extends Classifier { 24 | 25 | /** 26 | * Initializes a {@code ClassifierQuantizedMobileNet}. 27 | * 28 | * @param device a {@link Device} object to configure the hardware accelerator 29 | * @param numThreads the number of threads during the inference 30 | * @throws IOException if the model is not loaded correctly 31 | */ 32 | public ClassifierQuantizedMobileNet(Activity activity, Device device, int numThreads) 33 | throws IOException { 34 | super(activity, device, numThreads); 35 | } 36 | 37 | @Override 38 | protected String getModelPath() { 39 | // you can download this file from 40 | // see build.gradle for where to obtain this file. It should be auto 41 | // downloaded into assets. 42 | return "mobilenet_v1_1.0_224_quant.tflite"; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.library' 2 | apply plugin: 'de.undercouch.download' 3 | 4 | android { 5 | compileSdkVersion 28 6 | buildToolsVersion "28.0.0" 7 | 8 | defaultConfig { 9 | minSdkVersion 21 10 | targetSdkVersion 28 11 | versionCode 1 12 | versionName "1.0" 13 | 14 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" 15 | 16 | } 17 | 18 | buildTypes { 19 | release { 20 | minifyEnabled false 21 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' 22 | } 23 | } 24 | 25 | aaptOptions { 26 | noCompress "tflite" 27 | } 28 | 29 | lintOptions { 30 | checkReleaseBuilds false 31 | // Or, if you prefer, you can continue to check for errors in release builds, 32 | // but continue the build even when errors are found: 33 | abortOnError false 34 | } 35 | } 36 | 37 | // Download default models; if you wish to use your own models then 38 | // place them in the "assets" directory and comment out this line. 
39 | project.ext.ASSET_DIR = projectDir.toString() + '/src/main/assets' 40 | apply from:'download.gradle' 41 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/download.gradle: -------------------------------------------------------------------------------- 1 | def modelFloatDownloadUrl = "https://github.com/isl-org/MiDaS/releases/download/v2_1/model_opt.tflite" 2 | def modelFloatFile = "model_opt.tflite" 3 | 4 | task downloadModelFloat(type: Download) { 5 | src "${modelFloatDownloadUrl}" 6 | dest project.ext.ASSET_DIR + "/${modelFloatFile}" 7 | overwrite false 8 | } 9 | 10 | preBuild.dependsOn downloadModelFloat 11 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # You can control the set of applied configuration files using the 3 | # proguardFiles setting in build.gradle. 4 | # 5 | # For more details, see 6 | # http://developer.android.com/guide/developing/tools/proguard.html 7 | 8 | # If your project uses WebView with JS, uncomment the following 9 | # and specify the fully qualified class name to the JavaScript interface 10 | # class: 11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 12 | # public *; 13 | #} 14 | 15 | # Uncomment this to preserve the line number information for 16 | # debugging stack traces. 17 | #-keepattributes SourceFile,LineNumberTable 18 | 19 | # If you keep the line number information, uncomment this to 20 | # hide the original source file name. 21 | #-renamesourcefileattribute SourceFile 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/models/src/main/assets/run_tflite.py: -------------------------------------------------------------------------------- 1 | # Flex ops are included in the nightly build of the TensorFlow Python package. You can use TFLite models containing Flex ops by the same Python API as normal TFLite models. The nightly TensorFlow build can be installed with this command: 2 | # Flex ops will be added to the TensorFlow Python package's and the tflite_runtime package from version 2.3 for Linux and 2.4 for other environments. 
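# Editor's note (illustrative, not part of the original script): the float MiDaS model that
# download.gradle above fetches into this assets directory can also be grabbed directly, e.g.:
#   import urllib.request
#   urllib.request.urlretrieve(
#       "https://github.com/isl-org/MiDaS/releases/download/v2_1/model_opt.tflite",
#       "model_opt.tflite")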
3 | # https://www.tensorflow.org/lite/guide/ops_select#running_the_model 4 | 5 | # You must use: tf-nightly 6 | # pip install tf-nightly 7 | 8 | import os 9 | import glob 10 | import cv2 11 | import numpy as np 12 | 13 | import tensorflow as tf 14 | 15 | width=256 16 | height=256 17 | model_name="model.tflite" 18 | #model_name="model_quant.tflite" 19 | image_name="dog.jpg" 20 | 21 | # input 22 | img = cv2.imread(image_name) 23 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 24 | 25 | mean=[0.485, 0.456, 0.406] 26 | std=[0.229, 0.224, 0.225] 27 | img = (img - mean) / std 28 | 29 | img_resized = tf.image.resize(img, [width,height], method='bicubic', preserve_aspect_ratio=False) 30 | #img_resized = tf.transpose(img_resized, [2, 0, 1]) 31 | img_input = img_resized.numpy() 32 | reshape_img = img_input.reshape(1,width,height,3) 33 | tensor = tf.convert_to_tensor(reshape_img, dtype=tf.float32) 34 | 35 | # load model 36 | print("Load model...") 37 | interpreter = tf.lite.Interpreter(model_path=model_name) 38 | print("Allocate tensor...") 39 | interpreter.allocate_tensors() 40 | print("Get input/output details...") 41 | input_details = interpreter.get_input_details() 42 | output_details = interpreter.get_output_details() 43 | print("Get input shape...") 44 | input_shape = input_details[0]['shape'] 45 | print(input_shape) 46 | print(input_details) 47 | print(output_details) 48 | #input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32) 49 | print("Set input tensor...") 50 | interpreter.set_tensor(input_details[0]['index'], tensor) 51 | 52 | print("invoke()...") 53 | interpreter.invoke() 54 | 55 | # The function `get_tensor()` returns a copy of the tensor data. 56 | # Use `tensor()` in order to get a pointer to the tensor. 57 | print("get output tensor...") 58 | output = interpreter.get_tensor(output_details[0]['index']) 59 | #output = np.squeeze(output) 60 | output = output.reshape(width, height) 61 | #print(output) 62 | prediction = np.array(output) 63 | print("reshape prediction...") 64 | prediction = prediction.reshape(width, height) 65 | 66 | # output file 67 | #prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) 68 | print(" Write image to: output.png") 69 | depth_min = prediction.min() 70 | depth_max = prediction.max() 71 | img_out = (255 * (prediction - depth_min) / (depth_max - depth_min)).astype("uint8") 72 | print("save output image...") 73 | cv2.imwrite("output.png", img_out) 74 | 75 | print("finished") -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/android/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'TFLite Image Classification Demo App' 2 | include ':app', ':lib_support', ':lib_task_api', ':models' -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore model file 2 | #*.tflite 3 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | 
-------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcuserdata/admin.xcuserdatad/UserInterfaceState.xcuserstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcuserdata/admin.xcuserdatad/UserInterfaceState.xcuserstate -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas.xcodeproj/xcuserdata/admin.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | PoseNet.xcscheme_^#shared#^_ 8 | 9 | orderHint 10 | 3 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | import UIKit 16 | 17 | @UIApplicationMain 18 | class AppDelegate: UIResponder, UIApplicationDelegate { 19 | 20 | var window: UIWindow? 21 | 22 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) 
-> Bool { 23 | return true 24 | } 25 | 26 | func applicationWillResignActive(_ application: UIApplication) { 27 | } 28 | 29 | func applicationDidEnterBackground(_ application: UIApplication) { 30 | } 31 | 32 | func applicationWillEnterForeground(_ application: UIApplication) { 33 | } 34 | 35 | func applicationDidBecomeActive(_ application: UIApplication) { 36 | } 37 | 38 | func applicationWillTerminate(_ application: UIApplication) { 39 | } 40 | } 41 | 42 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | {"images":[{"size":"60x60","expected-size":"180","filename":"180.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"40x40","expected-size":"80","filename":"80.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"40x40","expected-size":"120","filename":"120.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"60x60","expected-size":"120","filename":"120.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"57x57","expected-size":"57","filename":"57.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"1x"},{"size":"29x29","expected-size":"58","filename":"58.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"29x29","expected-size":"29","filename":"29.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"1x"},{"size":"29x29","expected-size":"87","filename":"87.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"57x57","expected-size":"114","filename":"114.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"20x20","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"20x20","expected-size":"60","filename":"60.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"1024x1024","filename":"1024.png","expected-size":"1024","idiom":"ios-marketing","folder":"Assets.xcassets/AppIcon.appiconset/","scale":"1x"},{"size":"40x40","expected-size":"80","filename":"80.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"72x72","expected-size":"72","filename":"72.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"76x76","expected-size":"152","filename":"152.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"50x50","expected-size":"100","filename":"100.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"29x29","expected-size":"58","filename":"58.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"76x76","expected-size":"76","filename":"76.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"29x29","expected-size":"29","filename":"29.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"50x50","expected-size":"50","filename":"50.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"72x72","expected-size":"144","filename":"144.png","folder":"Assets
.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"40x40","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"83.5x83.5","expected-size":"167","filename":"167.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"20x20","expected-size":"20","filename":"20.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"20x20","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"}]} -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "version" : 1, 4 | "author" : "xcode" 5 | } 6 | } -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Camera Feed/PreviewView.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | import UIKit 16 | import AVFoundation 17 | 18 | /// The camera frame is displayed on this view. 19 | class PreviewView: UIView { 20 | var previewLayer: AVCaptureVideoPreviewLayer { 21 | guard let layer = layer as? AVCaptureVideoPreviewLayer else { 22 | fatalError("Layer expected is of type VideoPreviewLayer") 23 | } 24 | return layer 25 | } 26 | 27 | var session: AVCaptureSession? { 28 | get { 29 | return previewLayer.session 30 | } 31 | set { 32 | previewLayer.session = newValue 33 | } 34 | } 35 | 36 | override class var layerClass: AnyClass { 37 | return AVCaptureVideoPreviewLayer.self 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Cells/InfoCell.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | import UIKit 16 | 17 | /// Table cell for inference result in bottom view. 
18 | class InfoCell: UITableViewCell { 19 | @IBOutlet weak var fieldNameLabel: UILabel! 20 | @IBOutlet weak var infoLabel: UILabel! 21 | } 22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Constants.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================= 15 | 16 | enum Constants { 17 | // MARK: - Constants related to the image processing 18 | static let bgraPixel = (channels: 4, alphaComponent: 3, lastBgrComponent: 2) 19 | static let rgbPixelChannels = 3 20 | static let maxRGBValue: Float32 = 255.0 21 | 22 | // MARK: - Constants related to the model interperter 23 | static let defaultThreadCount = 2 24 | static let defaultDelegate: Delegates = .CPU 25 | } 26 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Extensions/CGSizeExtension.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================= 15 | 16 | import Accelerate 17 | import Foundation 18 | 19 | extension CGSize { 20 | /// Returns `CGAfineTransform` to resize `self` to fit in destination size, keeping aspect ratio 21 | /// of `self`. `self` image is resized to be inscribe to destination size and located in center of 22 | /// destination. 23 | /// 24 | /// - Parameter toFitIn: destination size to be filled. 25 | /// - Returns: `CGAffineTransform` to transform `self` image to `dest` image. 26 | func transformKeepAspect(toFitIn dest: CGSize) -> CGAffineTransform { 27 | let sourceRatio = self.height / self.width 28 | let destRatio = dest.height / dest.width 29 | 30 | // Calculates ratio `self` to `dest`. 31 | var ratio: CGFloat 32 | var x: CGFloat = 0 33 | var y: CGFloat = 0 34 | if sourceRatio > destRatio { 35 | // Source size is taller than destination. Resized to fit in destination height, and find 36 | // horizontal starting point to be centered. 
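      // Worked example (editor's note): fitting a 1000x2000 (w x h) source into a 500x500
      // destination gives sourceRatio = 2.0 > destRatio = 1.0, so ratio = 500 / 2000 = 0.25
      // and x = (500 - 1000 * 0.25) / 2 = 125, centering the scaled 250x500 image horizontally.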
37 | ratio = dest.height / self.height 38 | x = (dest.width - self.width * ratio) / 2 39 | } else { 40 | ratio = dest.width / self.width 41 | y = (dest.height - self.height * ratio) / 2 42 | } 43 | return CGAffineTransform(a: ratio, b: 0, c: 0, d: ratio, tx: x, ty: y) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Extensions/TFLiteExtension.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | // ============================================================================= 15 | 16 | import Accelerate 17 | import CoreImage 18 | import Foundation 19 | import TensorFlowLite 20 | 21 | // MARK: - Data 22 | extension Data { 23 | /// Creates a new buffer by copying the buffer pointer of the given array. 24 | /// 25 | /// - Warning: The given array's element type `T` must be trivial in that it can be copied bit 26 | /// for bit with no indirection or reference-counting operations; otherwise, reinterpreting 27 | /// data from the resulting buffer has undefined behavior. 28 | /// - Parameter array: An array with elements of type `T`. 29 | init(copyingBufferOf array: [T]) { 30 | self = array.withUnsafeBufferPointer(Data.init) 31 | } 32 | 33 | /// Convert a Data instance to Array representation. 34 | func toArray(type: T.Type) -> [T] where T: AdditiveArithmetic { 35 | var array = [T](repeating: T.zero, count: self.count / MemoryLayout.stride) 36 | _ = array.withUnsafeMutableBytes { self.copyBytes(to: $0) } 37 | return array 38 | } 39 | } 40 | 41 | // MARK: - Wrappers 42 | /// Struct for handling multidimension `Data` in flat `Array`. 43 | struct FlatArray { 44 | private var array: [Element] 45 | var dimensions: [Int] 46 | 47 | init(tensor: Tensor) { 48 | dimensions = tensor.shape.dimensions 49 | array = tensor.data.toArray(type: Element.self) 50 | } 51 | 52 | private func flatIndex(_ index: [Int]) -> Int { 53 | guard index.count == dimensions.count else { 54 | fatalError("Invalid index: got \(index.count) index(es) for \(dimensions.count) index(es).") 55 | } 56 | 57 | var result = 0 58 | for i in 0.. index[i] else { 60 | fatalError("Invalid index: \(index[i]) is bigger than \(dimensions[i])") 61 | } 62 | result = dimensions[i] * result + index[i] 63 | } 64 | return result 65 | } 66 | 67 | subscript(_ index: Int...) 
-> Element { 68 | get { 69 | return array[flatIndex(index)] 70 | } 71 | set(newValue) { 72 | array[flatIndex(index)] = newValue 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | NSCameraUsageDescription 24 | This app will use camera to continuously estimate the depth map. 25 | UILaunchStoryboardName 26 | LaunchScreen 27 | UIMainStoryboardFile 28 | Main 29 | UIRequiredDeviceCapabilities 30 | 31 | armv7 32 | 33 | UISupportedInterfaceOrientations 34 | 35 | UIInterfaceOrientationPortrait 36 | 37 | UISupportedInterfaceOrientations~ipad 38 | 39 | UIInterfaceOrientationPortrait 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Midas/Views/OverlayView.swift: -------------------------------------------------------------------------------- 1 | // Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | import UIKit 16 | 17 | /// UIView for rendering inference output. 
18 | class OverlayView: UIView { 19 | 20 | var dots = [CGPoint]() 21 | var lines = [Line]() 22 | 23 | override func draw(_ rect: CGRect) { 24 | for dot in dots { 25 | drawDot(of: dot) 26 | } 27 | for line in lines { 28 | drawLine(of: line) 29 | } 30 | } 31 | 32 | func drawDot(of dot: CGPoint) { 33 | let dotRect = CGRect( 34 | x: dot.x - Traits.dot.radius / 2, y: dot.y - Traits.dot.radius / 2, 35 | width: Traits.dot.radius, height: Traits.dot.radius) 36 | let dotPath = UIBezierPath(ovalIn: dotRect) 37 | 38 | Traits.dot.color.setFill() 39 | dotPath.fill() 40 | } 41 | 42 | func drawLine(of line: Line) { 43 | let linePath = UIBezierPath() 44 | linePath.move(to: CGPoint(x: line.from.x, y: line.from.y)) 45 | linePath.addLine(to: CGPoint(x: line.to.x, y: line.to.y)) 46 | linePath.close() 47 | 48 | linePath.lineWidth = Traits.line.width 49 | Traits.line.color.setStroke() 50 | 51 | linePath.stroke() 52 | } 53 | 54 | func clear() { 55 | self.dots = [] 56 | self.lines = [] 57 | } 58 | } 59 | 60 | private enum Traits { 61 | static let dot = (radius: CGFloat(5), color: UIColor.orange) 62 | static let line = (width: CGFloat(1.0), color: UIColor.orange) 63 | } 64 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/Podfile: -------------------------------------------------------------------------------- 1 | # Uncomment the next line to define a global platform for your project 2 | platform :ios, '12.0' 3 | 4 | target 'Midas' do 5 | # Comment the next line if you're not using Swift and don't want to use dynamic frameworks 6 | use_frameworks! 7 | 8 | # Pods for Midas 9 | pod 'TensorFlowLiteSwift', '~> 0.0.1-nightly' 10 | pod 'TensorFlowLiteSwift/CoreML', '~> 0.0.1-nightly' 11 | pod 'TensorFlowLiteSwift/Metal', '~> 0.0.1-nightly' 12 | end 13 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/mobile/ios/RunScripts/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Download TF Lite model from the internet if it does not exist. 3 | 4 | TFLITE_MODEL="model_opt.tflite" 5 | TFLITE_FILE="Midas/Model/${TFLITE_MODEL}" 6 | MODEL_SRC="https://github.com/isl-org/MiDaS/releases/download/v2/${TFLITE_MODEL}" 7 | 8 | if test -f "${TFLITE_FILE}"; then 9 | echo "INFO: TF Lite model already exists. Skip downloading and use the local model." 10 | else 11 | curl --create-dirs -o "${TFLITE_FILE}" -LJO "${MODEL_SRC}" 12 | echo "INFO: Downloaded TensorFlow Lite model to ${TFLITE_FILE}." 
13 | fi 14 | 15 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/output/.placeholder -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Alexey 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/do_catkin_make.sh: -------------------------------------------------------------------------------- 1 | mkdir src 2 | catkin_make 3 | source devel/setup.bash 4 | echo $ROS_PACKAGE_PATH 5 | chmod +x ./devel/setup.bash 6 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/downloads.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/.ros 2 | wget https://github.com/isl-org/MiDaS/releases/download/v2_1/model-small-traced.pt 3 | cp ./model-small-traced.pt ~/.ros/model-small-traced.pt 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_melodic_ubuntu_17_18.sh: -------------------------------------------------------------------------------- 1 | #@title { display-mode: "code" } 2 | 3 | #from http://wiki.ros.org/indigo/Installation/Ubuntu 4 | 5 | #1.2 Setup sources.list 6 | sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' 7 | 8 | # 1.3 Setup keys 9 | sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654 10 | sudo apt-key adv --keyserver 'hkp://ha.pool.sks-keyservers.net:80' --recv-key 421C365BD9FF1F717815A3895523BAEEB01FA116 11 | 12 | curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add - 13 | 14 | # 1.4 Installation 15 | sudo apt-get update 16 | sudo apt-get upgrade 17 | 18 | # Desktop-Full Install: 19 | sudo apt-get install ros-melodic-desktop-full 20 | 21 | printf "\nsource /opt/ros/melodic/setup.bash\n" >> ~/.bashrc 22 | 23 | # 1.5 Initialize rosdep 24 | sudo rosdep init 25 | rosdep update 26 | 27 | 28 | # 1.7 Getting rosinstall (python) 29 | sudo apt-get install python-rosinstall 30 | sudo apt-get install python-catkin-tools 31 | sudo apt-get install python-rospy 32 | sudo apt-get install python-rosdep 33 | sudo apt-get install python-roscd 34 | sudo apt-get install python-pip -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/install_ros_noetic_ubuntu_20.sh: -------------------------------------------------------------------------------- 1 | #@title { display-mode: "code" } 2 | 3 | #from http://wiki.ros.org/indigo/Installation/Ubuntu 4 | 5 | #1.2 Setup sources.list 6 | sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' 7 | 8 | # 1.3 Setup keys 9 | sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654 10 | 11 | curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add - 12 | 13 | # 1.4 Installation 14 | sudo apt-get update 15 | sudo apt-get upgrade 16 | 17 | # Desktop-Full Install: 18 | sudo apt-get install ros-noetic-desktop-full 19 | 20 | printf "\nsource /opt/ros/noetic/setup.bash\n" >> ~/.bashrc 21 | 22 | # 1.5 Initialize rosdep 23 | sudo rosdep init 24 | rosdep update 25 | 26 | 27 | # 1.7 Getting rosinstall (python) 28 | sudo apt-get install 
python3-rosinstall 29 | sudo apt-get install python3-catkin-tools 30 | sudo apt-get install python3-rospy 31 | sudo apt-get install python3-rosdep 32 | sudo apt-get install python3-roscd 33 | sudo apt-get install python3-pip -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/additions/make_package_cpp.sh: -------------------------------------------------------------------------------- 1 | cd ~/catkin_ws/src 2 | catkin_create_pkg midas_cpp std_msgs roscpp cv_bridge sensor_msgs image_transport 3 | cd ~/catkin_ws 4 | catkin_make 5 | 6 | chmod +x ~/catkin_ws/devel/setup.bash 7 | printf "\nsource ~/catkin_ws/devel/setup.bash" >> ~/.bashrc 8 | source ~/catkin_ws/devel/setup.bash 9 | 10 | 11 | sudo rosdep init 12 | rosdep update 13 | #rospack depends1 midas_cpp 14 | roscd midas_cpp 15 | #cat package.xml 16 | #rospack depends midas_cpp -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/launch_midas_cpp.sh: -------------------------------------------------------------------------------- 1 | source ~/catkin_ws/devel/setup.bash 2 | roslaunch midas_cpp midas_cpp.launch model_name:="model-small-traced.pt" input_topic:="image_topic" output_topic:="midas_topic" out_orig_size:="true" -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_cpp.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/launch/midas_talker_listener.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | midas_cpp 4 | 0.1.0 5 | The midas_cpp package 6 | 7 | Alexey Bochkovskiy 8 | MIT 9 | https://github.com/isl-org/MiDaS/tree/master/ros 10 | 11 | 12 | 13 | 14 | 15 | 16 | TODO 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | catkin 52 | cv_bridge 53 | image_transport 54 | roscpp 55 | rospy 56 | sensor_msgs 57 | std_msgs 58 | cv_bridge 59 | image_transport 60 | roscpp 61 | rospy 62 | sensor_msgs 63 | std_msgs 64 | cv_bridge 65 | image_transport 66 | roscpp 67 | rospy 68 | sensor_msgs 69 | std_msgs 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import print_function 3 | 4 | import roslib 5 | #roslib.load_manifest('my_package') 6 | import sys 7 | import rospy 8 | import cv2 9 | import numpy as np 10 | from std_msgs.msg import String 11 | 
from sensor_msgs.msg import Image 12 | from cv_bridge import CvBridge, CvBridgeError 13 | 14 | class video_show: 15 | 16 | def __init__(self): 17 | self.show_output = rospy.get_param('~show_output', True) 18 | self.save_output = rospy.get_param('~save_output', False) 19 | self.output_video_file = rospy.get_param('~output_video_file','result.mp4') 20 | # rospy.loginfo(f"Listener - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}") 21 | 22 | self.bridge = CvBridge() 23 | self.image_sub = rospy.Subscriber("midas_topic", Image, self.callback) 24 | 25 | def callback(self, data): 26 | try: 27 | cv_image = self.bridge.imgmsg_to_cv2(data) 28 | except CvBridgeError as e: 29 | print(e) 30 | return 31 | 32 | if cv_image.size == 0: 33 | return 34 | 35 | rospy.loginfo("Listener: Received new frame") 36 | cv_image = cv_image.astype("uint8") 37 | 38 | if self.show_output==True: 39 | cv2.imshow("video_show", cv_image) 40 | cv2.waitKey(10) 41 | 42 | if self.save_output==True: 43 | if self.video_writer_init==False: 44 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 45 | self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0])) 46 | 47 | self.out.write(cv_image) 48 | 49 | 50 | 51 | def main(args): 52 | rospy.init_node('listener', anonymous=True) 53 | ic = video_show() 54 | try: 55 | rospy.spin() 56 | except KeyboardInterrupt: 57 | print("Shutting down") 58 | cv2.destroyAllWindows() 59 | 60 | if __name__ == '__main__': 61 | main(sys.argv) -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/listener_original.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from __future__ import print_function 3 | 4 | import roslib 5 | #roslib.load_manifest('my_package') 6 | import sys 7 | import rospy 8 | import cv2 9 | import numpy as np 10 | from std_msgs.msg import String 11 | from sensor_msgs.msg import Image 12 | from cv_bridge import CvBridge, CvBridgeError 13 | 14 | class video_show: 15 | 16 | def __init__(self): 17 | self.show_output = rospy.get_param('~show_output', True) 18 | self.save_output = rospy.get_param('~save_output', False) 19 | self.output_video_file = rospy.get_param('~output_video_file','result.mp4') 20 | # rospy.loginfo(f"Listener original - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}") 21 | 22 | self.bridge = CvBridge() 23 | self.image_sub = rospy.Subscriber("image_topic", Image, self.callback) 24 | 25 | def callback(self, data): 26 | try: 27 | cv_image = self.bridge.imgmsg_to_cv2(data) 28 | except CvBridgeError as e: 29 | print(e) 30 | return 31 | 32 | if cv_image.size == 0: 33 | return 34 | 35 | rospy.loginfo("Listener_original: Received new frame") 36 | cv_image = cv_image.astype("uint8") 37 | 38 | if self.show_output==True: 39 | cv2.imshow("video_show_orig", cv_image) 40 | cv2.waitKey(10) 41 | 42 | if self.save_output==True: 43 | if self.video_writer_init==False: 44 | fourcc = cv2.VideoWriter_fourcc(*'XVID') 45 | self.out = cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0])) 46 | 47 | self.out.write(cv_image) 48 | 49 | 50 | 51 | def main(args): 52 | rospy.init_node('listener_original', anonymous=True) 53 | ic = video_show() 54 | try: 55 | rospy.spin() 56 | except KeyboardInterrupt: 57 | print("Shutting 
down") 58 | cv2.destroyAllWindows() 59 | 60 | if __name__ == '__main__': 61 | main(sys.argv) -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/midas_cpp/scripts/talker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | import roslib 5 | #roslib.load_manifest('my_package') 6 | import sys 7 | import rospy 8 | import cv2 9 | from std_msgs.msg import String 10 | from sensor_msgs.msg import Image 11 | from cv_bridge import CvBridge, CvBridgeError 12 | 13 | 14 | def talker(): 15 | rospy.init_node('talker', anonymous=True) 16 | 17 | use_camera = rospy.get_param('~use_camera', False) 18 | input_video_file = rospy.get_param('~input_video_file','test.mp4') 19 | # rospy.loginfo(f"Talker - params: use_camera={use_camera}, input_video_file={input_video_file}") 20 | 21 | # rospy.loginfo("Talker: Trying to open a video stream") 22 | if use_camera == True: 23 | cap = cv2.VideoCapture(0) 24 | else: 25 | cap = cv2.VideoCapture(input_video_file) 26 | 27 | pub = rospy.Publisher('image_topic', Image, queue_size=1) 28 | rate = rospy.Rate(30) # 30hz 29 | bridge = CvBridge() 30 | 31 | while not rospy.is_shutdown(): 32 | ret, cv_image = cap.read() 33 | if ret==False: 34 | print("Talker: Video is over") 35 | rospy.loginfo("Video is over") 36 | return 37 | 38 | try: 39 | image = bridge.cv2_to_imgmsg(cv_image, "bgr8") 40 | except CvBridgeError as e: 41 | rospy.logerr("Talker: cv2image conversion failed: ", e) 42 | print(e) 43 | continue 44 | 45 | rospy.loginfo("Talker: Publishing frame") 46 | pub.publish(image) 47 | rate.sleep() 48 | 49 | if __name__ == '__main__': 50 | try: 51 | talker() 52 | except rospy.ROSInterruptException: 53 | pass 54 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/ros/run_talker_listener_test.sh: -------------------------------------------------------------------------------- 1 | # place any test.mp4 file near with this file 2 | 3 | # roscore 4 | # rosnode kill -a 5 | 6 | source ~/catkin_ws/devel/setup.bash 7 | 8 | roscore & 9 | P1=$! 10 | rosrun midas_cpp talker.py & 11 | P2=$! 12 | rosrun midas_cpp listener_original.py & 13 | P3=$! 14 | rosrun midas_cpp listener.py & 15 | P4=$! 16 | wait $P1 $P2 $P3 $P4 -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/input/.placeholder -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/make_onnx_model.py: -------------------------------------------------------------------------------- 1 | """Compute depth maps for images in the input folder. 
2 | """ 3 | import os 4 | import ntpath 5 | import glob 6 | import torch 7 | import utils 8 | import cv2 9 | import numpy as np 10 | from torchvision.transforms import Compose, Normalize 11 | from torchvision import transforms 12 | 13 | from shutil import copyfile 14 | import fileinput 15 | import sys 16 | sys.path.append(os.getcwd() + '/..') 17 | 18 | def modify_file(): 19 | modify_filename = '../midas/blocks.py' 20 | copyfile(modify_filename, modify_filename+'.bak') 21 | 22 | with open(modify_filename, 'r') as file : 23 | filedata = file.read() 24 | 25 | filedata = filedata.replace('align_corners=True', 'align_corners=False') 26 | filedata = filedata.replace('import torch.nn as nn', 'import torch.nn as nn\nimport torchvision.models as models') 27 | filedata = filedata.replace('torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")', 'models.resnext101_32x8d()') 28 | 29 | with open(modify_filename, 'w') as file: 30 | file.write(filedata) 31 | 32 | def restore_file(): 33 | modify_filename = '../midas/blocks.py' 34 | copyfile(modify_filename+'.bak', modify_filename) 35 | 36 | modify_file() 37 | 38 | from midas.midas_net import MidasNet 39 | from midas.transforms import Resize, NormalizeImage, PrepareForNet 40 | 41 | restore_file() 42 | 43 | 44 | class MidasNet_preprocessing(MidasNet): 45 | """Network for monocular depth estimation. 46 | """ 47 | def forward(self, x): 48 | """Forward pass. 49 | 50 | Args: 51 | x (tensor): input data (image) 52 | 53 | Returns: 54 | tensor: depth 55 | """ 56 | 57 | mean = torch.tensor([0.485, 0.456, 0.406]) 58 | std = torch.tensor([0.229, 0.224, 0.225]) 59 | x.sub_(mean[None, :, None, None]).div_(std[None, :, None, None]) 60 | 61 | return MidasNet.forward(self, x) 62 | 63 | 64 | def run(model_path): 65 | """Run MonoDepthNN to compute depth maps. 
66 | 67 | Args: 68 | model_path (str): path to saved model 69 | """ 70 | print("initialize") 71 | 72 | # select device 73 | 74 | # load network 75 | #model = MidasNet(model_path, non_negative=True) 76 | model = MidasNet_preprocessing(model_path, non_negative=True) 77 | 78 | model.eval() 79 | 80 | print("start processing") 81 | 82 | # input 83 | img_input = np.zeros((3, 384, 384), np.float32) 84 | 85 | # compute 86 | with torch.no_grad(): 87 | sample = torch.from_numpy(img_input).unsqueeze(0) 88 | prediction = model.forward(sample) 89 | prediction = ( 90 | torch.nn.functional.interpolate( 91 | prediction.unsqueeze(1), 92 | size=img_input.shape[:2], 93 | mode="bicubic", 94 | align_corners=False, 95 | ) 96 | .squeeze() 97 | .cpu() 98 | .numpy() 99 | ) 100 | 101 | torch.onnx.export(model, sample, ntpath.basename(model_path).rsplit('.', 1)[0]+'.onnx', opset_version=9) 102 | 103 | print("finished") 104 | 105 | 106 | if __name__ == "__main__": 107 | # set paths 108 | # MODEL_PATH = "model.pt" 109 | MODEL_PATH = "../model-f6b98070.pt" 110 | 111 | # compute depth maps 112 | run(MODEL_PATH) 113 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XLabs-AI/x-flux/47495425dbed499be1e8e5a6e52628b07349cba2/src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/output/.placeholder -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/run_onnx.py: -------------------------------------------------------------------------------- 1 | """Compute depth maps for images in the input folder. 2 | """ 3 | import os 4 | import glob 5 | import utils 6 | import cv2 7 | import sys 8 | import numpy as np 9 | import argparse 10 | 11 | import onnx 12 | import onnxruntime as rt 13 | 14 | from transforms import Resize, NormalizeImage, PrepareForNet 15 | 16 | 17 | def run(input_path, output_path, model_path, model_type="large"): 18 | """Run MonoDepthNN to compute depth maps. 
19 | 20 | Args: 21 | input_path (str): path to input folder 22 | output_path (str): path to output folder 23 | model_path (str): path to saved model 24 | """ 25 | print("initialize") 26 | 27 | # select device 28 | device = "CUDA:0" 29 | #device = "CPU" 30 | print("device: %s" % device) 31 | 32 | # network resolution 33 | if model_type == "large": 34 | net_w, net_h = 384, 384 35 | elif model_type == "small": 36 | net_w, net_h = 256, 256 37 | else: 38 | print(f"model_type '{model_type}' not implemented, use: --model_type large") 39 | assert False 40 | 41 | # load network 42 | print("loading model...") 43 | model = rt.InferenceSession(model_path) 44 | input_name = model.get_inputs()[0].name 45 | output_name = model.get_outputs()[0].name 46 | 47 | resize_image = Resize( 48 | net_w, 49 | net_h, 50 | resize_target=None, 51 | keep_aspect_ratio=False, 52 | ensure_multiple_of=32, 53 | resize_method="upper_bound", 54 | image_interpolation_method=cv2.INTER_CUBIC, 55 | ) 56 | 57 | def compose2(f1, f2): 58 | return lambda x: f2(f1(x)) 59 | 60 | transform = compose2(resize_image, PrepareForNet()) 61 | 62 | # get input 63 | img_names = glob.glob(os.path.join(input_path, "*")) 64 | num_images = len(img_names) 65 | 66 | # create output folder 67 | os.makedirs(output_path, exist_ok=True) 68 | 69 | print("start processing") 70 | 71 | for ind, img_name in enumerate(img_names): 72 | 73 | print(" processing {} ({}/{})".format(img_name, ind + 1, num_images)) 74 | 75 | # input 76 | img = utils.read_image(img_name) 77 | img_input = transform({"image": img})["image"] 78 | 79 | # compute 80 | output = model.run([output_name], {input_name: img_input.reshape(1, 3, net_h, net_w).astype(np.float32)})[0] 81 | prediction = np.array(output).reshape(net_h, net_w) 82 | prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) 83 | 84 | # output 85 | filename = os.path.join( 86 | output_path, os.path.splitext(os.path.basename(img_name))[0] 87 | ) 88 | utils.write_depth(filename, prediction, bits=2) 89 | 90 | print("finished") 91 | 92 | 93 | if __name__ == "__main__": 94 | parser = argparse.ArgumentParser() 95 | 96 | parser.add_argument('-i', '--input_path', 97 | default='input', 98 | help='folder with input images' 99 | ) 100 | 101 | parser.add_argument('-o', '--output_path', 102 | default='output', 103 | help='folder for output images' 104 | ) 105 | 106 | parser.add_argument('-m', '--model_weights', 107 | default='model-f6b98070.onnx', 108 | help='path to the trained weights of model' 109 | ) 110 | 111 | parser.add_argument('-t', '--model_type', 112 | default='large', 113 | help='model type: large or small' 114 | ) 115 | 116 | args = parser.parse_args() 117 | 118 | # compute depth maps 119 | run(args.input_path, args.output_path, args.model_weights, args.model_type) 120 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/base_models/midas_repo/tf/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import cv2 4 | 5 | 6 | def write_pfm(path, image, scale=1): 7 | """Write pfm file. 8 | Args: 9 | path (str): pathto file 10 | image (array): data 11 | scale (int, optional): Scale. Defaults to 1. 
12 | """ 13 | 14 | with open(path, "wb") as file: 15 | color = None 16 | 17 | if image.dtype.name != "float32": 18 | raise Exception("Image dtype must be float32.") 19 | 20 | image = np.flipud(image) 21 | 22 | if len(image.shape) == 3 and image.shape[2] == 3: # color image 23 | color = True 24 | elif ( 25 | len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1 26 | ): # greyscale 27 | color = False 28 | else: 29 | raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.") 30 | 31 | file.write("PF\n" if color else "Pf\n".encode()) 32 | file.write("%d %d\n".encode() % (image.shape[1], image.shape[0])) 33 | 34 | endian = image.dtype.byteorder 35 | 36 | if endian == "<" or endian == "=" and sys.byteorder == "little": 37 | scale = -scale 38 | 39 | file.write("%f\n".encode() % scale) 40 | 41 | image.tofile(file) 42 | 43 | def read_image(path): 44 | """Read image and output RGB image (0-1). 45 | Args: 46 | path (str): path to file 47 | Returns: 48 | array: RGB image (0-1) 49 | """ 50 | img = cv2.imread(path) 51 | 52 | if img.ndim == 2: 53 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 54 | 55 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 56 | 57 | return img 58 | 59 | def write_depth(path, depth, bits=1): 60 | """Write depth map to pfm and png file. 61 | Args: 62 | path (str): filepath without extension 63 | depth (array): depth 64 | """ 65 | write_pfm(path + ".pfm", depth.astype(np.float32)) 66 | 67 | depth_min = depth.min() 68 | depth_max = depth.max() 69 | 70 | max_val = (2**(8*bits))-1 71 | 72 | if depth_max - depth_min > np.finfo("float").eps: 73 | out = max_val * (depth - depth_min) / (depth_max - depth_min) 74 | else: 75 | out = 0 76 | 77 | if bits == 1: 78 | cv2.imwrite(path + ".png", out.astype("uint8")) 79 | elif bits == 2: 80 | cv2.imwrite(path + ".png", out.astype("uint16")) 81 | 82 | return -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | from .depth_model import DepthModel 27 | 28 | def build_model(config) -> DepthModel: 29 | """Builds a model from a config. 
The model is specified by the model name and version in the config. The model is then constructed using the build_from_config function of the model interface. 30 | This function should be used to construct models for training and evaluation. 31 | 32 | Args: 33 | config (dict): Config dict. Config is constructed in utils/config.py. Each model has its own config file(s) saved in its root model folder. 34 | 35 | Returns: 36 | torch.nn.Module: Model corresponding to name and version as specified in config 37 | """ 38 | module_name = f"zoedepth.models.{config.model}" 39 | try: 40 | module = import_module(module_name) 41 | except ModuleNotFoundError as e: 42 | # print the original error message 43 | print(e) 44 | raise ValueError( 45 | f"Model {config.model} not found. Refer above error for details.") from e 46 | try: 47 | get_version = getattr(module, "get_version") 48 | except AttributeError as e: 49 | raise ValueError( 50 | f"Model {config.model} has no get_version function.") from e 51 | return get_version(config.version_name).build_from_config(config) 52 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/model_io.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import torch 26 | 27 | def load_state_dict(model, state_dict): 28 | """Load state_dict into model, handling DataParallel and DistributedDataParallel. Also checks for "model" key in state_dict. 29 | 30 | DataParallel prefixes state_dict keys with 'module.' when saving. 31 | If the model is not a DataParallel model but the state_dict is, then prefixes are removed. 32 | If the model is a DataParallel model but the state_dict is not, then prefixes are added. 33 | """ 34 | state_dict = state_dict.get('model', state_dict) 35 | # if model is a DataParallel model, then state_dict keys are prefixed with 'module.' 36 | 37 | do_prefix = isinstance( 38 | model, (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)) 39 | state = {} 40 | for k, v in state_dict.items(): 41 | if k.startswith('module.') and not do_prefix: 42 | k = k[7:] 43 | 44 | if not k.startswith('module.') and do_prefix: 45 | k = 'module.' 
+ k 46 | 47 | state[k] = v 48 | 49 | model.load_state_dict(state) 50 | print("Loaded successfully") 51 | return model 52 | 53 | 54 | def load_wts(model, checkpoint_path): 55 | ckpt = torch.load(checkpoint_path, map_location='cpu') 56 | return load_state_dict(model, ckpt) 57 | 58 | 59 | def load_state_dict_from_url(model, url, **kwargs): 60 | state_dict = torch.hub.load_state_dict_from_url(url, map_location='cpu', **kwargs) 61 | return load_state_dict(model, state_dict) 62 | 63 | 64 | def load_state_from_resource(model, resource: str): 65 | """Loads weights to the model from a given resource. A resource can be of following types: 66 | 1. URL. Prefixed with "url::" 67 | e.g. url::http(s)://url.resource.com/ckpt.pt 68 | 69 | 2. Local path. Prefixed with "local::" 70 | e.g. local::/path/to/ckpt.pt 71 | 72 | 73 | Args: 74 | model (torch.nn.Module): Model 75 | resource (str): resource string 76 | 77 | Returns: 78 | torch.nn.Module: Model with loaded weights 79 | """ 80 | print(f"Using pretrained resource {resource}") 81 | 82 | if resource.startswith('url::'): 83 | url = resource.split('url::')[1] 84 | return load_state_dict_from_url(model, url, progress=True) 85 | 86 | elif resource.startswith('local::'): 87 | path = resource.split('local::')[1] 88 | return load_wts(model, path) 89 | 90 | else: 91 | raise ValueError("Invalid resource type, only url:: and local:: are supported") 92 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/zoedepth/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
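A minimal usage sketch for the model_io.py helpers shown just above. It assumes the vendored zoedepth package directory is on the import path; the tiny Linear module and the checkpoint filename are hypothetical stand-ins for a real DepthModel and its weights, used only so the round trip is runnable.

import torch
from zoedepth.models.model_io import load_wts, load_state_from_resource

# Hypothetical stand-in for a DepthModel; real checkpoints target ZoeDepth.
model = torch.nn.Linear(4, 1)

# load_state_dict (used by both helpers below) unwraps an optional {"model": ...}
# key and adds or strips the DataParallel "module." prefix as needed.
torch.save({"model": model.state_dict()}, "demo_ckpt.pt")      # hypothetical path

model = load_wts(model, "demo_ckpt.pt")                        # plain local file
model = load_state_from_resource(model, "local::demo_ckpt.pt") # "local::" prefix
# Remote weights use the other supported prefix:
#   load_state_from_resource(model, "url::https://.../checkpoint.pt")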
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_v1 import ZoeDepth 26 | 27 | all_versions = { 28 | "v1": ZoeDepth, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepth", 4 | "version_name": "v1", 5 | "n_bins": 64, 6 | "bin_embedding_dim": 128, 7 | "bin_centers_type": "softplus", 8 | "n_attractors":[16, 8, 4, 1], 9 | "attractor_alpha": 1000, 10 | "attractor_gamma": 2, 11 | "attractor_kind" : "mean", 12 | "attractor_type" : "inv", 13 | "midas_model_type" : "DPT_BEiT_L_384", 14 | "min_temp": 0.0212, 15 | "max_temp": 50.0, 16 | "output_distribution": "logbinomial", 17 | "memory_efficient": true, 18 | "inverse_midas": false, 19 | "img_size": [384, 512] 20 | }, 21 | 22 | "train": { 23 | "train_midas": true, 24 | "use_pretrained_midas": true, 25 | "trainer": "zoedepth", 26 | "epochs": 5, 27 | "bs": 16, 28 | "optim_kwargs": {"lr": 0.000161, "wd": 0.01}, 29 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 30 | "same_lr": false, 31 | "w_si": 1, 32 | "w_domain": 0.2, 33 | "w_reg": 0, 34 | "w_grad": 0, 35 | "avoid_boundary": false, 36 | "random_crop": false, 37 | "input_width": 640, 38 | "input_height": 480, 39 | "midas_lr_factor": 1, 40 | "encoder_lr_factor":10, 41 | "pos_enc_lr_factor":10, 42 | "freeze_midas_bn": true 43 | 44 | }, 45 | 46 | "infer":{ 47 | "train_midas": false, 48 | "use_pretrained_midas": false, 49 | "pretrained_resource" : null, 50 | "force_keep_ar": true 51 | }, 52 | 53 | "eval":{ 54 | "train_midas": false, 55 | "use_pretrained_midas": false, 56 | "pretrained_resource" : null 57 | } 58 | } -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "bin_centers_type": "normed", 4 | "img_size": [384, 768] 5 | }, 6 | 7 | "train": { 8 | }, 9 | 10 | "infer":{ 11 | "train_midas": false, 12 | "use_pretrained_midas": false, 13 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt", 14 | "force_keep_ar": true 15 | }, 16 | 17 | "eval":{ 18 | "train_midas": false, 19 | "use_pretrained_midas": false, 20 | "pretrained_resource" : "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_K.pt" 21 | } 22 | } -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/zoedepth_nk/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in 
all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from .zoedepth_nk_v1 import ZoeDepthNK 26 | 27 | all_versions = { 28 | "v1": ZoeDepthNK, 29 | } 30 | 31 | get_version = lambda v : all_versions[v] -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "name": "ZoeDepthNK", 4 | "version_name": "v1", 5 | "bin_conf" : [ 6 | { 7 | "name": "nyu", 8 | "n_bins": 64, 9 | "min_depth": 1e-3, 10 | "max_depth": 10.0 11 | }, 12 | { 13 | "name": "kitti", 14 | "n_bins": 64, 15 | "min_depth": 1e-3, 16 | "max_depth": 80.0 17 | } 18 | ], 19 | "bin_embedding_dim": 128, 20 | "bin_centers_type": "softplus", 21 | "n_attractors":[16, 8, 4, 1], 22 | "attractor_alpha": 1000, 23 | "attractor_gamma": 2, 24 | "attractor_kind" : "mean", 25 | "attractor_type" : "inv", 26 | "min_temp": 0.0212, 27 | "max_temp": 50.0, 28 | "memory_efficient": true, 29 | "midas_model_type" : "DPT_BEiT_L_384", 30 | "img_size": [384, 512] 31 | }, 32 | 33 | "train": { 34 | "train_midas": true, 35 | "use_pretrained_midas": true, 36 | "trainer": "zoedepth_nk", 37 | "epochs": 5, 38 | "bs": 16, 39 | "optim_kwargs": {"lr": 0.0002512, "wd": 0.01}, 40 | "sched_kwargs": {"div_factor": 1, "final_div_factor": 10000, "pct_start": 0.7, "three_phase":false, "cycle_momentum": true}, 41 | "same_lr": false, 42 | "w_si": 1, 43 | "w_domain": 100, 44 | "avoid_boundary": false, 45 | "random_crop": false, 46 | "input_width": 640, 47 | "input_height": 480, 48 | "w_grad": 0, 49 | "w_reg": 0, 50 | "midas_lr_factor": 10, 51 | "encoder_lr_factor":10, 52 | "pos_enc_lr_factor":10 53 | }, 54 | 55 | "infer": { 56 | "train_midas": false, 57 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 58 | "use_pretrained_midas": false, 59 | "force_keep_ar": true 60 | }, 61 | 62 | "eval": { 63 | "train_midas": false, 64 | "pretrained_resource": "url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt", 65 | "use_pretrained_midas": false 66 | } 67 | } -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/trainers/builder.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright 
notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | from importlib import import_module 26 | 27 | 28 | def get_trainer(config): 29 | """Builds and returns a trainer based on the config. 30 | 31 | Args: 32 | config (dict): the config dict (typically constructed using utils.config.get_config) 33 | config.trainer (str): the name of the trainer to use. The module named "{config.trainer}_trainer" must exist in trainers root module 34 | 35 | Raises: 36 | ValueError: If the specified trainer does not exist under trainers/ folder 37 | 38 | Returns: 39 | Trainer (inherited from zoedepth.trainers.BaseTrainer): The Trainer object 40 | """ 41 | assert "trainer" in config and config.trainer is not None and config.trainer != '', "Trainer not specified. Config: {0}".format( 42 | config) 43 | try: 44 | Trainer = getattr(import_module( 45 | f"zoedepth.trainers.{config.trainer}_trainer"), 'Trainer') 46 | except ModuleNotFoundError as e: 47 | raise ValueError(f"Trainer {config.trainer}_trainer not found.") from e 48 | return Trainer 49 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
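A hedged sketch of how get_trainer in trainers/builder.py above resolves a trainer class. The Cfg stand-in mimics the attribute-style config object the real code expects (built in zoedepth/utils/config.py, which is not shown here), and the example assumes a module named zoedepth.trainers.zoedepth_trainer is importable; if this vendored copy does not ship it, the ValueError branch is what you would see.

from zoedepth.trainers.builder import get_trainer

class Cfg(dict):
    # Minimal dict-with-attribute-access stand-in for the real config object.
    __getattr__ = dict.get

config = Cfg(trainer="zoedepth")        # hypothetical minimal config

try:
    # Returns the Trainer *class* from zoedepth.trainers.zoedepth_trainer;
    # instantiation happens at the call site with the model and data loaders.
    Trainer = get_trainer(config)
    # trainer = Trainer(config, model, train_loader, test_loader, device=device)
except ValueError as err:
    # Raised when "<config.trainer>_trainer" cannot be imported.
    print(err)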
22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def infer_type(x): # hacky way to infer type from string args 4 | if not isinstance(x, str): 5 | return x 6 | 7 | try: 8 | x = int(x) 9 | return x 10 | except ValueError: 11 | pass 12 | 13 | try: 14 | x = float(x) 15 | return x 16 | except ValueError: 17 | pass 18 | 19 | return x 20 | 21 | 22 | def parse_unknown(unknown_args): 23 | clean = [] 24 | for a in unknown_args: 25 | if "=" in a: 26 | k, v = a.split("=") 27 | clean.extend([k, v]) 28 | else: 29 | clean.append(a) 30 | 31 | keys = clean[::2] 32 | values = clean[1::2] 33 | return {k.replace("--", ""): infer_type(v) for k, v in zip(keys, values)} 34 | -------------------------------------------------------------------------------- /src/flux/annotator/zoe/zoedepth/utils/geometry.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | # Copyright (c) 2022 Intelligent Systems Lab Org 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # File author: Shariq Farooq Bhat 24 | 25 | import numpy as np 26 | 27 | def get_intrinsics(H,W): 28 | """ 29 | Intrinsics for a pinhole camera model. 30 | Assume fov of 55 degrees and central principal point. 
31 | """ 32 | f = 0.5 * W / np.tan(0.5 * 55 * np.pi / 180.0) 33 | cx = 0.5 * W 34 | cy = 0.5 * H 35 | return np.array([[f, 0, cx], 36 | [0, f, cy], 37 | [0, 0, 1]]) 38 | 39 | def depth_to_points(depth, R=None, t=None): 40 | 41 | K = get_intrinsics(depth.shape[1], depth.shape[2]) 42 | Kinv = np.linalg.inv(K) 43 | if R is None: 44 | R = np.eye(3) 45 | if t is None: 46 | t = np.zeros(3) 47 | 48 | # M converts from your coordinate to PyTorch3D's coordinate system 49 | M = np.eye(3) 50 | M[0, 0] = -1.0 51 | M[1, 1] = -1.0 52 | 53 | height, width = depth.shape[1:3] 54 | 55 | x = np.arange(width) 56 | y = np.arange(height) 57 | coord = np.stack(np.meshgrid(x, y), -1) 58 | coord = np.concatenate((coord, np.ones_like(coord)[:, :, [0]]), -1) # z=1 59 | coord = coord.astype(np.float32) 60 | # coord = torch.as_tensor(coord, dtype=torch.float32, device=device) 61 | coord = coord[None] # bs, h, w, 3 62 | 63 | D = depth[:, :, :, None, None] 64 | # print(D.shape, Kinv[None, None, None, ...].shape, coord[:, :, :, :, None].shape ) 65 | pts3D_1 = D * Kinv[None, None, None, ...] @ coord[:, :, :, :, None] 66 | # pts3D_1 live in your coordinate system. Convert them to Py3D's 67 | pts3D_1 = M[None, None, None, ...] @ pts3D_1 68 | # from reference to targe tviewpoint 69 | pts3D_2 = R[None, None, None, ...] @ pts3D_1 + t[None, None, None, :, None] 70 | # pts3D_2 = pts3D_1 71 | # depth_2 = pts3D_2[:, :, :, 2, :] # b,1,h,w 72 | return pts3D_2[:, :, :, :3, 0][0] 73 | 74 | 75 | def create_triangles(h, w, mask=None): 76 | """ 77 | Reference: https://github.com/google-research/google-research/blob/e96197de06613f1b027d20328e06d69829fa5a89/infinite_nature/render_utils.py#L68 78 | Creates mesh triangle indices from a given pixel grid size. 79 | This function is not and need not be differentiable as triangle indices are 80 | fixed. 81 | Args: 82 | h: (int) denoting the height of the image. 83 | w: (int) denoting the width of the image. 
84 | Returns: 85 | triangles: 2D numpy array of indices (int) with shape (2(W-1)(H-1) x 3) 86 | """ 87 | x, y = np.meshgrid(range(w - 1), range(h - 1)) 88 | tl = y * w + x 89 | tr = y * w + x + 1 90 | bl = (y + 1) * w + x 91 | br = (y + 1) * w + x + 1 92 | triangles = np.array([tl, bl, tr, br, tr, bl]) 93 | triangles = np.transpose(triangles, (1, 2, 0)).reshape( 94 | ((w - 1) * (h - 1) * 2, 3)) 95 | if mask is not None: 96 | mask = mask.reshape(-1) 97 | triangles = triangles[mask[triangles].all(1)] 98 | return triangles 99 | -------------------------------------------------------------------------------- /src/flux/math.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from einops import rearrange 3 | from torch import Tensor 4 | 5 | 6 | def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor: 7 | q, k = apply_rope(q, k, pe) 8 | 9 | x = torch.nn.functional.scaled_dot_product_attention(q, k, v) 10 | x = rearrange(x, "B H L D -> B L (H D)") 11 | 12 | return x 13 | 14 | 15 | def rope(pos: Tensor, dim: int, theta: int) -> Tensor: 16 | assert dim % 2 == 0 17 | scale = torch.arange(0, dim, 2, dtype=torch.float64, device=pos.device) / dim 18 | omega = 1.0 / (theta**scale) 19 | out = torch.einsum("...n,d->...nd", pos, omega) 20 | out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1) 21 | out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2) 22 | return out.float() 23 | 24 | 25 | def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]: 26 | xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2) 27 | xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2) 28 | xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] 29 | xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1] 30 | return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) 31 | -------------------------------------------------------------------------------- /src/flux/modules/conditioner.py: -------------------------------------------------------------------------------- 1 | from torch import Tensor, nn 2 | from transformers import (CLIPTextModel, CLIPTokenizer, T5EncoderModel, 3 | T5Tokenizer) 4 | 5 | 6 | class HFEmbedder(nn.Module): 7 | def __init__(self, version: str, max_length: int, **hf_kwargs): 8 | super().__init__() 9 | self.is_clip = version.startswith("openai") 10 | self.max_length = max_length 11 | self.output_key = "pooler_output" if self.is_clip else "last_hidden_state" 12 | 13 | if self.is_clip: 14 | self.tokenizer: CLIPTokenizer = CLIPTokenizer.from_pretrained(version, max_length=max_length) 15 | self.hf_module: CLIPTextModel = CLIPTextModel.from_pretrained(version, **hf_kwargs) 16 | else: 17 | self.tokenizer: T5Tokenizer = T5Tokenizer.from_pretrained(version, max_length=max_length) 18 | self.hf_module: T5EncoderModel = T5EncoderModel.from_pretrained(version, **hf_kwargs) 19 | 20 | self.hf_module = self.hf_module.eval().requires_grad_(False) 21 | 22 | def forward(self, text: list[str]) -> Tensor: 23 | batch_encoding = self.tokenizer( 24 | text, 25 | truncation=True, 26 | max_length=self.max_length, 27 | return_length=False, 28 | return_overflowing_tokens=False, 29 | padding="max_length", 30 | return_tensors="pt", 31 | ) 32 | 33 | outputs = self.hf_module( 34 | input_ids=batch_encoding["input_ids"].to(self.hf_module.device), 35 | attention_mask=None, 36 | output_hidden_states=False, 37 | ) 38 | return outputs[self.output_key] 39 | 
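To make the tensor shapes in src/flux/math.py above concrete, here is a small self-contained sketch of rope plus attention. It assumes the package is importable as flux (the src/ layout installed or added to sys.path); the head count, sequence length, and theta value are illustrative numbers, not values taken from the model code.

import torch
from flux.math import rope, attention

B, H, L, D = 1, 2, 8, 64              # batch, heads, tokens, head dim (D must be even)
q = torch.randn(B, H, L, D)
k = torch.randn(B, H, L, D)
v = torch.randn(B, H, L, D)

pos = torch.arange(L, dtype=torch.float32)[None]   # (B, L) token positions
pe = rope(pos, dim=D, theta=10_000)                # (B, L, D/2, 2, 2) rotation matrices
pe = pe[:, None]                                   # add a head axis so it broadcasts over H

out = attention(q, k, v, pe=pe)                    # rotary embedding + scaled dot-product attention
print(out.shape)                                   # torch.Size([1, 8, 128]) == (B, L, H*D)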
-------------------------------------------------------------------------------- /train_configs/test_canny_controlnet.yaml: -------------------------------------------------------------------------------- 1 | model_name: "flux-dev" 2 | data_config: 3 | train_batch_size: 4 4 | num_workers: 4 5 | img_size: 512 6 | img_dir: images/ 7 | report_to: wandb 8 | train_batch_size: 3 9 | output_dir: saves_canny/ 10 | max_train_steps: 100000 11 | learning_rate: 2e-5 12 | lr_scheduler: constant 13 | lr_warmup_steps: 10 14 | adam_beta1: 0.9 15 | adam_beta2: 0.999 16 | adam_weight_decay: 0.01 17 | adam_epsilon: 1e-8 18 | max_grad_norm: 1.0 19 | logging_dir: logs 20 | mixed_precision: "bf16" 21 | checkpointing_steps: 2500 22 | checkpoints_total_limit: 10 23 | tracker_project_name: canny_training 24 | resume_from_checkpoint: latest 25 | gradient_accumulation_steps: 2 26 | -------------------------------------------------------------------------------- /train_configs/test_finetune.yaml: -------------------------------------------------------------------------------- 1 | model_name: "flux-dev" 2 | data_config: 3 | train_batch_size: 1 4 | num_workers: 4 5 | img_size: 512 6 | img_dir: images/ 7 | report_to: wandb 8 | train_batch_size: 1 9 | output_dir: saves/ 10 | max_train_steps: 100000 11 | learning_rate: 1e-5 12 | lr_scheduler: constant 13 | lr_warmup_steps: 10 14 | adam_beta1: 0.9 15 | adam_beta2: 0.999 16 | adam_weight_decay: 0.01 17 | adam_epsilon: 1e-8 18 | max_grad_norm: 1.0 19 | logging_dir: logs 20 | mixed_precision: "bf16" 21 | checkpointing_steps: 2500 22 | checkpoints_total_limit: 10 23 | tracker_project_name: finetune_test 24 | resume_from_checkpoint: latest 25 | gradient_accumulation_steps: 2 26 | -------------------------------------------------------------------------------- /train_configs/test_lora.yaml: -------------------------------------------------------------------------------- 1 | model_name: "flux-dev" 2 | data_config: 3 | train_batch_size: 1 4 | num_workers: 4 5 | img_size: 512 6 | img_dir: images/ 7 | random_ratio: true # support multi crop preprocessing 8 | report_to: wandb 9 | train_batch_size: 1 10 | output_dir: lora/ 11 | max_train_steps: 100000 12 | learning_rate: 1e-5 13 | lr_scheduler: constant 14 | lr_warmup_steps: 10 15 | adam_beta1: 0.9 16 | adam_beta2: 0.999 17 | adam_weight_decay: 0.01 18 | adam_epsilon: 1e-8 19 | max_grad_norm: 1.0 20 | logging_dir: logs 21 | mixed_precision: "bf16" 22 | checkpointing_steps: 2500 23 | checkpoints_total_limit: 10 24 | tracker_project_name: lora_test 25 | resume_from_checkpoint: latest 26 | gradient_accumulation_steps: 2 27 | rank: 16 28 | single_blocks: "1,2,3,4" 29 | double_blocks: null 30 | disable_sampling: false 31 | sample_every: 250 # sample every this many steps 32 | sample_width: 1024 33 | sample_height: 1024 34 | sample_steps: 20 35 | sample_prompts: 36 | - "woman with red hair, playing chess at the park, bomb going off in the background" 37 | - "a woman holding a coffee cup, in a beanie, sitting at a cafe" 38 | --------------------------------------------------------------------------------
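As a closing note on the train_configs files above: the dump flattens YAML indentation, so keep in mind that in the original files train_batch_size, num_workers, img_size, img_dir (and random_ratio in test_lora.yaml) are nested under data_config, while the later keys are top-level. Below is a hedged sketch of reading the LoRA config; it uses plain PyYAML for illustration, since the exact loader used by the training entry points is not shown in this section.

import yaml

# Hedged example: read test_lora.yaml and pull out a few of the fields shown above.
with open("train_configs/test_lora.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["model_name"])                    # "flux-dev"
print(cfg["data_config"]["img_size"])       # 512 (nested under data_config)
print(cfg["rank"], cfg["single_blocks"])    # 16  "1,2,3,4"
print(cfg["sample_prompts"][0])             # first of the two sample prompts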