├── .github └── workflows │ └── main.yml ├── .gitignore ├── .pre-commit-config.yaml ├── MANIFEST.in ├── README.md ├── examples ├── add_remove_persist.ipynb └── intro.ipynb ├── pyproject.toml ├── requirements.txt ├── setup.py ├── tests └── test_explorer.py └── yoloexplorer ├── __init__.py ├── assets └── docs │ ├── dash_intro.gif │ ├── intro.gif │ ├── plotting.png │ ├── sim_index.png │ └── sim_plotting.png ├── config.py ├── dataset.py ├── explorer.py ├── frontend ├── __init__.py ├── datasets.py ├── layout.py ├── pages │ ├── 1_table.py │ └── 2_analysis.py ├── redirect.py ├── states.py ├── streamlit_dash │ ├── __init__.py │ └── frontend │ │ ├── .prettierrc │ │ ├── build │ │ ├── asset-manifest.json │ │ ├── index.html │ │ ├── precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js │ │ ├── service-worker.js │ │ ├── static │ │ │ └── js │ │ │ │ ├── 2.ea259f3e.chunk.js │ │ │ │ ├── 2.ea259f3e.chunk.js.LICENSE.txt │ │ │ │ ├── 2.ea259f3e.chunk.js.map │ │ │ │ ├── main.c396fd5a.chunk.js │ │ │ │ ├── main.c396fd5a.chunk.js.map │ │ │ │ ├── runtime-main.58369df8.js │ │ │ │ └── runtime-main.58369df8.js.map │ │ └── styles.css │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── public │ │ ├── index.html │ │ └── styles.css │ │ ├── src │ │ ├── index.tsx │ │ └── react-app-env.d.ts │ │ └── tsconfig.json └── styles │ └── __init__.py └── yolo_predictor.py /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.9", "3.10", "3.11"] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 
| pip install pytest 27 | pip install -e . 28 | pip install pandas==2.0.3 29 | 30 | - name: Test with pytest 31 | run: | 32 | pytest tests 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### JetBrains ### 2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | # User-specific stuff 6 | .idea/**/workspace.xml 7 | .idea/**/tasks.xml 8 | .idea/**/usage.statistics.xml 9 | .idea/**/dictionaries 10 | .idea/**/shelf 11 | 12 | # AWS User-specific 13 | .idea/**/aws.xml 14 | 15 | # Generated files 16 | .idea/**/contentModel.xml 17 | 18 | # Sensitive or high-churn files 19 | .idea/**/dataSources/ 20 | .idea/**/dataSources.ids 21 | .idea/**/dataSources.local.xml 22 | .idea/**/sqlDataSources.xml 23 | .idea/**/dynamic.xml 24 | .idea/**/uiDesigner.xml 25 | .idea/**/dbnavigator.xml 26 | 27 | # Gradle 28 | .idea/**/gradle.xml 29 | .idea/**/libraries 30 | 31 | # Gradle and Maven with auto-import 32 | # When using Gradle or Maven with auto-import, you should exclude module files, 33 | # since they will be recreated, and may cause churn. Uncomment if using 34 | # auto-import. 
35 | # .idea/artifacts 36 | # .idea/compiler.xml 37 | # .idea/jarRepositories.xml 38 | # .idea/modules.xml 39 | # .idea/*.iml 40 | # .idea/modules 41 | # *.iml 42 | # *.ipr 43 | 44 | # CMake 45 | cmake-build-*/ 46 | 47 | # Mongo Explorer plugin 48 | .idea/**/mongoSettings.xml 49 | 50 | # File-based project format 51 | *.iws 52 | 53 | # IntelliJ 54 | out/ 55 | 56 | # mpeltonen/sbt-idea plugin 57 | .idea_modules/ 58 | 59 | # JIRA plugin 60 | atlassian-ide-plugin.xml 61 | 62 | # Cursive Clojure plugin 63 | .idea/replstate.xml 64 | 65 | # SonarLint plugin 66 | .idea/sonarlint/ 67 | 68 | # Crashlytics plugin (for Android Studio and IntelliJ) 69 | com_crashlytics_export_strings.xml 70 | crashlytics.properties 71 | crashlytics-build.properties 72 | fabric.properties 73 | 74 | # Editor-based Rest Client 75 | .idea/httpRequests 76 | 77 | # Android studio 3.1+ serialized cache file 78 | .idea/caches/build_file_checksums.ser 79 | 80 | ### JetBrains Patch ### 81 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 82 | 83 | # *.iml 84 | # modules.xml 85 | # .idea/misc.xml 86 | # *.ipr 87 | 88 | # Sonarlint plugin 89 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 90 | .idea/**/sonarlint/ 91 | 92 | # SonarQube Plugin 93 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 94 | .idea/**/sonarIssues.xml 95 | 96 | # Markdown Navigator plugin 97 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 98 | .idea/**/markdown-navigator.xml 99 | .idea/**/markdown-navigator-enh.xml 100 | .idea/**/markdown-navigator/ 101 | 102 | # Cache file creation bug 103 | # See https://youtrack.jetbrains.com/issue/JBR-2257 104 | .idea/$CACHE_FILE$ 105 | 106 | # CodeStream plugin 107 | # https://plugins.jetbrains.com/plugin/12206-codestream 108 | .idea/codestream.xml 109 | 110 | # Azure Toolkit for IntelliJ plugin 111 | # https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij 112 | 
.idea/**/azureSettings.xml 113 | 114 | ### Linux ### 115 | *~ 116 | 117 | # temporary files which can be created if a process still has a handle open of a deleted file 118 | .fuse_hidden* 119 | 120 | # KDE directory preferences 121 | .directory 122 | 123 | # Linux trash folder which might appear on any partition or disk 124 | .Trash-* 125 | 126 | # .nfs files are created when an open file is removed but is still being accessed 127 | .nfs* 128 | 129 | ### macOS ### 130 | # General 131 | .DS_Store 132 | .config 133 | *.egg-info/* 134 | 135 | .AppleDouble 136 | .LSOverride 137 | 138 | # Icon must end with two \r 139 | Icon 140 | 141 | 142 | # Thumbnails 143 | ._* 144 | 145 | # Files that might appear in the root of a volume 146 | .DocumentRevisions-V100 147 | .fseventsd 148 | .Spotlight-V100 149 | .TemporaryItems 150 | .Trashes 151 | .VolumeIcon.icns 152 | .com.apple.timemachine.donotpresent 153 | 154 | # Directories potentially created on remote AFP share 155 | .AppleDB 156 | .AppleDesktop 157 | Network Trash Folder 158 | Temporary Items 159 | .apdisk 160 | 161 | ### macOS Patch ### 162 | # iCloud generated files 163 | *.icloud 164 | 165 | ### Python ### 166 | # Byte-compiled / optimized / DLL files 167 | __pycache__/ 168 | *.py[cod] 169 | *$py.class 170 | 171 | # C extensions 172 | *.so 173 | 174 | # Distribution / packaging 175 | .Python 176 | #build/ 177 | develop-eggs/ 178 | dist/ 179 | downloads/ 180 | eggs/ 181 | .eggs/ 182 | lib/ 183 | lib64/ 184 | parts/ 185 | sdist/ 186 | var/ 187 | wheels/ 188 | share/python-wheels/ 189 | *.egg-info/ 190 | .installed.cfg 191 | *.egg 192 | MANIFEST 193 | 194 | # PyInstaller 195 | # Usually these files are written by a python script from a template 196 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
197 | *.manifest 198 | *.spec 199 | 200 | # Installer logs 201 | pip-log.txt 202 | pip-delete-this-directory.txt 203 | 204 | # Unit test / coverage reports 205 | htmlcov/ 206 | .tox/ 207 | .nox/ 208 | .coverage 209 | .coverage.* 210 | .cache 211 | nosetests.xml 212 | coverage.xml 213 | *.cover 214 | *.py,cover 215 | .hypothesis/ 216 | .pytest_cache/ 217 | cover/ 218 | 219 | # Translations 220 | *.mo 221 | *.pot 222 | 223 | # Django stuff: 224 | *.log 225 | local_settings.py 226 | db.sqlite3 227 | db.sqlite3-journal 228 | 229 | # Flask stuff: 230 | instance/ 231 | .webassets-cache 232 | 233 | # Scrapy stuff: 234 | .scrapy 235 | 236 | # Sphinx documentation 237 | docs/_build/ 238 | 239 | # PyBuilder 240 | .pybuilder/ 241 | target/ 242 | 243 | # Jupyter Notebook 244 | .ipynb_checkpoints 245 | 246 | # IPython 247 | profile_default/ 248 | ipython_config.py 249 | 250 | # pyenv 251 | # For a library or package, you might want to ignore these files since the code is 252 | # intended to run in multiple environments; otherwise, check them in: 253 | # .python-version 254 | 255 | # pipenv 256 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 257 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 258 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 259 | # install all needed dependencies. 260 | #Pipfile.lock 261 | 262 | # poetry 263 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 264 | # This is especially recommended for binary packages to ensure reproducibility, and is more 265 | # commonly ignored for libraries. 266 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 267 | #poetry.lock 268 | 269 | # pdm 270 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
271 | #pdm.lock 272 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 273 | # in version control. 274 | # https://pdm.fming.dev/#use-with-ide 275 | .pdm.toml 276 | 277 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 278 | __pypackages__/ 279 | 280 | # Celery stuff 281 | celerybeat-schedule 282 | celerybeat.pid 283 | 284 | # SageMath parsed files 285 | *.sage.py 286 | 287 | # Environments 288 | .env 289 | .venv 290 | env/ 291 | venv/ 292 | ENV/ 293 | env.bak/ 294 | venv.bak/ 295 | 296 | # Spyder project settings 297 | .spyderproject 298 | .spyproject 299 | 300 | # Rope project settings 301 | .ropeproject 302 | 303 | # mkdocs documentation 304 | /site 305 | 306 | # mypy 307 | .mypy_cache/ 308 | .dmypy.json 309 | dmypy.json 310 | 311 | # Pyre type checker 312 | .pyre/ 313 | 314 | # pytype static type analyzer 315 | .pytype/ 316 | 317 | # Cython debug symbols 318 | cython_debug/ 319 | 320 | # PyCharm 321 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 322 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 323 | # and can be added to the global gitignore or merged into this file. For a more nuclear 324 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
325 | #.idea/ 326 | 327 | ### Python Patch ### 328 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 329 | poetry.toml 330 | 331 | # ruff 332 | .ruff_cache/ 333 | 334 | # LSP config files 335 | pyrightconfig.json 336 | 337 | ### Windows ### 338 | # Windows thumbnail cache files 339 | Thumbs.db 340 | Thumbs.db:encryptable 341 | ehthumbs.db 342 | ehthumbs_vista.db 343 | 344 | # Dump file 345 | *.stackdump 346 | 347 | # Folder config file 348 | [Dd]esktop.ini 349 | 350 | # Recycle Bin used on file shares 351 | $RECYCLE.BIN/ 352 | 353 | # Windows Installer files 354 | *.cab 355 | *.msi 356 | *.msix 357 | *.msm 358 | *.msp 359 | 360 | # Windows shortcuts 361 | *.lnk 362 | 363 | run/ 364 | node_modules/ 365 | *.pt 366 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | 4 | default_language_version: 5 | python: python3.8 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.4.0 9 | hooks: 10 | - id: check-added-large-files 11 | - id: check-toml 12 | - id: check-yaml 13 | args: 14 | - --unsafe 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | - repo: https://github.com/asottile/pyupgrade 18 | rev: v3.10.1 19 | hooks: 20 | - id: pyupgrade 21 | args: 22 | - --py3-plus 23 | - --keep-runtime-typing 24 | - repo: https://github.com/astral-sh/ruff-pre-commit 25 | rev: v0.0.282 26 | hooks: 27 | - id: ruff 28 | args: 29 | - --fix 30 | - repo: https://github.com/psf/black 31 | rev: 23.7.0 32 | hooks: 33 | - id: black 34 | ci: 35 | autofix_prs: true 36 | autoupdate_schedule: monthly 37 | autofix_commit_msg: "fix(pre_commit): 🎨 auto format pre-commit hooks" 38 | autoupdate_commit_msg: "fix(pre_commit): ⬆ pre_commit autoupdate" 39 | 
 -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOExplorer 2 | 3 | Explore, manipulate and iterate on Computer Vision datasets with precision using simple APIs. 4 | Supports SQL filters, vector similarity search, native interface with Pandas and more. 5 | 6 | 7 | * Analyse your datasets with powerful custom queries 8 | * Find and remove bad images (duplicates, out of domain data and more) 9 | * Enrich datasets by adding more examples from other datasets 10 | * And more 11 | 12 | 🌟 NEW: Supports GUI Dashboard, Pythonic and notebook workflows 13 | ### Dashboard Workflows 14 | <details open>
 15 | Multiple dataset support 16 | You can now explore multiple datasets, search across them, add/remove images across multiple datasets to enrich bad examples. Start training on a new dataset within seconds. 17 | Here's an example of using VOC, coco128 and coco8 datasets together with VOC being the primary. 18 | <pre>
 19 | from yoloexplorer import Explorer
 20 | 
 21 | exp = Explorer("VOC.yaml")
 22 | exp.build_embeddings()
 23 | 
 24 | coco_exp = Explorer("coco128.yaml")
 25 | coco_exp.build_embeddings()
 26 |  #Init coco8 similarly
 27 | 
 28 | exp.dash([coco_exp, coco8])
 29 | #Automatic analysis coming soon with dash(..., analysis=True)
 30 | 
 31 | 
32 | 33 | ![ezgif com-optimize (3)](https://github.com/lancedb/yoloexplorer/assets/15766192/3422a536-138a-4fce-af2c-cef97f171aed) 34 | 35 |
36 | 37 | 38 |
39 | Multiple model support 40 | 41 | You can now explore multiple pretrained models listed 42 | `"resnet18", "resnet50", "efficientnet_b0", "efficientnet_v2_s", "googlenet", "mobilenet_v3_small"` for extracting better features out of images to improve searching across multiple datasets.
 43 | from yoloexplorer import Explorer
 44 | 
 45 | exp = Explorer("coco128.yaml", model="resnet50")
 46 | exp.build_embeddings()
 47 | 
 48 | coco_exp = Explorer("coco128.yaml", model="mobilenet_v3_small")
 49 | coco_exp.build_embeddings()
 50 | 
 51 | #Use force=True as a parameter in build_embedding if embeddings already exists
 52 | 
 53 | exp.dash([coco_exp, coco8])
 54 | #Automatic analysis coming soon with dash(..., analysis=True)
 55 | 
56 | 57 |
58 | Query using SQL and semantic search, View dataset as pandas DF and explore embeddings 59 | 60 | ![ezgif com-optimize (4)](https://github.com/lancedb/yoloexplorer/assets/15766192/b786e2f1-dc8e-411e-b13b-84b26ec50d41) 61 | 62 | ![ezgif com-optimize (5)](https://github.com/lancedb/yoloexplorer/assets/15766192/38d42a38-810e-48f3-89ea-1ccf304a1047) 63 | 64 |
65 | 66 |
67 | Try an example colab Open In Colab 68 | 69 | Colab / Notebook 70 | 71 |
72 | 73 | ### Installation 74 | ``` 75 | pip install yoloexplorer 76 | ``` 77 | Install from source branch 78 | ``` 79 | pip install git+https://github.com/lancedb/yoloexplorer.git 80 | ``` 81 | Pypi installation coming soon 82 | 83 | ## Quickstart 84 | YOLOExplorer can be used to rapidly generate new versions of CV datasets trainable on [Ultralytics YOLO, SAM, FAST-SAM, RT-DETR](https://github.com/ultralytics/ultralytics) and more models. 85 | 86 | Start exploring your Datasets in 2 simple steps 87 | * Select a supported dataset or bring your own. Supports all Ultralytics YOLO datasets currently 88 | ```python 89 | from yoloexplorer import Explorer 90 | 91 | coco_exp = Explorer("coco128.yaml") 92 | ``` 93 | * Build the LanceDB table to allow querying 94 | ```python 95 | coco_exp.build_embeddings() 96 | coco_exp.dash() # Launch the GUI dashboard 97 | ``` 98 |
 99 | Querying Basics 100 | 101 | You can get the schema of your dataset once the table is built 102 | ``` 103 | schema = coco_exp.table.schema 104 | ``` 105 | You can use this schema to run queries 106 | 107 | SQL query<br/>
 108 | Let's try this query and print 4 results - Select instances that contain one or more 'person' and 'cat' 109 | ```python 110 | df = coco_exp.sql("SELECT * from 'table' WHERE labels like '%person%' and labels LIKE '%cat%'") 111 | coco_exp.plot_imgs(ids=df["id"][0:4].to_list()) 112 | ``` 113 | Result 114 | 115 | <img width="1120" alt="Screenshot 2023-07-21 at 5 48 33 PM" src=
 116 | The above is equivalent to plotting directly with a query: 117 | ```python 118 | voc_exp.plot_imgs(query=query, n=4) 119 | ``` 120 | 121 | Querying by similarity<br/>
 122 | Now let's say your model confuses certain classes (cat & dog for example) so you want to find images similar to the ones above to investigate. 123 | 124 | The id of the first image in this case was 117 125 | ```python 126 | imgs, ids = coco_exp.get_similar_imgs(117, n=6) # accepts ids/idx, Path, or img blob 127 | voc_exp.plot_imgs(ids) 128 | ``` 129 | <img width="1120" alt="Screenshot 2023-07-21 at 6 06 05 PM" src=
 130 | The above is equivalent to directly calling `plot_similar_imgs` 131 | ```python 132 | voc_exp.plot_similar_imgs(117, n=6) 133 | ``` 134 | NOTE: You can also pass any image file for similarity search, even the ones that are not in the dataset 135 | 136 | 137 | Similarity Search with SQL Filter (Coming Soon)<br/>
138 | Soon you'll be able to have a finer control over the queries by pre-filtering your table 139 | ``` 140 | coco_exp.get_similar_imgs(..., query="WHERE labels LIKE '%motorbike%'") 141 | coco_exp.plot_similar_imgs(query="WHERE labels LIKE '%motorbike%'") 142 | ``` 143 |
144 | 145 |
146 | Plotting 147 | 148 | | Visualization Method | Description | Arguments | 149 | |---|---|---| 150 | | `plot_imgs(ids, query, n=10)` | Plots the given `ids` or the result of the SQL query. One of the 2 must be provided. | `ids`: A list of image IDs or a SQL query. `n`: The number of images to plot. | 151 | | `plot_similar_imgs(img/idx, n=10)` | Plots `n` top similar images to the given img. Accepts img idx from the dataset, Path to imgs or encoded/binary img | `img/idx`: The image to plot similar images for. `n`: The number of similar images to plot. | 152 | | `plot_similarity_index(top_k=0.01, sim_thres=0.90, reduce=False, sorted=False)` | Plots the similarity index of the dataset. This gives measure of how similar an img is when compared to all the imgs of the dataset. | `top_k`: The percentage of images to keep for the similarity index. `sim_thres`: The similarity threshold. `reduce`: Whether to reduce the dimensionality of the similarity index. `sorted`: Whether to sort the similarity index. | 153 | 154 | **Additional Details** 155 | 156 | * The `plot_imgs` method can be used to visualize a subset of images from the dataset. The `ids` argument can be a list of image IDs, or a SQL query that returns a list of image IDs. The `n` argument specifies the number of images to plot. 157 | * The `plot_similar_imgs` method can be used to visualize the top `n` similar images to a given image. The `img/idx` argument can be the index of the image in the dataset, the path to the image file, or the encoded/binary representation of the image. 158 | * The `plot_similarity_index` method can be used to visualize the similarity index of the dataset. The similarity index is a measure of how similar each image is to all the other images in the dataset. The `top_k` argument specifies the percentage of images to keep for the similarity index. The `sim_thres` argument specifies the similarity threshold. 
The `reduce` argument specifies whether to reduce the dimensionality of embeddings before calculating the index. The `sorted` argument specifies whether to sort the similarity index. 159 | 160 | 161 |
162 | 163 |
164 | Add, remove, merge parts of datasets, persist new Datasets, and start training! 165 | Once you've found the right images that you'd like to add or remove, you can simply add/remove them from your dataset and generate the updated version. 166 | 167 | Removing data
168 | You can simply remove images by passing a list of `ids` from the table. 169 | ``` 170 | coco_exp.remove_imgs([100,120,300..n]) # Removes images at the given ids. 171 | ``` 172 | 173 | Adding data
174 | For adding data from another dataset, you need an explorer object of that dataset with embeddings built. You can then pass that object along with the ids of the imgs that you'd like to add from that dataset. 175 | ``` 176 | coco_exp.add_imgs(exp, idxs) # 177 | ``` 178 | Note: You can use SQL querying and/or similarity searches to get the desired ids from the datasets. 179 | 180 | Persisting the Table: Create new dataset and start training
181 | After making the desired changes, you can persist the table to create the new dataset. 182 | ``` 183 | coco_exp.persist() 184 | ``` 185 | This creates a new dataset and outputs the training command that you can simply paste in your terminal to train a new model! 186 | 187 | Resetting the Table
188 | You can reset the table to its original or last persisted state (whichever is latest) 189 | ``` 190 | coco_exp.reset() 191 | ``` 192 |
193 | 194 |
 195 | (Advanced querying) Getting insights from Similarity index 196 | The `plot_similarity_index` method can be used to visualize the similarity index of the dataset. The similarity index is a measure of how similar each image is to all the other images in the dataset. 197 | Let's see the similarity index of the VOC dataset keeping all the default settings 198 | 199 | ```python 200 | voc_exp.plot_similarity_index() 201 | ``` 202 | 203 | <img width="1120" alt="Screenshot 2023-07-21 at 9 42 46 PM" src=
 204 | You can also get the similarity index as a numpy array to perform advanced queries. 205 | 206 | ```python 207 | sim = voc_exp.get_similarity_index() 208 | ``` 209 | Now you can combine the similarity index with other querying options discussed above to create even more powerful queries. Here's an example: 210 | 211 | "Let's say you've created a list of candidates you wish to remove from the dataset. Now, you want to filter out the images that have similarity index less than 250, i.e., remove the images that are 90% (`sim_thres`) or more similar to more than 250 images in the dataset. 212 | " 213 | ```python 214 | ids = [...] # filtered ids list 215 | filter = np.where(sim > 250) 216 | final_ids = np.intersect1d(ids, filter) # intersect both arrays 217 | 218 | exp.remove_imgs(final_ids) 219 | ``` 220 | </details>
221 | 222 |

Coming Soon

223 | 224 | Pre-filtering 225 | * To allow adding filter to searches. 226 | * Have a finer control over embeddings search space 227 | 228 | Pre-filtering will enable powerful queries like - "Show me images similar to and include only ones that contain one or more(or exactly one) person, 2 cars and 1 horse"
229 | 230 | * Automatically find potential duplicate images 231 | 232 | * Better embedding plotting and analytics insights 233 | 234 | * Better dashboard for visualizing imgs 235 |
236 | 237 | Notes: 238 | * The API will have some minor changes going from dev to minor release 239 | * For all practical purposes the ids are same as row number and is reset after every addition or removal 240 | -------------------------------------------------------------------------------- /examples/add_remove_persist.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "f5fc3d97", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from yoloexplorer import Explorer" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "0992199f", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 24 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 25 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/VOC/labels/train2007.cache... 16551 images, 0 backgroun\n", 26 | "LanceDB embedding space already exists. Attempting to reuse it. Use force=True to overwrite.\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "voc_exp = Explorer(\"VOC.yaml\")\n", 32 | "voc_exp.build_embeddings()" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 4, 38 | "id": "9bb61d27", 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stderr", 43 | "output_type": "stream", 44 | "text": [ 45 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 46 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 47 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/coco8/labels/train.cache... 
4 images, 0 backgrounds, 0 corrupt: 100%|█████████\n" 48 | ] 49 | }, 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "WARNING: rate limit only support up to 3.10, proceeding without rate limiter\n" 55 | ] 56 | }, 57 | { 58 | "name": "stderr", 59 | "output_type": "stream", 60 | "text": [ 61 | "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 16.75it/s]\n", 62 | "\u001b[34m\u001b[1mLanceDB:\u001b[0m Embedding space built successfully.\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "coco_exp = Explorer(\"coco8.yaml\")\n", 68 | "coco_exp.build_embeddings()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "id": "d2f17222", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stderr", 79 | "output_type": "stream", 80 | "text": [ 81 | "\n", 82 | "|-----------------------------------------------|\n", 83 | "\t Number of images: 16555\n", 84 | "|------------------------------------------------|\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "voc_exp.add_imgs(coco_exp, [0,1,2,3])" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 8, 95 | "id": "fbca050d", 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/html": [ 101 | "
\n", 102 | "\n", 115 | "\n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | "
idpathclslabelsbboxesimgvector
1655016550/Users/ayushchaurasia/Documents/ultralytics/da...[0][aeroplane][[0.4880000352859497, 0.40942928194999695, 0.8...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.17438583, -0.17087273, -0.13651106, -0.177...
165510/Users/ayushchaurasia/Documents/ultralytics/da...[45, 45, 45, 49, 49, 49, 49, 50][bowl, bowl, bowl, orange, orange, orange, ora...[[0.4794920086860657, 0.6887710094451904, 0.95...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.02985644, 0.015207137, 0.22520831, 0.13441...
165521/Users/ayushchaurasia/Documents/ultralytics/da...[23, 23][giraffe, giraffe][[0.7703359723091125, 0.4896950125694275, 0.33...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.2151032, -0.15736936, -0.08698822, -0.0328...
165532/Users/ayushchaurasia/Documents/ultralytics/da...[58, 75][potted plant, vase][[0.5192189812660217, 0.4511209726333618, 0.39...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1577483, -0.07757879, 0.1852913, 0.0405876...
165543/Users/ayushchaurasia/Documents/ultralytics/da...[22][zebra][[0.3462109863758087, 0.4932590126991272, 0.68...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.14650379, -0.2218233, -0.10506437, -0.1519...
\n", 181 | "
" 182 | ], 183 | "text/plain": [ 184 | " id path \\\n", 185 | "16550 16550 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 186 | "16551 0 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 187 | "16552 1 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 188 | "16553 2 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 189 | "16554 3 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 190 | "\n", 191 | " cls \\\n", 192 | "16550 [0] \n", 193 | "16551 [45, 45, 45, 49, 49, 49, 49, 50] \n", 194 | "16552 [23, 23] \n", 195 | "16553 [58, 75] \n", 196 | "16554 [22] \n", 197 | "\n", 198 | " labels \\\n", 199 | "16550 [aeroplane] \n", 200 | "16551 [bowl, bowl, bowl, orange, orange, orange, ora... \n", 201 | "16552 [giraffe, giraffe] \n", 202 | "16553 [potted plant, vase] \n", 203 | "16554 [zebra] \n", 204 | "\n", 205 | " bboxes \\\n", 206 | "16550 [[0.4880000352859497, 0.40942928194999695, 0.8... \n", 207 | "16551 [[0.4794920086860657, 0.6887710094451904, 0.95... \n", 208 | "16552 [[0.7703359723091125, 0.4896950125694275, 0.33... \n", 209 | "16553 [[0.5192189812660217, 0.4511209726333618, 0.39... \n", 210 | "16554 [[0.3462109863758087, 0.4932590126991272, 0.68... \n", 211 | "\n", 212 | " img \\\n", 213 | "16550 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 214 | "16551 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 215 | "16552 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 216 | "16553 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 217 | "16554 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 218 | "\n", 219 | " vector \n", 220 | "16550 [-0.17438583, -0.17087273, -0.13651106, -0.177... \n", 221 | "16551 [-0.02985644, 0.015207137, 0.22520831, 0.13441... \n", 222 | "16552 [-0.2151032, -0.15736936, -0.08698822, -0.0328... \n", 223 | "16553 [-0.1577483, -0.07757879, 0.1852913, 0.0405876... \n", 224 | "16554 [-0.14650379, -0.2218233, -0.10506437, -0.1519... 
" 225 | ] 226 | }, 227 | "execution_count": 8, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "voc_exp.table.to_pandas().tail()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 9, 239 | "id": "0652d847", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stderr", 244 | "output_type": "stream", 245 | "text": [ 246 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 247 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 248 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100%|█\n", 249 | "LanceDB embedding space already exists. Attempting to reuse it. Use force=True to overwrite.\n", 250 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 251 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 252 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/coco8/labels/train.cache... 4 images, 0 backgrounds, 0 corrupt: 100%|█████████\n", 253 | "LanceDB embedding space already exists. Attempting to reuse it. 
Use force=True to overwrite.\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "coco128_exp = Explorer(\"coco128.yaml\")\n", 259 | "coco128_exp.build_embeddings()\n", 260 | "\n", 261 | "coco8_exp = Explorer(\"coco8.yaml\")\n", 262 | "coco8_exp.build_embeddings()\n" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 10, 268 | "id": "4211c07e", 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stderr", 273 | "output_type": "stream", 274 | "text": [ 275 | "\n", 276 | "|-----------------------------------------------|\n", 277 | "\t Number of images: 130\n", 278 | "|------------------------------------------------|\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "coco128_exp.add_imgs(coco8_exp, [2,3])" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 11, 289 | "id": "e9210257", 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "data": { 294 | "text/html": [ 295 | "
\n", 296 | "\n", 309 | "\n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | "
idpathclslabelsbboxesimgvector
125125/Users/ayushchaurasia/Documents/ultralytics/da...[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5][person, person, person, person, person, perso...[[0.912320077419281, 0.5608879923820496, 0.017...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1441657, -0.18747765, -0.16826846, -0.1800...
126126/Users/ayushchaurasia/Documents/ultralytics/da...[39, 39, 45, 62, 64, 73, 73, 73, 73, 73, 73, 7...[bottle, bottle, bowl, tv, mouse, book, book, ...[[0.18036000430583954, 0.8277199864387512, 0.0...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.10117926, -0.09878854, 0.06405701, -0.0227...
127127/Users/ayushchaurasia/Documents/ultralytics/da...[2, 15][car, cat][[0.5018590092658997, 0.8207259178161621, 0.99...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.13692749, -0.14139369, 0.24011154, 0.07854...
1282/Users/ayushchaurasia/Documents/ultralytics/da...[58, 75][potted plant, vase][[0.5192189812660217, 0.4511209726333618, 0.39...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1577483, -0.07757879, 0.1852913, 0.0405876...
1293/Users/ayushchaurasia/Documents/ultralytics/da...[22][zebra][[0.3462109863758087, 0.4932590126991272, 0.68...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.14650379, -0.2218233, -0.10506437, -0.1519...
\n", 375 | "
" 376 | ], 377 | "text/plain": [ 378 | " id path \\\n", 379 | "125 125 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 380 | "126 126 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 381 | "127 127 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 382 | "128 2 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 383 | "129 3 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 384 | "\n", 385 | " cls \\\n", 386 | "125 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5] \n", 387 | "126 [39, 39, 45, 62, 64, 73, 73, 73, 73, 73, 73, 7... \n", 388 | "127 [2, 15] \n", 389 | "128 [58, 75] \n", 390 | "129 [22] \n", 391 | "\n", 392 | " labels \\\n", 393 | "125 [person, person, person, person, person, perso... \n", 394 | "126 [bottle, bottle, bowl, tv, mouse, book, book, ... \n", 395 | "127 [car, cat] \n", 396 | "128 [potted plant, vase] \n", 397 | "129 [zebra] \n", 398 | "\n", 399 | " bboxes \\\n", 400 | "125 [[0.912320077419281, 0.5608879923820496, 0.017... \n", 401 | "126 [[0.18036000430583954, 0.8277199864387512, 0.0... \n", 402 | "127 [[0.5018590092658997, 0.8207259178161621, 0.99... \n", 403 | "128 [[0.5192189812660217, 0.4511209726333618, 0.39... \n", 404 | "129 [[0.3462109863758087, 0.4932590126991272, 0.68... \n", 405 | "\n", 406 | " img \\\n", 407 | "125 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 408 | "126 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 409 | "127 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 410 | "128 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 411 | "129 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 412 | "\n", 413 | " vector \n", 414 | "125 [-0.1441657, -0.18747765, -0.16826846, -0.1800... \n", 415 | "126 [-0.10117926, -0.09878854, 0.06405701, -0.0227... \n", 416 | "127 [-0.13692749, -0.14139369, 0.24011154, 0.07854... \n", 417 | "128 [-0.1577483, -0.07757879, 0.1852913, 0.0405876... 
\n", 418 | "129 [-0.14650379, -0.2218233, -0.10506437, -0.1519... " 419 | ] 420 | }, 421 | "execution_count": 11, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "coco128_exp.table.to_pandas().tail()" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 12, 433 | "id": "d0f8e343", 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/html": [ 439 | "
\n", 440 | "\n", 453 | "\n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | "
idpathclslabelsbboxesimgvector
00/Users/ayushchaurasia/Documents/ultralytics/da...[45, 45, 45, 49, 49, 49, 49, 50][bowl, bowl, bowl, orange, orange, orange, ora...[[0.4794920086860657, 0.6887710094451904, 0.95...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.02985644, 0.015207137, 0.22520831, 0.13441...
11/Users/ayushchaurasia/Documents/ultralytics/da...[23, 23][giraffe, giraffe][[0.7703359723091125, 0.4896950125694275, 0.33...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.2151032, -0.15736936, -0.08698822, -0.0328...
22/Users/ayushchaurasia/Documents/ultralytics/da...[58, 75][potted plant, vase][[0.5192189812660217, 0.4511209726333618, 0.39...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1577483, -0.07757879, 0.1852913, 0.0405876...
33/Users/ayushchaurasia/Documents/ultralytics/da...[22][zebra][[0.3462109863758087, 0.4932590126991272, 0.68...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.14650379, -0.2218233, -0.10506437, -0.1519...
\n", 509 | "
" 510 | ], 511 | "text/plain": [ 512 | " id path \\\n", 513 | "0 0 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 514 | "1 1 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 515 | "2 2 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 516 | "3 3 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 517 | "\n", 518 | " cls \\\n", 519 | "0 [45, 45, 45, 49, 49, 49, 49, 50] \n", 520 | "1 [23, 23] \n", 521 | "2 [58, 75] \n", 522 | "3 [22] \n", 523 | "\n", 524 | " labels \\\n", 525 | "0 [bowl, bowl, bowl, orange, orange, orange, ora... \n", 526 | "1 [giraffe, giraffe] \n", 527 | "2 [potted plant, vase] \n", 528 | "3 [zebra] \n", 529 | "\n", 530 | " bboxes \\\n", 531 | "0 [[0.4794920086860657, 0.6887710094451904, 0.95... \n", 532 | "1 [[0.7703359723091125, 0.4896950125694275, 0.33... \n", 533 | "2 [[0.5192189812660217, 0.4511209726333618, 0.39... \n", 534 | "3 [[0.3462109863758087, 0.4932590126991272, 0.68... \n", 535 | "\n", 536 | " img \\\n", 537 | "0 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 538 | "1 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 539 | "2 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 540 | "3 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 541 | "\n", 542 | " vector \n", 543 | "0 [-0.02985644, 0.015207137, 0.22520831, 0.13441... \n", 544 | "1 [-0.2151032, -0.15736936, -0.08698822, -0.0328... \n", 545 | "2 [-0.1577483, -0.07757879, 0.1852913, 0.0405876... \n", 546 | "3 [-0.14650379, -0.2218233, -0.10506437, -0.1519... 
" 547 | ] 548 | }, 549 | "execution_count": 12, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": [ 555 | "coco8_exp.table.to_pandas()" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 13, 561 | "id": "733c19b6", 562 | "metadata": {}, 563 | "outputs": [ 564 | { 565 | "name": "stderr", 566 | "output_type": "stream", 567 | "text": [ 568 | "Persisting changes to the dataset...\n", 569 | "\n", 570 | "|-----------------------------------------------|\n", 571 | "\t Number of images: 130\n", 572 | "|------------------------------------------------|\n", 573 | "100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 130/130 [00:00<00:00, 17542.05it/s]\n", 574 | "Changes persisted to the dataset.\n", 575 | "\u001b[34m\u001b[1mLanceDB: \u001b[0mNew dataset created successfully! Run the following command to train a model:\n", 576 | "yolo train data=run/coco_updated epochs=10\n" 577 | ] 578 | } 579 | ], 580 | "source": [ 581 | "coco128_exp.persist(\"coco_updated\")" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 15, 587 | "id": "612c7464", 588 | "metadata": { 589 | "scrolled": true 590 | }, 591 | "outputs": [ 592 | { 593 | "name": "stderr", 594 | "output_type": "stream", 595 | "text": [ 596 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 597 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 598 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/coco128/labels/train2017... 
128 images, 2 backgrounds, 0 corrupt: 100%|███████\n", 599 | "New cache created: /Users/ayushchaurasia/Documents/ultralytics/datasets/coco128/labels/train2017.cache\n" 600 | ] 601 | }, 602 | { 603 | "name": "stdout", 604 | "output_type": "stream", 605 | "text": [ 606 | "WARNING: rate limit only support up to 3.10, proceeding without rate limiter\n" 607 | ] 608 | }, 609 | { 610 | "name": "stderr", 611 | "output_type": "stream", 612 | "text": [ 613 | "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 130/130 [00:06<00:00, 21.50it/s]\n", 614 | "\u001b[34m\u001b[1mLanceDB:\u001b[0m Embedding space built successfully.\n" 615 | ] 616 | } 617 | ], 618 | "source": [ 619 | "coco_updated = Explorer(\"coco_updated/coco_updated.yaml\")\n", 620 | "coco_updated.build_embeddings()" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 23, 626 | "id": "1f2ec47e", 627 | "metadata": {}, 628 | "outputs": [ 629 | { 630 | "data": { 631 | "text/html": [ 632 | "
\n", 633 | "\n", 646 | "\n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | "
idpathclslabelsbboxesimgvector
07/Users/ayushchaurasia/Documents/ultralytics/da...[0, 0, 0, 20, 20][person, person, person, elephant, elephant][[0.44568800926208496, 0.48061496019363403, 0....[255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,...[-0.16883952915668488, -0.19498196244239807, -...
145/Users/ayushchaurasia/Documents/ultralytics/da...[20, 20][elephant, elephant][[0.6323270201683044, 0.6266880035400391, 0.73...[255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,...[-0.13958469033241272, -0.14195983111858368, 0...
251/Users/ayushchaurasia/Documents/ultralytics/da...[20, 20, 20, 20, 20, 20][elephant, elephant, elephant, elephant, eleph...[[0.18141399323940277, 0.6764050126075745, 0.3...[255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,...[-0.19354629516601562, -0.1750415414571762, -0...
3114/Users/ayushchaurasia/Documents/ultralytics/da...[20, 20, 20, 20, 20, 20, 20][elephant, elephant, elephant, elephant, eleph...[[0.5598670244216919, 0.7241129279136658, 0.06...[255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,...[-0.18665747344493866, -0.1859922856092453, -0...
\n", 702 | "
" 703 | ], 704 | "text/plain": [ 705 | " id path \\\n", 706 | "0 7 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 707 | "1 45 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 708 | "2 51 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 709 | "3 114 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 710 | "\n", 711 | " cls \\\n", 712 | "0 [0, 0, 0, 20, 20] \n", 713 | "1 [20, 20] \n", 714 | "2 [20, 20, 20, 20, 20, 20] \n", 715 | "3 [20, 20, 20, 20, 20, 20, 20] \n", 716 | "\n", 717 | " labels \\\n", 718 | "0 [person, person, person, elephant, elephant] \n", 719 | "1 [elephant, elephant] \n", 720 | "2 [elephant, elephant, elephant, elephant, eleph... \n", 721 | "3 [elephant, elephant, elephant, elephant, eleph... \n", 722 | "\n", 723 | " bboxes \\\n", 724 | "0 [[0.44568800926208496, 0.48061496019363403, 0.... \n", 725 | "1 [[0.6323270201683044, 0.6266880035400391, 0.73... \n", 726 | "2 [[0.18141399323940277, 0.6764050126075745, 0.3... \n", 727 | "3 [[0.5598670244216919, 0.7241129279136658, 0.06... \n", 728 | "\n", 729 | " img \\\n", 730 | "0 [255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,... \n", 731 | "1 [255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,... \n", 732 | "2 [255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,... \n", 733 | "3 [255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,... \n", 734 | "\n", 735 | " vector \n", 736 | "0 [-0.16883952915668488, -0.19498196244239807, -... \n", 737 | "1 [-0.13958469033241272, -0.14195983111858368, 0... \n", 738 | "2 [-0.19354629516601562, -0.1750415414571762, -0... \n", 739 | "3 [-0.18665747344493866, -0.1859922856092453, -0... 
" 740 | ] 741 | }, 742 | "execution_count": 23, 743 | "metadata": {}, 744 | "output_type": "execute_result" 745 | } 746 | ], 747 | "source": [ 748 | "# Remove data containing elephant\n", 749 | "coco_updated.sql(\"SELECT * FROM 'table' WHERE labels LIKE '%elephant, elephant%'\")" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 24, 755 | "id": "a9d676f9", 756 | "metadata": {}, 757 | "outputs": [ 758 | { 759 | "name": "stderr", 760 | "output_type": "stream", 761 | "text": [ 762 | "\n", 763 | "|-----------------------------------------------|\n", 764 | "\t Number of images: 126\n", 765 | "|------------------------------------------------|\n" 766 | ] 767 | } 768 | ], 769 | "source": [ 770 | "coco_updated.remove_imgs([7, 45, 51, 114])" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": 18, 776 | "id": "060fb7bf", 777 | "metadata": {}, 778 | "outputs": [ 779 | { 780 | "name": "stdout", 781 | "output_type": "stream", 782 | "text": [ 783 | "130\n" 784 | ] 785 | }, 786 | { 787 | "data": { 788 | "text/html": [ 789 | "
\n", 790 | "\n", 803 | "\n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | "
idpathclslabelsbboxesimgvector
125125/Users/ayushchaurasia/Documents/ultralytics/da...[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5][person, person, person, person, person, perso...[[0.912320077419281, 0.5608879923820496, 0.017...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1441657, -0.18747765, -0.16826846, -0.1800...
126126/Users/ayushchaurasia/Documents/ultralytics/da...[39, 39, 45, 62, 64, 73, 73, 73, 73, 73, 73, 7...[bottle, bottle, bowl, tv, mouse, book, book, ...[[0.18036000430583954, 0.8277199864387512, 0.0...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.10117926, -0.09878854, 0.06405701, -0.0227...
127127/Users/ayushchaurasia/Documents/ultralytics/da...[2, 15][car, cat][[0.5018590092658997, 0.8207259178161621, 0.99...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.13692749, -0.14139369, 0.24011154, 0.07854...
128128/Users/ayushchaurasia/Documents/ultralytics/da...[58, 75][potted plant, vase][[0.5192189812660217, 0.4511209726333618, 0.39...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1577483, -0.07757879, 0.1852913, 0.0405876...
129129/Users/ayushchaurasia/Documents/ultralytics/da...[22][zebra][[0.3462109863758087, 0.4932590126991272, 0.68...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.14650379, -0.2218233, -0.10506437, -0.1519...
\n", 869 | "
" 870 | ], 871 | "text/plain": [ 872 | " id path \\\n", 873 | "125 125 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 874 | "126 126 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 875 | "127 127 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 876 | "128 128 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 877 | "129 129 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 878 | "\n", 879 | " cls \\\n", 880 | "125 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5] \n", 881 | "126 [39, 39, 45, 62, 64, 73, 73, 73, 73, 73, 73, 7... \n", 882 | "127 [2, 15] \n", 883 | "128 [58, 75] \n", 884 | "129 [22] \n", 885 | "\n", 886 | " labels \\\n", 887 | "125 [person, person, person, person, person, perso... \n", 888 | "126 [bottle, bottle, bowl, tv, mouse, book, book, ... \n", 889 | "127 [car, cat] \n", 890 | "128 [potted plant, vase] \n", 891 | "129 [zebra] \n", 892 | "\n", 893 | " bboxes \\\n", 894 | "125 [[0.912320077419281, 0.5608879923820496, 0.017... \n", 895 | "126 [[0.18036000430583954, 0.8277199864387512, 0.0... \n", 896 | "127 [[0.5018590092658997, 0.8207259178161621, 0.99... \n", 897 | "128 [[0.5192189812660217, 0.4511209726333618, 0.39... \n", 898 | "129 [[0.3462109863758087, 0.4932590126991272, 0.68... \n", 899 | "\n", 900 | " img \\\n", 901 | "125 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 902 | "126 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 903 | "127 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 904 | "128 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 905 | "129 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 906 | "\n", 907 | " vector \n", 908 | "125 [-0.1441657, -0.18747765, -0.16826846, -0.1800... \n", 909 | "126 [-0.10117926, -0.09878854, 0.06405701, -0.0227... \n", 910 | "127 [-0.13692749, -0.14139369, 0.24011154, 0.07854... \n", 911 | "128 [-0.1577483, -0.07757879, 0.1852913, 0.0405876... 
\n", 912 | "129 [-0.14650379, -0.2218233, -0.10506437, -0.1519... " 913 | ] 914 | }, 915 | "execution_count": 18, 916 | "metadata": {}, 917 | "output_type": "execute_result" 918 | } 919 | ], 920 | "source": [ 921 | "print(len(coco_updated.table)) #should be updated\n", 922 | "coco_updated.table.to_pandas().tail()" 923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "execution_count": 4, 928 | "id": "6313c9a8", 929 | "metadata": {}, 930 | "outputs": [ 931 | { 932 | "name": "stderr", 933 | "output_type": "stream", 934 | "text": [ 935 | "100%|██████████████████████████████████████████████████████████████████████████| 16551/16551 [11:06<00:00, 24.83it/s]\n" 936 | ] 937 | } 938 | ], 939 | "source": [ 940 | "sim = voc_exp.get_similarity_index()" 941 | ] 942 | }, 943 | { 944 | "cell_type": "code", 945 | "execution_count": 6, 946 | "id": "135a6953", 947 | "metadata": {}, 948 | "outputs": [ 949 | { 950 | "name": "stderr", 951 | "output_type": "stream", 952 | "text": [ 953 | "100%|██████████████████████████████████████████████████████████████████████████| 16551/16551 [11:07<00:00, 24.81it/s]\n" 954 | ] 955 | }, 956 | { 957 | "data": { 958 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1yElEQVR4nO3de3RU1f3+8WdCyIVLEgJmkiiEKFaIgNwkxAv4LSnhUhGlVmyqESlYG6RcRKEVKKgFaQWrpVBdFnRJFalIW6RoDCAqMWA03E1RI6CQpDUmA3JJSPbvD3+ZMiRgRuZ+3q+1Zq3knD2Tz96zz8yTM+ecsRljjAAAACwszN8FAAAA+BuBCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWF64vwsIBvX19Tp8+LDatm0rm83m73IAAEAzGGN09OhRJScnKyzs/PuACETNcPjwYXXs2NHfZQAAgO/g0KFDuuSSS87bhkDUDG3btpX0zYDGxMT4uRoAANAcDodDHTt2dL6Pnw+BqBkaPiaLiYkhEAEAEGSac7gLB1UDAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADL82sg2rJli2688UYlJyfLZrNp7dq1LuuNMZo9e7aSkpIUHR2tzMxM7d+/36VNZWWlsrOzFRMTo7i4OI0bN07Hjh1zabNz505df/31ioqKUseOHbVw4UJvdw0AAAQRvwair7/+WldddZWWLFnS5PqFCxfqySef1LJly1RYWKjWrVsrKytLJ0+edLbJzs7Wnj17lJeXp3Xr1mnLli2aMGGCc73D4dCQIUOUkpKioqIi/e53v9NvfvMbPf30017vHwAACA42Y4zxdxGSZLPZ9Oqrr2rUqFGSvtk7lJycrGnTpun++++XJFVXV8tut2vFihUaM2aM9u3bp7S0NG3fvl39+vWTJG3YsEHDhw/X559/ruTkZC1dulS//vWvVVZWpoiICEnSjBkztHbtWn300UdN1nLq1CmdOnXK+bvD4VDHjh1VXV2tmJgYL44CAADwFIfDodjY2Ga9fwfsMUSlpaUqKytTZmamc1lsbKzS09NVUFAgSSooKFBcXJwzDElSZmamwsLCVFhY6GwzcOBAZxiSpKysLJWUlOirr75q8m/Pnz9fsbGxzlvHjh290UUAABAgAjYQlZWVSZLsdrvLcrvd7lxXVlamhIQEl/Xh4eGKj493adPUY5z5N842c+ZMVVdXO2+HDh268A4BAICAFe7vAgJRZGSkIiMj/V0GAADwkYDdQ5SYmChJKi8vd1leXl7uXJeYmKiKigqX9adPn1ZlZaVLm6Ye48y/AQAArC1gA1FqaqoSExOVn5/vXOZwOFRYWKiMjAxJUkZGhqqqqlRUVORss3HjRtXX1ys9Pd3ZZsuWLaqtrXW2ycvL0xVXXKF27dr5qDcAACCQ+TUQHTt2TMXFxSouLpb0zYHUxcXFOnjwoGw2myZPnqxHHnlE//jHP7Rr1y7deeedSk5Odp6J1q1bNw0dOlTjx4/Xtm3b9O6772rixIkaM2aMkpOTJUk/+clPFBERoXHjxmnPnj1atWqV/vCHP2jq1Kl+6jUAAAg4xo82bdpkJDW65eTkGGOMqa+vN7NmzTJ2u91ERkaawYMHm5KSEpfH+PLLL83tt99u2rRpY2JiYszYsWPN0aNHXdrs2LHDXHfddSYyMtJcfPHFZsGCBW7VWV1dbSSZ6urqC+ovAADwHXfevwPmOkSBzJ3rGAAAgMAQEtchAgA
A8BUCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAOK/OM17zdwleRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWF9CBqK6uTrNmzVJqaqqio6N12WWX6eGHH5YxxtnGGKPZs2crKSlJ0dHRyszM1P79+10ep7KyUtnZ2YqJiVFcXJzGjRunY8eO+bo7AAAgQAV0IHrssce0dOlS/fGPf9S+ffv02GOPaeHChXrqqaecbRYuXKgnn3xSy5YtU2FhoVq3bq2srCydPHnS2SY7O1t79uxRXl6e1q1bpy1btmjChAn+6BIAAAhANnPm7pYA88Mf/lB2u13PPvusc9no0aMVHR2tF154QcYYJScna9q0abr//vslSdXV1bLb7VqxYoXGjBmjffv2KS0tTdu3b1e/fv0kSRs2bNDw4cP1+eefKzk5+VvrcDgcio2NVXV1tWJiYrzTWQAAAlTnGa/pswUj/F2G29x5/w7oPUTXXHON8vPz9e9//1uStGPHDr3zzjsaNmyYJKm0tFRlZWXKzMx03ic2Nlbp6ekqKCiQJBUUFCguLs4ZhiQpMzNTYWFhKiwsbPLvnjp1Sg6Hw+UGAABCV7i/CzifGTNmyOFwqGvXrmrRooXq6ur06KOPKjs7W5JUVlYmSbLb7S73s9vtznVlZWVKSEhwWR8eHq74+Hhnm7PNnz9fc+fO9XR3AABAgAroPUQvv/yyVq5cqb/+9a/64IMP9Nxzz+n3v/+9nnvuOa/+3ZkzZ6q6utp5O3TokFf/HgAA8K+A3kM0ffp0zZgxQ2PGjJEk9ejRQwcOHND8+fOVk5OjxMRESVJ5ebmSkpKc9ysvL1evXr0kSYmJiaqoqHB53NOnT6uystJ5/7NFRkYqMjLSCz0CAACBKKD3EB0/flxhYa4ltmjRQvX19ZKk1NRUJSYmKj8/37ne4XCosLBQGRkZkqSMjAxVVVWpqKjI2Wbjxo2qr69Xenq6D3oBAAACXUDvIbrxxhv16KOPqlOnTrryyiv14YcfatGiRbr77rslSTabTZMnT9Yjjzyiyy+/XKmpqZo1a5aSk5M1atQoSVK3bt00dOhQjR8/XsuWLVNtba0mTpyoMWPGNOsMMwAAEPoCOhA99dRTmjVrln7xi1+ooqJCycnJuueeezR79mxnmwceeEBff/21JkyYoKqqKl133XXasGGDoqKinG1WrlypiRMnavDgwQoLC9Po0aP15JNP+qNLAAAgAAX0dYgCBdchAgBYGdchAgAAsAACEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDy3A9Hzzz+vU6dONVpeU1Oj559/3iNFAQAA+JLbgWjs2LGqrq5utPzo0aMaO3asR4oCAADwJbcDkTFGNput0fLPP/9csbGxHikKAADAl8Kb27B3796y2Wyy2WwaPHiwwsP/d9e6ujqVlpZq6NChXikSAADAm5odiEaNGiVJKi4uVlZWltq0aeNcFxERoc6dO2v06NEeLxAAAMDbmh2I5syZI0nq3LmzbrvtNkVFRXmtKAA
AAF9qdiBqkJOTI+mbs8oqKipUX1/vsr5Tp06eqQwAAMBH3A5E+/fv1913362tW7e6LG842Lqurs5jxQEAAPiC24HorrvuUnh4uNatW6ekpKQmzzgDAAAIJm4HouLiYhUVFalr167eqAcAAMDn3L4OUVpamv773/96oxYAAAC/cDsQPfbYY3rggQe0efNmffnll3I4HC43AACAYOP2R2aZmZmSpMGDB7ss56BqAAAQrNwORJs2bfJGHQAAAH7jdiAaNGiQN+oAAADwG7cD0ZYtW867fuDAgd+5GAAAAH9wOxDdcMMNjZadeS0ijiECAADBxu2zzL766iuXW0VFhTZs2KCrr75ab7zxhjdqBAAA8Cq39xDFxsY2WvaDH/xAERERmjp1qoqKijxSGAAAgK+4vYfoXOx2u0pKSjz1cAAAAD7j9h6inTt3uvxujNGRI0e0YMEC9erVy1N1AQAA+IzbgahXr16y2WwyxrgsHzBggP7yl794rDAAAABfcTsQlZaWuvweFhamiy66SFFRUR4rCgAAwJfcDkQpKSneqAMAAMBvvtNB1W+99ZZuvPFGdenSRV26dNHIkSP19ttve7o2AAAAn3A7EL3wwgvKzMxUq1atNGnSJE2aNEnR0dEaPHiw/vrXv3qjRgAAAK+ymbOPjv4W3bp104QJEzRlyhSX5YsWLdIzzzyjffv2ebTAQOBwOBQbG6vq6mrFxMT4uxwAAHyq84zX9NmCEf4uw23uvH+7vYfo008/1Y033tho+ciRIxsdcA0AABAM3A5EHTt2VH5+fqPlb775pjp27OiRogAAAHzJ7bPMpk2bpkmTJqm4uFjXXHONJOndd9/VihUr9Ic//MHjBQIAAHib24Ho3nvvVWJioh5//HG9/PLLkr45rmjVqlW66aabPF4gAACAt7kdiCTp5ptv1s033+zpWgAAAPzC7WOItm/frsLCwkbLCwsL9f7773ukKAAAAF9yOxDl5ubq0KFDjZZ/8cUXys3N9UhRAAAAvuR2INq7d6/69OnTaHnv3r21d+9ejxQFAADgS24HosjISJWXlzdafuTIEYWHf6dDks7riy++0E9/+lO1b99e0dHR6tGjh8tHc8YYzZ49W0lJSYqOjlZmZqb279/v8hiVlZXKzs5WTEyM4uLiNG7cOB07dszjtQIAgODkdiAaMmSIZs6cqerqaueyqqoq/epXv9IPfvADjxb31Vdf6dprr1XLli31r3/9S3v37tXjjz+udu3aOdssXLhQTz75pJYtW6bCwkK1bt1aWVlZOnnypLNNdna29uzZo7y8PK1bt05btmzRhAkTPForAAAIXm5/dccXX3yhgQMH6ssvv1Tv3r0lScXFxbLb7crLy/PoxRlnzJihd99995xfHGuMUXJysqZNm6b7779fklRdXS273a4VK1ZozJgx2rdvn9LS0rR9+3b169dPkrRhwwYNHz5cn3/+uZKTk7+1Dr66AwBgZXx1RxMuvvhi7dy5UwsXLlRaWpr69u2rP/zhD9q1a5fHr1T9j3/8Q/369dOtt96qhIQE9e7dW88884xzfWlpqcrKypSZmelcFhsbq/T0dBUUFEiSCgoKFBcX5wxDkpSZmamwsLAmz5aTpFOnTsnhcLjcAABA6PpOB/20bt3aJx85ffrpp1q6dKmmTp2qX/3qV9q+fbsmTZqkiIgI5eTkqKysTJJkt9td7me3253rysrKlJCQ4LI+PDxc8fHxzjZnmz9/vubOneuFHgEAgEDk9h4iX6qvr1efPn3029/+Vr1799aECRM0fvx4LVu2zKt/t+EYqYZbU5cZAAAAoSOgA1FSUpLS0tJclnXr1k0HDx6UJCUmJkpSo7PeysvLnesSExNVUVHhsv706dOqrKx0tjlbZGSkYmJiXG4AACB0BXQguvbaa1VSUuKy7N///rdSUlIkSampqUpMTFR+fr5zvcPhUGFhoTIyMiRJGRkZqqqqUlFRkbPNxo0bVV9fr/T0dB/0AgA
A93Se8Zq/S7Acz184yIOmTJmia665Rr/97W/14x//WNu2bdPTTz+tp59+WpJks9k0efJkPfLII7r88suVmpqqWbNmKTk5WaNGjZL0zR6loUOHOj9qq62t1cSJEzVmzJhmnWEGAABCn9t7iHJycrRlyxZv1NLI1VdfrVdffVUvvviiunfvrocfflhPPPGEsrOznW0eeOAB3XfffZowYYKuvvpqHTt2TBs2bFBUVJSzzcqVK9W1a1cNHjxYw4cP13XXXecMVQAAAG5fh2jUqFFav369UlJSNHbsWOXk5Ojiiy/2Vn0BgesQAQB8KdCu+xNo9TSXV69DtHbtWn3xxRe69957tWrVKnXu3FnDhg3T3/72N9XW1n7nogEAAPzlOx1UfdFFF2nq1KnasWOHCgsL1aVLF91xxx1KTk7WlClTGn2XGAAAQCC7oLPMjhw5ory8POXl5alFixYaPny4du3apbS0NC1evNhTNQIAAHiV24GotrZWr7zyin74wx8qJSVFq1ev1uTJk3X48GE999xzevPNN/Xyyy9r3rx53qgXAADA49w+7T4pKUn19fW6/fbbtW3bNvXq1atRm//7v/9TXFycB8oDAADwPrcD0eLFi3Xrrbe6nNZ+tri4OJWWll5QYQAuXLCeGQIAvub2R2abNm1q8myyr7/+WnfffbdHigIAAPAltwPRc889pxMnTjRafuLECT3//PMeKQoAAMCXmv2RmcPhkDFGxhgdPXrU5SOzuro6rV+/XgkJCV4pEgAAwJuaHYji4uJks9lks9n0ve99r9F6m82muXPnerQ4AAAAX2h2INq0aZOMMfr+97+vV155RfHx8c51ERERSklJ4ctSAQBAUGp2IBo0aJAkqbS0VJ06dZLNZvNaUQAAAL7UrEC0c+dOde/eXWFhYaqurtauXbvO2bZnz54eKw4AAMAXmhWIevXqpbKyMiUkJKhXr16y2WwyxjRqZ7PZVFdX5/Eigaa4e40drskDADiXZgWi0tJSXXTRRc6fAQAAQkmzAlFKSoqkb77HbO7cuZo1a5ZSU1O9WhgAAICvuHVhxpYtW+qVV17xVi0AAAB+4faVqkeNGqW1a9d6oRQAAAD/cPvLXS+//HLNmzdP7777rvr27avWrVu7rJ80aZLHigMAAPAFtwPRs88+q7i4OBUVFamoqMhlnc1mIxABAICg43Yg4iwzAICvcdkMeJvbxxABAACEGrf3EEnS559/rn/84x86ePCgampqXNYtWrTII4UBAAD4ituBKD8/XyNHjtSll16qjz76SN27d9dnn30mY4z69OnjjRoBAAC8yu2PzGbOnKn7779fu3btUlRUlF555RUdOnRIgwYN0q233uqNGgEAALzK7UC0b98+3XnnnZKk8PBwnThxQm3atNG8efP02GOPebxAAAAAb3M7ELVu3dp53FBSUpI++eQT57r//ve/nqsMAADAR9w+hmjAgAF655131K1bNw0fPlzTpk3Trl27tGbNGg0YMMAbNQIAAHiV24Fo0aJFOnbsmCRp7ty5OnbsmFatWqXLL7+cM8wAAEBQcjsQXXrppc6fW7durWXLlnm0IAAAAF/jwowAAMDymrWHqF27drLZbM16wMrKygsqCAAAwNeaFYieeOIJL5cBAADgP80KRDk5Od6uAwAAwG+aFYgcDodiYmKcP59PQzsAAIBg0exjiI4cOaKEhATFxcU1eTyRMUY2m011dXUeLxIAGnSe8Zo+WzDC32UACDHNCkQbN25UfHy8JGnTpk1eLQgAAMDXmhWIBg0a1OTPAAAAocDtCzNK0smTJ7Vz505VVFSovr7eZd3IkSM9UhgQiPi4BgC+EWqvh24Hog0bNujOO+9s8otcOYYIAAAEI7evVH3ffffp1ltv1ZEjR1RfX+9yIwwBAIBg5HYgKi8v19SpU2W3271RDwAAgM+5HYh+9KMfafPmzV4oBQAAwD/cPoboj3/8o2699Va9/fbb6tGjh1q2bOmyftK
kSR4rDgAAwBfcDkQvvvii3njjDUVFRWnz5s0uF2m02WwEIgAAEHTcDkS//vWvNXfuXM2YMUNhYW5/4gYAABBw3E40NTU1uu222whDAAAgZLidanJycrRq1Spv1AKEhM4zXvN3CT5nxT4DCC1uf2RWV1enhQsX6vXXX1fPnj0bHVS9aNEijxUHAADOL9SuGO0vbgeiXbt2qXfv3pKk3bt3u6w78wBrAACAYOF2IOLb7tEc/McCAAgmHBkNAAAsr1l7iG655RatWLFCMTExuuWWW87bds2aNR4pDAAAwFeaFYhiY2OdxwfFxsZ6tSAAAABfa1YgWr58eZM/AwCA8wumYyqDqVZPc/sYohMnTuj48ePO3w8cOKAnnnhCb7zxhkcLa8qCBQtks9k0efJk57KTJ08qNzdX7du3V5s2bTR69GiVl5e73O/gwYMaMWKEWrVqpYSEBE2fPl2nT5/2er0AAN/heli4EG4HoptuuknPP/+8JKmqqkr9+/fX448/rptuuklLly71eIENtm/frj//+c/q2bOny/IpU6bon//8p1avXq233npLhw8fdjnOqa6uTiNGjFBNTY22bt2q5557TitWrNDs2bO9VisAAAgubgeiDz74QNdff70k6W9/+5sSExN14MABPf/883ryySc9XqAkHTt2TNnZ2XrmmWfUrl075/Lq6mo9++yzWrRokb7//e+rb9++Wr58ubZu3ar33ntPkvTGG29o7969euGFF9SrVy8NGzZMDz/8sJYsWaKamhqv1AsgeLBXAaGOOd48bgei48ePq23btpK+CRu33HKLwsLCNGDAAB04cMDjBUpSbm6uRowYoczMTJflRUVFqq2tdVnetWtXderUSQUFBZKkgoIC9ejRQ3a73dkmKytLDodDe/bsafLvnTp1Sg6Hw+UGAJ7EmxQQWNwORF26dNHatWt16NAhvf766xoyZIgkqaKiQjExMR4v8KWXXtIHH3yg+fPnN1pXVlamiIgIxcXFuSy32+0qKytztjkzDDWsb1jXlPnz5ys2NtZ569ixowd6AgAAApXbgWj27Nm6//771blzZ6WnpysjI0PSN3uLGr7Sw1MOHTqkX/7yl1q5cqWioqI8+tjnM3PmTFVXVztvhw4d8tnfBgAAvuf2V3f86Ec/0nXXXacjR47oqquuci4fPHiwbr75Zo8WV1RUpIqKCvXp08e5rK6uTlu2bNEf//hHvf7666qpqVFVVZXLXqLy8nIlJiZKkhITE7Vt2zaXx204C62hzdkiIyMVGRnp0b4AAIDA9Z2+uiMxMVG9e/dWWNj/7t6/f3917drVY4VJ34SsXbt2qbi42Hnr16+fsrOznT+3bNlS+fn5zvuUlJTo4MGDzj1XGRkZ2rVrlyoqKpxt8vLyFBMTo7S0NI/WCwAAgpPbe4h8qW3bturevbvLstatW6t9+/bO5ePGjdPUqVMVHx+vmJgY3XfffcrIyNCAAQMkSUOGDFFaWpruuOMOLVy4UGVlZXrooYeUm5vLXiAAQcvKF9ALNDwXoSGgA1FzLF68WGFhYRo9erROnTqlrKws/elPf3Kub9GihdatW6d7771XGRkZat26tXJycjRv3jw/Vg0AAAJJ0AWizZs3u/weFRWlJUuWaMmSJee8T0pKitavX+/lygAAQLD6TscQAQAAhBICEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEUJO5xmv+bsEACGO15nQQyACAACWRyDCBeG/JAAILrxuN41ABAAALI9ABAAALI9ABABBho88EAoCbR4TiAAAfhFob4iwNgIRgIDAmyMAfyIQAQAAyyMQAQAAyyMQAQAAyyMQAQAAyyMQAQAAyyMQAUGMM7MAwDMIRAAAwPIIRAAAwPIIRAAAwPIIRPA5jnuxDp5rAMGCQAQ
AACyPQAQA34I9XUDoIxABQAggtAEXhkCEoMKLPgDAGwhEAADA8ghEgBewJwuAv/E65B4CEQAAsDwCEQD4AP+tA4GNQAQAACyPQAQAABqx2l5NAhEAwOes9maLwEcgAgAAlkcgAgAAlkcgAgAAlkcgAgAAlkcgAgDABziQPLARiAAAgOURiAAENP6rBuALBCIAfhWqgSdU+wWEKgIRACCoEDbhDQQiAABgeQEdiObPn6+rr75abdu2VUJCgkaNGqWSkhKXNidPnlRubq7at2+vNm3aaPTo0SovL3dpc/DgQY0YMUKtWrVSQkKCpk+frtOnT/uyKwAAIIAFdCB66623lJubq/fee095eXmqra3VkCFD9PXXXzvbTJkyRf/85z+1evVqvfXWWzp8+LBuueUW5/q6ujqNGDFCNTU12rp1q5577jmtWLFCs2fP9keXAABohI8B/S+gA9GGDRt011136corr9RVV12lFStW6ODBgyoqKpIkVVdX69lnn9WiRYv0/e9/X3379tXy5cu1detWvffee5KkN954Q3v37tULL7ygXr16adiwYXr44Ye1ZMkS1dTU+LN7AJrAGwM8hbkEdwR0IDpbdXW1JCk+Pl6SVFRUpNraWmVmZjrbdO3aVZ06dVJBQYEkqaCgQD169JDdbne2ycrKksPh0J49e5r8O6dOnZLD4XC5AbhwvEEBCFRBE4jq6+s1efJkXXvtterevbskqaysTBEREYqLi3Npa7fbVVZW5mxzZhhqWN+wrinz589XbGys89axY0cP9wYIHIQUXKjOM17z2zxi/sJTgiYQ5ebmavfu3XrppZe8/rdmzpyp6upq5+3QoUNe/5u+1JwXEE+1AQB4Fq+93hEUgWjixIlat26dNm3apEsuucS5PDExUTU1NaqqqnJpX15ersTERGebs886a/i9oc3ZIiMjFRMT43KD//EiAAQeT26XbOPwp4AORMYYTZw4Ua+++qo2btyo1NRUl/V9+/ZVy5YtlZ+f71xWUlKigwcPKiMjQ5KUkZGhXbt2qaKiwtkmLy9PMTExSktL801HfIwXFXgacwrBhPmK7yLc3wWcT25urv7617/q73//u9q2bes85ic2NlbR0dGKjY3VuHHjNHXqVMXHxysmJkb33XefMjIyNGDAAEnSkCFDlJaWpjvuuEMLFy5UWVmZHnroIeXm5ioyMtKf3QMAAAEioPcQLV26VNXV1brhhhuUlJTkvK1atcrZZvHixfrhD3+o0aNHa+DAgUpMTNSaNWuc61u0aKF169apRYsWysjI0E9/+lPdeeedmjdvnj+6hO+A//b8g3EHYCUBHYiMMU3e7rrrLmebqKgoLVmyRJWVlfr666+1Zs2aRscGpaSkaP369Tp+/Lj+85//6Pe//73CwwN659g5hcqblDf6ESpjg+AWCPMwEGoIFYyldQR0III1+fsFyN9/P9AxPgBCEYEIAAD4RCD/Q0UgAgAAlkcgAgAAlkcgClKBvNsxmDGuQHBi28WFIhABAYQXdWvx1fMdDPMqGGpEaCMQAQg4vDkC8DUCEXyCN7jQxXMLIBQQiAB8Z74MQwQvAN5EIAKAANMQ/giBgO8QiEIML6De54kxPvsxeN7gD8w7SMyDBgQiIMjw4uVdjK/vMeYIBAQiAD4Ram96odYfb2O8EOgIRICX8AbwP982FoEyVt+ljvPdx1P9CpTxAUIZgSiI8KIIf2DehR6eU88IpnE8V63u9iGY+uwuAhHcFsobBNCAeY7virkTnAhEOC82bACAFRCIAD8hbIYuntv/6TzjNcYDQTEHCEQB7NteSIJhggVDjc0RKv0A3MXch1UQiAA/88UbTlN/gzc6wH/Y/gIPgQhe4+lTmP0lEGsCcGECYbtuTg2BUOeFCpY+EIiAIBQsLzCBiLHD2bzxdTxonkAaNwIR8P8F0obpK566NgmCF8/1/zAW1kYgAgAEFIIJ/IFABAQY3gwAV2duE2wf8BYCUQDx5IbOi0b
gudDnJNgvwYDAYMW5YpU+W6Wf3kIgAr6Dc/3HygsSENi8+Y9JKLJSfwlEIcBKE9Zd/h4bf//9QBZKYxNKfUHTeI5DH4EIAM6BN0HAOghEFsMLPIJpDoTCcXXBNN4I/Ocr0OsLZgSiEBcMG08w1OhrjElw+q7PG8834H8EIgQF3jAaY0wA62L79zwCEbyODfe7C9SxC9S64H++nBvMQ3gSgQhBjxdF+Foozzm+1yt0+eN5Caa5QCCyAC7o9+0Yh9AVbM9tsNUbaAJl/PiewOBDIELA4oXDPYyXNXnzebf6m7qv+2mVcQ1UBCI4hdLGGEp9AUKFFbfLUOhzKPShOQhEIcIbE9YXG4FVNjTgTE3N+0DeFgKltkCpA+cXrM8TgcjCgnXSAmg+tnOgeQhEQDPxxoJAxxz9H8bCt0JhvAlEQShYJ16w1n0+odinYGb1g4DPZtV+n0uwfVQZyC5k3AJ1zAlEASZQJ0qw43goV8FUKwD4AoEIbgmU01B5Q28+K46VFfvsa6E8xsHSt2CpM1gQiNAsbHjBh+cM+B+2B3wbAlGAYuMFfMtX2xzbtnVY5bkOlWOzCERB4rtMrrPv460JGggTPxBq8IXm9DOUn+dz4QtFL0wo9ilYXGiYCNZr0AUiApFFuROWQm3j4AsOvcvfffX3mwnOL1jGPNhfE4OhxkBDIEIjbEjBj+ewaYwLJP/PA398/xy+HYEoADGhPStYx5Nd4e7xZN9CeZwQnJiT3kcggqX46riqUMH4+I+njxfzx/FnzB//4zloPgIRPCqYNr5gP0bAG6za7+bw19jwnCBU5kCg94NAFAACfZLAc3iuPYexBOBJBCIEBN7czo3r4yAQBMpV6v0pEGvyFH9e0iNQEIgAeEyov2CGev++jdX772tWGu9A6CuBCPCxQNjwfcEXx2gF21gGW72AlVgqEC1ZskSdO3dWVFSU0tPTtW3bNn+XhADGm1fg4Ln4Hz5CDWzBMm5c1qMxywSiVatWaerUqZozZ44++OADXXXVVcrKylJFRYW/SwsZwb4xwDuYF57DWALeY5lAtGjRIo0fP15jx45VWlqali1bplatWukvf/mLv0sD0Ex8LQcAbwn3dwG+UFNTo6KiIs2cOdO5LCwsTJmZmSooKGjU/tSpUzp16pTz9+rqakmSw+HwSn31p47L4XCo/tTxJtc3d50vH+PsdqH0GOdqx2OEzvPXacpq7Z6bFbRj35zH6DRltVt/K1Cev7PXdZ/zutuPca52PEZgzwFvvMc2PKYx5tsbGwv44osvjCSzdetWl+XTp083/fv3b9R+zpw5RhI3bty4cePGLQRuhw4d+tasYIk9RO6aOXOmpk6d6vy9vr5elZWVat++vWw2m0f/lsPhUMeOHXXo0CHFxMR49LGDEePhivFwxXi4YjwaY0xcWX08jDE6evSokpOTv7WtJQJRhw4d1KJFC5WXl7ssLy8vV2JiYqP2kZGRioyMdFkWFxfnzRIVExNjycl6LoyHK8bDFePhivFojDFxZeXxiI2NbVY7SxxUHRERob59+yo/P9+5rL6+Xvn5+crIyPBjZQAAIBBYYg+RJE2dOlU5OTnq16+f+vfvryeeeEJff/21xo4d6+/SAACAn1kmEN122236z3/+o9mzZ6usrEy9evXShg0bZLfb/VpXZGSk5syZ0+gjOqtiPFwxHq4YD1eMR2OMiSvGo/lsxjTnXDQAAIDQZYljiAAAAM6HQAQAACyPQAQAACyPQAQAACyPQORHS5YsUefOnRUVFaX09HRt27bN3yV5xPz583X11Verbdu2SkhI0KhRo1RSUuLS5oYbbpDNZnO5/fznP3dpc/DgQY0YMUKtWrVSQkKCpk+frtOnT7u02bx5s/r06aPIyEh16dJFK1as8Hb33Pab3/ymUV+7du3qXH/y5Enl5uaqffv2atOmjUaPHt3oIqKhMhaS1Llz50bjYbPZlJubKyn058aWLVt04403Kjk5WTabTWvXrnV
Zb4zR7NmzlZSUpOjoaGVmZmr//v0ubSorK5Wdna2YmBjFxcVp3LhxOnbsmEubnTt36vrrr1dUVJQ6duyohQsXNqpl9erV6tq1q6KiotSjRw+tX7/e4/39Nucbj9raWj344IPq0aOHWrdureTkZN155506fPiwy2M0NacWLFjg0iYUxkOS7rrrrkZ9HTp0qEubUJofPuWRLwuD21566SUTERFh/vKXv5g9e/aY8ePHm7i4OFNeXu7v0i5YVlaWWb58udm9e7cpLi42w4cPN506dTLHjh1zthk0aJAZP368OXLkiPNWXV3tXH/69GnTvXt3k5mZaT788EOzfv1606FDBzNz5kxnm08//dS0atXKTJ061ezdu9c89dRTpkWLFmbDhg0+7e+3mTNnjrnyyitd+vqf//zHuf7nP/+56dixo8nPzzfvv/++GTBggLnmmmuc60NpLIwxpqKiwmUs8vLyjCSzadMmY0zoz43169ebX//612bNmjVGknn11Vdd1i9YsMDExsaatWvXmh07dpiRI0ea1NRUc+LECWeboUOHmquuusq899575u233zZdunQxt99+u3N9dXW1sdvtJjs72+zevdu8+OKLJjo62vz5z392tnn33XdNixYtzMKFC83evXvNQw89ZFq2bGl27drl9TE40/nGo6qqymRmZppVq1aZjz76yBQUFJj+/fubvn37ujxGSkqKmTdvnsucOfP1JlTGwxhjcnJyzNChQ136WllZ6dImlOaHLxGI/KR///4mNzfX+XtdXZ1JTk428+fP92NV3lFRUWEkmbfeesu5bNCgQeaXv/zlOe+zfv16ExYWZsrKypzLli5damJiYsypU6eMMcY88MAD5sorr3S532233WaysrI824ELNGfOHHPVVVc1ua6qqsq0bNnSrF692rls3759RpIpKCgwxoTWWDTll7/8pbnssstMfX29McZac+PsN7z6+nqTmJhofve73zmXVVVVmcjISPPiiy8aY4zZu3evkWS2b9/ubPOvf/3L2Gw288UXXxhjjPnTn/5k2rVr5xwPY4x58MEHzRVXXOH8/cc//rEZMWKESz3p6enmnnvu8Wgf3dFUADjbtm3bjCRz4MAB57KUlBSzePHic94nlMYjJyfH3HTTTee8TyjPD2/jIzM/qKmpUVFRkTIzM53LwsLClJmZqYKCAj9W5h3V1dWSpPj4eJflK1euVIcOHdS9e3fNnDlTx48fd64rKChQjx49XC6cmZWVJYfDoT179jjbnDmGDW0CcQz379+v5ORkXXrppcrOztbBgwclSUVFRaqtrXXpR9euXdWpUydnP0JtLM5UU1OjF154QXfffbfLFydbaW6cqbS0VGVlZS61x8bGKj093WU+xMXFqV+/fs42mZmZCgsLU2FhobPNwIEDFRER4WyTlZWlkpISffXVV842wThG1dXVstlsjb5fcsGCBWrfvr169+6t3/3udy4foYbaeGzevFkJCQm64oordO+99+rLL790rrP6/LgQlrlSdSD573//q7q6ukZXybbb7froo4/8VJV31NfXa/Lkybr22mvVvXt35/Kf/OQnSklJUXJysnbu3KkHH3xQJSUlWrNmjSSprKysyfFpWHe+Ng6HQydOnFB0dLQ3u9Zs6enpWrFiha644godOXJEc+fO1fXXX6/du3errKxMERERjV7c7Xb7t/azYd352gTaWJxt7dq1qqqq0l133eVcZqW5cbaG+puq/cy+JSQkuKwPDw9XfHy8S5vU1NRGj9Gwrl27ducco4bHCEQnT57Ugw8+qNtvv93li0onTZqkPn36KD4+Xlu3btXMmTN15MgRLVq0SFJojcfQoUN1yy23KDU1VZ988ol+9atfadiwYSooKFCLFi0sPT8uFIEIXpWbm6vdu3frnXfecVk+YcIE5889evRQUlKSBg8erE8++USXXXaZr8v0qmHDhjl/7tmzp9LT05WSkqKXX345YN+YfeXZZ5/VsGHDlJy
c7FxmpbmB5qutrdWPf/xjGWO0dOlSl3VTp051/tyzZ09FRETonnvu0fz580PuKyvGjBnj/LlHjx7q2bOnLrvsMm3evFmDBw/2Y2XBj4/M/KBDhw5q0aJFozOJysvLlZiY6KeqPG/ixIlat26dNm3apEsuueS8bdPT0yVJH3/8sSQpMTGxyfFpWHe+NjExMQEdNOLi4vS9731PH3/8sRITE1VTU6OqqiqXNmfOhVAdiwMHDujNN9/Uz372s/O2s9LcaKj/fK8NiYmJqqiocFl/+vRpVVZWemTOBOJrUEMYOnDggPLy8lz2DjUlPT1dp0+f1meffSYp9MbjTJdeeqk6dOjgsn1YbX54CoHIDyIiItS3b1/l5+c7l9XX1ys/P18ZGRl+rMwzjDGaOHGiXn31VW3cuLHRrtmmFBcXS5KSkpIkSRkZGdq1a5fLht3wQpiWluZsc+YYNrQJ9DE8duyYPvnkEyUlJalv375q2bKlSz9KSkp08OBBZz9CdSyWL1+uhIQEjRgx4rztrDQ3UlNTlZiY6FK7w+FQYWGhy3yoqqpSUVGRs83GjRtVX1/vDI8ZGRnasmWLamtrnW3y8vJ0xRVXqF27ds42wTBGDWFo//79evPNN9W+fftvvU9xcbHCwsKcHx2F0nic7fPPP9eXX37psn1YaX54lL+P6raql156yURGRpoVK1aYvXv3mgkTJpi4uDiXM2eC1b333mtiY2PN5s2bXU4NPX78uDHGmI8//tjMmzfPvP/++6a0tNT8/e9/N5deeqkZOHCg8zEaTq0eMmSIKS4uNhs2bDAXXXRRk6dWT58+3ezbt88sWbIkYE6tPtO0adPM5s2bTWlpqXn33XdNZmam6dChg6moqDDGfHPafadOnczGjRvN+++/bzIyMkxGRobz/qE0Fg3q6upMp06dzIMPPuiy3Apz4+jRo+bDDz80H374oZFkFi1aZD788EPnWVMLFiwwcXFx5u9//7vZuXOnuemmm5o87b53796msLDQvPPOO+byyy93Oa26qqrK2O12c8cdd5jdu3ebl156ybRq1arRadXh4eHm97//vdm3b5+ZM2eOX06rPt941NTUmJEjR5pLLrnEFBcXu7yeNJwhtXXrVrN48WJTXFxsPvnkE/PCCy+Yiy66yNx5550hNx5Hjx41999/vykoKDClpaXmzTffNH369DGXX365OXnypPMxQml++BKByI+eeuop06lTJxMREWH69+9v3nvvPX+X5BGSmrwtX77cGGPMwYMHzcCBA018fLyJjIw0Xbp0MdOnT3e51owxxnz22Wdm2LBhJjo62nTo0MFMmzbN1NbWurTZtGmT6dWrl4mIiDCXXnqp828Ekttuu80kJSWZiIgIc/HFF5vbbrvNfPzxx871J06cML/4xS9Mu3btTKtWrczNN99sjhw54vIYoTIWDV5//XUjyZSUlLgst8Lc2LRpU5PbR05OjjHmm1PvZ82aZex2u4mMjDSDBw9uNE5ffvmluf32202bNm1MTEyMGTt2rDl69KhLmx07dpjrrrvOREZGmosvvtgsWLCgUS0vv/yy+d73vmciIiLMlVdeaV577TWv9ftczjcepaWl53w9abhuVVFRkUlPTzexsbEmKirKdOvWzfz2t791CQjGhMZ4HD9+3AwZMsRcdNFFpmXLliYlJcWMHz++0T/SoTQ/fMlmjDE+2BEFAAAQsDiGCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCEBIu+GGGzR58uRzru/cubOeeOIJn9UDIDCF+7sAAPCmNWvWqGXLlv4uA0CAIxABCGnx8fH+LgFAEOAjMwAh7cyPzCoqKnTjjTcqOjpaqampWrlypUvbzZs3KyIiQm+//bZz2cKFC5WQkKDy8nJflg3Ax9hDBMAy7rrrLh0+fFibNm1Sy5YtNWnSJFVUVDjXN4SnO+64Qzt27NCnn36qWbNmafXq1bL
b7X6sHIC3EYgAWMK///1v/etf/9K2bdt09dVXS5KeffZZdevWzaXdI488ory8PE2YMEG7d+9WTk6ORo4c6Y+SAfgQgQiAJezbt0/h4eHq27evc1nXrl0VFxfn0i4iIkIrV65Uz549lZKSosWLF/u4UgD+wDFEAHCWrVu3SpIqKytVWVnp52oA+AKBCIAldO3aVadPn1ZRUZFzWUlJiaqqqlzaffLJJ5oyZYqeeeYZpaenKycnR/X19T6uFoCvEYgAWMIVV1yhoUOH6p577lFhYaGKior0s5/9TNHR0c42dXV1+ulPf6qsrCyNHTtWy5cv186dO/X444/7sXIAvkAgAmAZy5cvV3JysgYNGqRbbrlFEyZMUEJCgnP9o48+qgMHDujPf/6zJCkpKUlPP/20HnroIe3YscNfZQPwAZsxxvi7CAAAAH9iDxEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALC8/wct0sTYYZ2urgAAAABJRU5ErkJggg==", 959 | "text/plain": [ 960 | "
" 961 | ] 962 | }, 963 | "metadata": {}, 964 | "output_type": "display_data" 965 | } 966 | ], 967 | "source": [ 968 | "voc_exp.plot_similirity_index()" 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": null, 974 | "id": "731cad56", 975 | "metadata": {}, 976 | "outputs": [], 977 | "source": [ 978 | "sim.where(sim>500)" 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "execution_count": null, 984 | "id": "91122255", 985 | "metadata": {}, 986 | "outputs": [], 987 | "source": [] 988 | } 989 | ], 990 | "metadata": { 991 | "kernelspec": { 992 | "display_name": "Python 3 (ipykernel)", 993 | "language": "python", 994 | "name": "python3" 995 | }, 996 | "language_info": { 997 | "codemirror_mode": { 998 | "name": "ipython", 999 | "version": 3 1000 | }, 1001 | "file_extension": ".py", 1002 | "mimetype": "text/x-python", 1003 | "name": "python", 1004 | "nbconvert_exporter": "python", 1005 | "pygments_lexer": "ipython3", 1006 | "version": "3.11.4" 1007 | } 1008 | }, 1009 | "nbformat": 4, 1010 | "nbformat_minor": 5 1011 | } 1012 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | # Enable the pycodestyle (`E`) and Pyflakes (`F`) rules by default. 3 | # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or 4 | # McCabe complexity (`C901`) by default. 5 | select = ["E9", "F63", "F7", "F82"] 6 | ignore = [] 7 | 8 | # Allow autofix for all enabled rules (when `--fix`) is provided. 9 | fixable = ["ALL"] 10 | unfixable = [] 11 | 12 | # Exclude a variety of commonly ignored directories. 
13 | exclude = [ 14 | ".bzr", 15 | ".direnv", 16 | ".eggs", 17 | ".git", 18 | ".git-rewrite", 19 | ".hg", 20 | ".mypy_cache", 21 | ".nox", 22 | ".pants.d", 23 | ".pytype", 24 | ".ruff_cache", 25 | ".svn", 26 | ".tox", 27 | ".venv", 28 | "__pypackages__", 29 | "_build", 30 | "buck-out", 31 | "build", 32 | "dist", 33 | "node_modules", 34 | "venv", 35 | ] 36 | per-file-ignores = {} 37 | line-length = 120 38 | 39 | # Allow unused variables when underscore-prefixed. 40 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 41 | 42 | # Assume Python 3.8 43 | target-version = "py38" 44 | 45 | [tool.black] 46 | line-length = 120 47 | include = '\.pyi?$' 48 | exclude = ''' 49 | /( 50 | \.git 51 | | \.hg 52 | | \.mypy_cache 53 | | \.tox 54 | | \.venv 55 | | _build 56 | | buck-out 57 | | build 58 | | dist 59 | )/ 60 | ''' 61 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lancedb 2 | duckdb 3 | scikit-learn 4 | streamlit 5 | plotly 6 | ultralytics 7 | pandas==2.0.3 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from setuptools import find_packages, setup 3 | 4 | # import pkg_resources as pkg 5 | 6 | # Settings 7 | FILE = Path(__file__).resolve() 8 | PARENT = FILE.parent # root directory 9 | README = (PARENT / "README.md").read_text(encoding="utf-8") 10 | REQUIREMENTS = (PARENT / "requirements.txt").read_text(encoding="utf-8").splitlines() 11 | 12 | 13 | def get_version(): 14 | return "0.0.1" 15 | 16 | 17 | setup( 18 | name="yoloexplorer", 19 | version=get_version(), 20 | python_requires=">=3.8", 21 | description="", 22 | # long_description=README, 23 | install_requires=REQUIREMENTS, 24 | long_description_content_type="text/markdown", 25 | author="dev@lance", 26 | 
author_email="contact@lancedb.com", 27 | packages=find_packages(), # required 28 | package_data={"yoloexplorer": ["frontend/streamlit_dash/frontend/**"]}, 29 | include_package_data=True, 30 | ) 31 | -------------------------------------------------------------------------------- /tests/test_explorer.py: -------------------------------------------------------------------------------- 1 | from yoloexplorer import Explorer 2 | 3 | 4 | class TestExplorer: 5 | def test_embeddings_creation(self): 6 | coco_exp = Explorer("coco8.yaml") 7 | coco_exp.build_embeddings(force=True) 8 | assert coco_exp.table_name == "coco8.yaml", "the table name should be coco8.yaml" 9 | assert len(coco_exp.table) == 4, "the length of the embeddings table should be 8" 10 | 11 | def test_sim_idx(self): 12 | coco_exp = Explorer("coco8.yaml") 13 | coco_exp.build_embeddings() 14 | 15 | idx = coco_exp.get_similarity_index(0, 1) # get all imgs 16 | assert len(idx) == 4, "the length of the similar index should be 8" 17 | 18 | def test_operations(self): 19 | coco_exp = Explorer("coco8.yaml") 20 | coco_exp.build_embeddings("yolov8n.pt") 21 | 22 | sim = coco_exp.get_similarity_index() 23 | assert sim.shape[0] == 4, "the length of the embeddings table should be 1" 24 | 25 | _, ids = coco_exp.get_similar_imgs(3, 10) 26 | coco_exp.remove_imgs(ids[0]) 27 | coco_exp.reset() 28 | coco_exp.log_status() 29 | coco_exp.remove_imgs([0, 1]) 30 | coco_exp.remove_imgs([0]) 31 | assert len(coco_exp.table.to_arrow()) == 1, "the length of the embeddings table should be 1" 32 | coco_exp.persist() 33 | assert len(coco_exp.table.to_arrow()) == 1, "the length of the embeddings table should be 1" 34 | 35 | def test_add_imgs(self): 36 | coco_exp = Explorer("coco8.yaml") 37 | coco_exp.build_embeddings() 38 | coco128_exp = Explorer("coco128.yaml") 39 | coco128_exp.build_embeddings() 40 | 41 | coco_exp.add_imgs(coco128_exp, [i for i in range(4)]) 42 | assert len(coco_exp.table) == 8, "the length of the embeddings table should be 
8" 43 | 44 | def test_sql(self): 45 | coco_exp = Explorer("coco8.yaml") 46 | coco_exp.build_embeddings() 47 | result = coco_exp.sql("SELECT id FROM 'table' LIMIT 2") 48 | 49 | assert result["id"].to_list() == [ 50 | 0, 51 | 1, 52 | ], f'the result of the sql query should be [0,1] found {result["id"].to_list}' 53 | 54 | def test_id_reassignment(self): 55 | coco_exp = Explorer("coco128.yaml") 56 | coco_exp.build_embeddings(force=True) 57 | 58 | coco8_exp = Explorer("coco8.yaml") 59 | coco8_exp.build_embeddings(force=True) 60 | # test removal 61 | for i in range(4): 62 | coco_exp.remove_imgs([i]) 63 | df = coco_exp.table.to_pandas() 64 | assert df["id"].to_list() == [idx for idx in range(len(df))], "the ids should be reassigned" 65 | 66 | # test addition 67 | coco_exp.add_imgs(coco8_exp, [i for i in range(4)]) 68 | df = coco_exp.table.to_pandas() 69 | assert df["id"].to_list() == [idx for idx in range(len(df))], "the ids should be reassigned" 70 | 71 | # test reset 72 | coco_exp.reset() 73 | df = coco_exp.table.to_pandas() 74 | assert df["id"].to_list() == [idx for idx in range(128)], "the ids should be reassigned" 75 | 76 | def test_sim_search(self): 77 | coco_exp = Explorer("coco8.yaml") 78 | coco_exp.build_embeddings() 79 | coco_exp.get_similar_imgs(0, 10) 80 | 81 | paths = coco_exp.table.to_pandas()["path"].to_list() 82 | coco_exp.get_similar_imgs(paths, 10) 83 | 84 | """ 85 | # Not supported yet 86 | def test_copy_embeddings_from_table(self): 87 | project = 'runs/test/temp/' 88 | ds = Explorer('coco8.yaml', project=project) 89 | ds.build_embeddings() 90 | 91 | table = project + ds.table_name + '.lance' 92 | ds2 = Explorer(table=table) 93 | assert ds2.table_name == 'coco8.yaml', 'the table name should be coco8.yaml' 94 | """ 95 | -------------------------------------------------------------------------------- /yoloexplorer/__init__.py: -------------------------------------------------------------------------------- 1 | from .explorer import Explorer 2 | from 
.frontend import datasets 3 | from . import config 4 | 5 | __all__ = ["Explorer", "datasets", "config"] 6 | -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/dash_intro.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/dash_intro.gif -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/intro.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/intro.gif -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/plotting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/plotting.png -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/sim_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/sim_index.png -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/sim_plotting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/sim_plotting.png -------------------------------------------------------------------------------- /yoloexplorer/config.py: -------------------------------------------------------------------------------- 1 | 
TEMP_CONFIG_PATH = ".config/last_config.json" 2 | -------------------------------------------------------------------------------- /yoloexplorer/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | from ultralytics.data.dataset import YOLODataset 6 | from ultralytics.data.augment import Format 7 | from ultralytics.data.utils import check_det_dataset 8 | 9 | 10 | def get_dataset_info(data="coco128.yaml", task="detect"): 11 | # TODO: handle other tasks 12 | data = check_det_dataset(data) 13 | 14 | return data 15 | 16 | 17 | def get_relative_path(path1, path2): 18 | """Gets the relative path of `path1` to `path2`. 19 | 20 | Args: 21 | path1: The absolute path of the first file. 22 | path2: The absolute path of the second file. 23 | 24 | Returns: 25 | The relative path of `path1` to `path2`. 26 | """ 27 | 28 | relative_path = os.path.relpath(path1, os.path.dirname(path2)) 29 | 30 | return relative_path 31 | 32 | 33 | class Dataset(YOLODataset): 34 | def __init__(self, *args, data=None, **kwargs): 35 | super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs) 36 | 37 | # NOTE: Load the image directly without any resize operations. 
38 | def load_image(self, i): 39 | """Loads 1 image from dataset index 'i', returns (im, resized hw).""" 40 | im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i] 41 | if im is None: # not cached in RAM 42 | if fn.exists(): # load npy 43 | im = np.load(fn) 44 | else: # read image 45 | im = cv2.imread(f) # BGR 46 | if im is None: 47 | raise FileNotFoundError(f"Image Not Found {f}") 48 | h0, w0 = im.shape[:2] # orig hw 49 | return im, (h0, w0), im.shape[:2] 50 | 51 | return self.ims[i], self.im_hw0[i], self.im_hw[i] 52 | 53 | def build_transforms(self, hyp=None): 54 | transforms = Format( 55 | bbox_format="xyxy", 56 | normalize=False, 57 | return_mask=self.use_segments, 58 | return_keypoint=self.use_keypoints, 59 | batch_idx=True, 60 | mask_ratio=hyp.mask_ratio, 61 | mask_overlap=hyp.overlap_mask, 62 | ) 63 | return transforms 64 | -------------------------------------------------------------------------------- /yoloexplorer/explorer.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from collections import defaultdict 3 | import math 4 | import json 5 | 6 | import pandas as pd 7 | import cv2 8 | import duckdb 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | import yaml 12 | from tqdm import tqdm 13 | from ultralytics.utils import LOGGER, colorstr 14 | from ultralytics.utils.plotting import Annotator, colors 15 | from torch import Tensor 16 | import lancedb 17 | import pyarrow as pa 18 | from lancedb.embeddings import with_embeddings 19 | from sklearn.decomposition import PCA 20 | 21 | from yoloexplorer.dataset import get_dataset_info, Dataset 22 | from yoloexplorer.frontend import launch 23 | from yoloexplorer.config import TEMP_CONFIG_PATH 24 | 25 | import torch 26 | import torchvision.models as models 27 | from torchvision import datasets, transforms 28 | from PIL import Image 29 | import sys 30 | 31 | SCHEMA = [ 32 | "id", 33 | # "img", # Make this optional; disabled by 
default. Not feasible unless we can have row_id/primary key to index 34 | "path", 35 | "cls", 36 | "labels", 37 | "bboxes", 38 | "segments", 39 | "keypoints", 40 | "meta", 41 | ] # + "vector" with embeddings 42 | 43 | 44 | def encode(img_path): 45 | img = cv2.imread(img_path) 46 | ext = Path(img_path).suffix 47 | img_encoded = cv2.imencode(ext, img)[1].tobytes() 48 | 49 | return img_encoded 50 | 51 | 52 | def decode(img_encoded): 53 | nparr = np.frombuffer(img_encoded, np.byte) 54 | img = cv2.imdecode(nparr, cv2.IMREAD_ANYCOLOR) 55 | 56 | return img 57 | 58 | 59 | class Explorer: 60 | """ 61 | Dataset explorer 62 | """ 63 | 64 | def __init__(self, data, device="", model="resnet18", batch_size=64, project="run") -> None: 65 | """ 66 | Args: 67 | data (str, optional): path to dataset file 68 | table (str, optional): path to LanceDB table to load embeddings Table from. 69 | model (str, optional): path to model. Defaults to None. 70 | device (str, optional): device to use. Defaults to ''. If empty, uses the default device. 71 | project (str, optional): path to project. Defaults to "runs/dataset". 
72 | """ 73 | self.data = data 74 | self.table = None 75 | self.model = model 76 | self.device = device 77 | self.batch_size = batch_size 78 | self.project = project 79 | self.dataset_info = None 80 | self.predictor = None 81 | self.trainset = None 82 | self.removed_img_count = 0 83 | self.verbose = False # For embedding function 84 | self._sim_index = None 85 | self.version = None 86 | 87 | self.table_name = Path(data).name 88 | self.temp_table_name = self.table_name + "_temp" 89 | 90 | self.model_arch_supported = [ 91 | "resnet18", 92 | "resnet50", 93 | "efficientnet_b0", 94 | "efficientnet_v2_s", 95 | "googlenet", 96 | "mobilenet_v3_small", 97 | ] 98 | 99 | if model: 100 | self.predictor = self._setup_predictor(model, device) 101 | if data: 102 | self.dataset_info = get_dataset_info(self.data) 103 | 104 | self.transform = transforms.Compose( 105 | [ 106 | transforms.Resize((224, 224)), 107 | transforms.ToTensor(), 108 | ] 109 | ) 110 | 111 | def build_embeddings(self, verbose=False, force=False, store_imgs=False): 112 | """ 113 | Builds the dataset in LanceDB table format 114 | 115 | Args: 116 | batch (int, optional): batch size. Defaults to 1000. 117 | verbose (bool, optional): verbose. Defaults to False. 118 | force (bool, optional): force rebuild. Defaults to False. 119 | """ 120 | trainset = self.dataset_info["train"] 121 | trainset = trainset if isinstance(trainset, list) else [trainset] 122 | self.trainset = trainset 123 | self.verbose = verbose 124 | 125 | dataset = Dataset(img_path=trainset, data=self.dataset_info, augment=False, cache=False) 126 | batch_size = self.batch_size # TODO: fix this hardcoding 127 | db = self._connect() 128 | if not force and self.table_name in db.table_names(): 129 | LOGGER.info("LanceDB embedding space already exists. Attempting to reuse it. 
Use force=True to overwrite.") 130 | self.table = self._open_table(self.table_name) 131 | self.version = self.table.version 132 | if len(self.table) == dataset.ni: 133 | return 134 | else: 135 | self.table = None 136 | LOGGER.info("Table length does not match the number of images in the dataset. Building embeddings...") 137 | 138 | table_data = defaultdict(list) 139 | for idx, batch in enumerate(dataset): 140 | batch["id"] = idx 141 | batch["cls"] = batch["cls"].flatten().int().tolist() 142 | box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1]) 143 | batch["bboxes"] = [box for box, _ in box_cls_pair] 144 | batch["cls"] = [cls for _, cls in box_cls_pair] 145 | batch["labels"] = [self.dataset_info["names"][i] for i in batch["cls"]] 146 | batch["path"] = batch["im_file"] 147 | # batch["cls"] = batch["cls"].tolist() 148 | keys = (key for key in SCHEMA if key in batch) 149 | for key in keys: 150 | val = batch[key] 151 | if isinstance(val, Tensor): 152 | val = val.tolist() 153 | table_data[key].append(val) 154 | 155 | table_data["img"].append(encode(batch["im_file"])) if store_imgs else None 156 | 157 | if len(table_data[key]) == batch_size or idx == dataset.ni - 1: 158 | df = pd.DataFrame(table_data) 159 | df = with_embeddings(self._embedding_func, df, "path", batch_size=batch_size) 160 | if self.table: 161 | self.table.add(df) 162 | else: 163 | self.table = self._create_table(self.table_name, data=df, mode="overwrite") 164 | self.version = self.table.version 165 | table_data = defaultdict(list) 166 | 167 | LOGGER.info(f'{colorstr("LanceDB:")} Embedding space built successfully.') 168 | 169 | def plot_embeddings(self): 170 | """ 171 | Projects the embedding space to 2D using PCA 172 | 173 | Args: 174 | n_components (int, optional): number of components. Defaults to 2. 175 | """ 176 | if self.table is None: 177 | LOGGER.error("No embedding space found. 
Please build the embedding space first.") 178 | return None 179 | pca = PCA(n_components=2) 180 | embeddings = np.array(self.table.to_arrow()["vector"].to_pylist()) 181 | embeddings = pca.fit_transform(embeddings) 182 | plt.scatter(embeddings[:, 0], embeddings[:, 1]) 183 | plt.show() 184 | 185 | def get_similar_imgs(self, img, n=10): 186 | """ 187 | Returns the n most similar images to the given image 188 | 189 | Args: 190 | img (int, str, Path): index of image in the table, or path to image 191 | n (int, optional): number of similar images to return. Defaults to 10. 192 | 193 | Returns: 194 | tuple: (list of paths, list of ids) 195 | """ 196 | embeddings = None 197 | if self.table is None: 198 | LOGGER.error("No embedding space found. Please build the embedding space first.") 199 | return None 200 | if isinstance(img, int): 201 | embeddings = self.table.to_pandas()["vector"][img] 202 | elif isinstance(img, (str, Path)): 203 | img = img 204 | elif isinstance(img, bytes): 205 | img = decode(img) 206 | elif isinstance(img, list): # exceptional case for batch search from dash 207 | df = self.table.to_pandas().set_index("path") 208 | array = None 209 | try: 210 | array = df.loc[img]["vector"].to_list() 211 | embeddings = np.array(array) 212 | except KeyError: 213 | pass 214 | else: 215 | LOGGER.error("img should be index from the table(int), path of an image (str or Path), or bytes") 216 | return 217 | 218 | if embeddings is None: 219 | if isinstance(img, list): 220 | embeddings = np.array( 221 | [self.predictor(self._image_encode(i)).squeeze().cpu().detach().numpy() for i in img] 222 | ) 223 | else: 224 | embeddings = self.predictor(self._image_encode(img)).squeeze().cpu().detach().numpy() 225 | 226 | if len(embeddings.shape) > 1: 227 | embeddings = np.mean(embeddings, axis=0) 228 | 229 | sim = self.table.search(embeddings).limit(n).to_df() 230 | return sim["path"].to_list(), sim["id"].to_list() 231 | 232 | def plot_similar_imgs(self, img, n=10): 233 | """ 234 | Plots 
the n most similar images to the given image 235 | 236 | Args: 237 | img (int, str, Path): index of image in the table, or path to image. 238 | n (int, optional): number of similar images to return. Defaults to 10. 239 | """ 240 | _, ids = self.get_similar_imgs(img, n) 241 | self.plot_imgs(ids) 242 | 243 | def plot_imgs(self, ids=None, query=None, labels=True): 244 | if ids is None and query is None: 245 | ValueError("ids or query must be provided") 246 | 247 | # Resize the images to the minimum and maximum width and height 248 | resized_images = [] 249 | df = self.sql(query) if query else self.table.to_pandas().iloc[ids] 250 | for _, row in df.iterrows(): 251 | img = cv2.imread(row["path"]) 252 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 253 | if labels: 254 | ann = Annotator(img) 255 | for box, label, cls in zip(row["bboxes"], row["labels"], row["cls"]): 256 | ann.box_label(box, label, color=colors(cls, True)) 257 | 258 | img = ann.result() 259 | resized_images.append(img) 260 | 261 | if not resized_images: 262 | LOGGER.error("No images found") 263 | return 264 | # Create a grid of the images 265 | 266 | cols = 10 if len(resized_images) > 10 else max(2, len(resized_images)) 267 | rows = max(1, math.ceil(len(resized_images) / cols)) 268 | fig, axes = plt.subplots(nrows=rows, ncols=cols) 269 | fig.subplots_adjust(hspace=0, wspace=0) 270 | for i, ax in enumerate(axes.ravel()): 271 | if i < len(resized_images): 272 | ax.imshow(resized_images[i]) 273 | ax.axis("off") 274 | # Display the grid of images 275 | plt.show() 276 | 277 | def get_similarity_index(self, top_k=0.01, sim_thres=0.90, reduce=False, sorted=False): 278 | """ 279 | 280 | Args: 281 | sim_thres (float, optional): Similarity threshold to set the minimum similarity. Defaults to 0.9. 282 | top_k (float, optional): Top k fraction of the similar embeddings to apply the threshold on. Default 0.1. 283 | dim (int, optional): Dimension of the reduced embedding space. Defaults to 256. 
284 | sorted (bool, optional): Sort the embeddings by similarity. Defaults to False. 285 | Returns: 286 | np.array: Similarity index 287 | """ 288 | if self.table is None: 289 | LOGGER.error("No embedding space found. Please build the embedding space first.") 290 | return None 291 | if top_k > 1.0: 292 | LOGGER.warning("top_k should be between 0 and 1. Setting top_k to 1.0") 293 | top_k = 1.0 294 | if top_k < 0.0: 295 | LOGGER.warning("top_k should be between 0 and 1. Setting top_k to 0.0") 296 | top_k = 0.0 297 | if sim_thres is not None: 298 | if sim_thres > 1.0: 299 | LOGGER.warning("sim_thres should be between 0 and 1. Setting sim_thres to 1.0") 300 | sim_thres = 1.0 301 | if sim_thres < 0.0: 302 | LOGGER.warning("sim_thres should be between 0 and 1. Setting sim_thres to 0.0") 303 | sim_thres = 0.0 304 | embs = np.array(self.table.to_arrow()["vector"].to_pylist()) 305 | self._sim_index = np.zeros(len(embs)) 306 | limit = max(int(len(embs) * top_k), 1) 307 | 308 | # create a new table with reduced dimensionality to speedup the search 309 | self._search_table = self.table 310 | if reduce: 311 | dim = min(256, embs.shape[1]) # TODO: make this configurable 312 | pca = PCA(n_components=min(dim, len(embs))) 313 | embs = pca.fit_transform(embs) 314 | dim = embs.shape[1] 315 | values = pa.array(embs.reshape(-1), type=pa.float32()) 316 | table_data = pa.FixedSizeListArray.from_arrays(values, dim) 317 | table = pa.table([table_data, self.table.to_arrow()["id"]], names=["vector", "id"]) 318 | self._search_table = self._create_table("reduced_embs", data=table, mode="overwrite") 319 | 320 | # with multiprocessing.Pool() as pool: # multiprocessing doesn't do much. 
Need to revisit 321 | # list(tqdm(pool.imap(build_index, iterable))) 322 | 323 | for _, emb in enumerate(tqdm(embs)): 324 | df = self._search_table.search(emb).metric("cosine").limit(limit).to_df() 325 | if sim_thres is not None: 326 | df = df.query(f"_distance >= {1.0 - sim_thres}") 327 | for idx in df["id"][1:]: 328 | self._sim_index[idx] += 1 329 | self._drop_table("reduced_embs") if reduce else None 330 | 331 | return self._sim_index if not sorted else np.sort(self._sim_index) 332 | 333 | def plot_similarity_index(self, sim_thres=0.90, top_k=0.01, reduce=False, sorted=False): 334 | """ 335 | Plots the similarity index 336 | 337 | Args: 338 | threshold (float, optional): Similarity threshold to set the minimum similarity. Defaults to 0.9. 339 | top_k (float, optional): Top k fraction of the similar embeddings to apply the threshold on. Default 0.1. 340 | dim (int, optional): Dimension of the reduced embedding space. Defaults to 256. 341 | sorted (bool, optional): Whether to sort the index or not. Defaults to False. 342 | """ 343 | index = self.get_similarity_index(top_k, sim_thres, reduce) 344 | if sorted: 345 | index = np.sort(index) 346 | plt.bar([i for i in range(len(index))], index) 347 | plt.xlabel("idx") 348 | plt.ylabel("similarity count") 349 | plt.show() 350 | 351 | def remove_imgs(self, idxs): 352 | """ 353 | Works on temporary table. To apply the changes to the main table, call `persist()` 354 | 355 | Args: 356 | idxs (int or list): Index of the image to remove from the dataset. 357 | """ 358 | if isinstance(idxs, int): 359 | idxs = [idxs] 360 | 361 | pa_table = self.table.to_arrow() 362 | mask = [True for _ in range(len(pa_table))] 363 | for idx in idxs: 364 | mask[idx] = False 365 | 366 | self.removed_img_count += len(idxs) 367 | 368 | table = pa_table.filter(mask) 369 | ids = [i for i in range(len(table))] 370 | table = table.set_column(0, "id", [ids]) # TODO: Revisit this. 
This is a hack to fix the ids==dix 371 | self.table = self._create_table(self.temp_table_name, data=table, mode="overwrite") # work on a temporary table 372 | 373 | self.log_status() 374 | 375 | def add_imgs(self, exp, idxs): 376 | """ 377 | Works on temporary table. To apply the changes to the main table, call `persist()` 378 | 379 | Args: 380 | data (pd.DataFrame or pa.Table): Table rows to add to the dataset. 381 | """ 382 | table_df = self.table.to_pandas() 383 | data = exp.table.to_pandas().iloc[idxs] 384 | assert len(table_df["vector"].iloc[0]) == len(data["vector"].iloc[0]), "Vector dimension mismatch" 385 | table_df = pd.concat([table_df, data], ignore_index=True) 386 | ids = [i for i in range(len(table_df))] 387 | table_df["id"] = ids 388 | self.table = self._create_table( 389 | self.temp_table_name, data=table_df, mode="overwrite" 390 | ) # work on a temporary table 391 | self.log_status() 392 | 393 | def reset(self): 394 | """ 395 | Resets the dataset table to its original state or to the last persisted state. 396 | """ 397 | if self.table is None: 398 | LOGGER.info("No changes made to the dataset.") 399 | return 400 | 401 | db = self._connect() 402 | if self.temp_table_name in db.table_names(): 403 | self._drop_table(self.temp_table_name) 404 | 405 | self.table = self._open_table(self.table_name) 406 | self.removed_img_count = 0 407 | # self._sim_index = None # Not sure if we should reset this as computing the index is expensive 408 | LOGGER.info("Dataset reset to original state.") 409 | 410 | def persist(self, name=None): 411 | """ 412 | Persists the changes made to the dataset. Available only if data is provided in the constructor. 413 | 414 | Args: 415 | name (str, optional): Name of the new dataset. Defaults to `data_updated.yaml`. 
416 | """ 417 | db = self._connect() 418 | if self.table is None or self.temp_table_name not in db.table_names(): 419 | LOGGER.info("No changes made to the dataset.") 420 | return 421 | 422 | LOGGER.info("Persisting changes to the dataset...") 423 | self.log_status() 424 | 425 | if not name: 426 | name = self.data.split(".")[0] + "_updated" 427 | datafile_name = name + ".yaml" 428 | train_txt = "train_updated.txt" 429 | 430 | path = Path(name).resolve() # add new train.txt file in the dataset parent path 431 | path.mkdir(parents=True, exist_ok=True) 432 | if (path / train_txt).exists(): 433 | (path / train_txt).unlink() # remove existing 434 | 435 | for img in tqdm(self.table.to_pandas()["path"].to_list()): 436 | with open(path / train_txt, "a") as f: 437 | f.write(f"{img}" + "\n") # add image to txt file 438 | 439 | new_dataset_info = self.dataset_info.copy() 440 | new_dataset_info.pop("yaml_file") 441 | new_dataset_info.pop("path") # relative paths will get messed up when merging datasets 442 | new_dataset_info.pop("download") # Assume all files are present offline, there is no way to store metadata yet 443 | new_dataset_info["train"] = (path / train_txt).resolve().as_posix() 444 | for key, value in new_dataset_info.items(): 445 | if isinstance(value, Path): 446 | new_dataset_info[key] = value.as_posix() 447 | 448 | yaml.dump(new_dataset_info, open(path / datafile_name, "w")) # update dataset.yaml file 449 | 450 | # TODO: not sure if this should be called data_final to prevent overwriting the original data? 
451 | self.table = self._create_table(datafile_name, data=self.table.to_arrow(), mode="overwrite") 452 | db.drop_table(self.temp_table_name) 453 | 454 | LOGGER.info("Changes persisted to the dataset.") 455 | log = self._log_training_cmd(Path(path / datafile_name).relative_to(Path.cwd()).as_posix()) 456 | 457 | return log 458 | 459 | def log_status(self): 460 | # TODO: Pretty print log status 461 | LOGGER.info("\n|-----------------------------------------------|") 462 | LOGGER.info(f"\t Number of images: {len(self.table.to_arrow())}") 463 | LOGGER.info("|------------------------------------------------|") 464 | 465 | def sql(self, query: str): 466 | """ 467 | Executes a SQL query on the dataset table. 468 | 469 | Args: 470 | query (str): SQL query to execute. 471 | """ 472 | if self.table is None: 473 | LOGGER.info("No table found. Please provide a dataset to work on.") 474 | return 475 | 476 | table = self.table.to_arrow() # noqa 477 | result = duckdb.sql(query).to_df() 478 | 479 | return result 480 | 481 | def dash(self, exps=None, analysis=False): 482 | """ 483 | Launches a dashboard to visualize the dataset. 484 | """ 485 | config = {} 486 | Path(TEMP_CONFIG_PATH).parent.mkdir(exist_ok=True, parents=True) 487 | with open(TEMP_CONFIG_PATH, "w+") as file: 488 | config_exp = [self.config] 489 | if exps: 490 | for exp in exps: 491 | config_exp.append(exp.config) 492 | config["exps"] = config_exp 493 | config["analysis"] = analysis 494 | 495 | json.dump(config, file) 496 | 497 | launch() 498 | 499 | @property 500 | def config(self): 501 | return {"project": self.project, "model": self.model, "device": self.device, "data": self.data} 502 | 503 | def _log_training_cmd(self, data_path): 504 | success_log = ( 505 | f'{colorstr("LanceDB: ") }New dataset created successfully! 
Run the following command to train a model:' 506 | ) 507 | train_cmd = f"yolo train model={self.model} data={data_path} epochs=10" 508 | success_log = success_log + "\n" + train_cmd 509 | LOGGER.info(success_log) 510 | 511 | return train_cmd 512 | 513 | def _connect(self): 514 | db = lancedb.connect(self.project) 515 | 516 | return db 517 | 518 | def _create_table(self, name, data=None, mode="overwrite"): 519 | db = lancedb.connect(self.project) 520 | table = db.create_table(name, data=data, mode=mode) 521 | 522 | return table 523 | 524 | def _open_table(self, name): 525 | db = lancedb.connect(self.project) 526 | table = db.open_table(name) if name in db.table_names() else None 527 | if table is None: 528 | raise ValueError(f'{colorstr("LanceDB: ") }Table not found.') 529 | return table 530 | 531 | def _drop_table(self, name): 532 | db = lancedb.connect(self.project) 533 | if name in db.table_names(): 534 | db.drop_table(name) 535 | return True 536 | 537 | return False 538 | 539 | def _copy_table_to_project(self, table_path): 540 | if not table_path.endswith(".lance"): 541 | raise ValueError(f"{colorstr('LanceDB: ')} Table must be a .lance file") 542 | 543 | LOGGER.info(f"Copying table from {table_path}") 544 | path = Path(table_path).parent 545 | name = Path(table_path).stem # lancedb doesn't need .lance extension 546 | db = lancedb.connect(path) 547 | table = db.open_table(name) 548 | return self._create_table(self.table_name, data=table.to_arrow(), mode="overwrite") 549 | 550 | def _image_encode(self, img): 551 | image = Image.open(img) 552 | n_channels = np.array(image).ndim 553 | if n_channels == 2: 554 | image = image.convert(mode="RGB") 555 | 556 | img_tensor = self.transform(image) 557 | trans_img = img_tensor.unsqueeze(0) 558 | return trans_img 559 | 560 | def _embedding_func(self, imgs): 561 | embeddings = [] 562 | for img in tqdm(imgs): 563 | encod_img = self._image_encode(img) 564 | 
embeddings.append(self.predictor(encod_img).squeeze().cpu().detach().numpy()) 565 | 566 | return embeddings 567 | 568 | def _setup_predictor(self, model_arch, device=""): 569 | if model_arch in self.model_arch_supported: 570 | load_model = getattr(models, model_arch) 571 | model = load_model(pretrained=True) 572 | predictor = torch.nn.Sequential(*list(model.children())[:-1]) 573 | return predictor 574 | 575 | else: 576 | LOGGER.error(f"Supported for {model_arch} is not added yet") 577 | sys.exit(1) 578 | 579 | def create_index(self): 580 | # TODO: create index 581 | pass 582 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import launch 2 | 3 | __all__ = ["launch"] 4 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/datasets.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | 4 | import streamlit as st 5 | from yoloexplorer import config 6 | from yoloexplorer.frontend.states import init_states, update_state, widget_key 7 | from yoloexplorer.frontend.streamlit_dash import image_select 8 | 9 | 10 | @st.cache_data 11 | def _get_config(): 12 | with open(config.TEMP_CONFIG_PATH) as json_file: 13 | data = json.load(json_file) 14 | return data["exps"] 15 | 16 | 17 | @st.cache_data 18 | def _get_dataset(idx=0): 19 | from yoloexplorer import Explorer # function scope import 20 | 21 | config = _get_config()[idx] 22 | exp = Explorer(**config) 23 | exp.build_embeddings() 24 | 25 | return exp 26 | 27 | 28 | def _get_primary_dataset(): 29 | data = st.session_state["PRIMARY_DATASET"] 30 | exp = st.session_state[f"EXPLORER_{data}"] 31 | 32 | return exp 33 | 34 | 35 | def reset_to_init_state(): 36 | if st.session_state.get(f"STAGED_IMGS") is None: # if app is not initialized TODO: 
better check 37 | cfgs = _get_config() 38 | init_states(cfgs) 39 | for idx, cfg in enumerate(cfgs): 40 | data = cfg["data"].split(".")[0] 41 | exp = _get_dataset(idx) 42 | update_state(f"EXPLORER_{data}", exp) 43 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 44 | 45 | 46 | def query_form(data): 47 | with st.form(widget_key("query", data)): 48 | col1, col2 = st.columns([0.8, 0.2]) 49 | with col1: 50 | query = st.text_input("Query", "", label_visibility="collapsed") 51 | with col2: 52 | submit = st.form_submit_button("Query") 53 | if submit: 54 | run_sql_query(data, query) 55 | 56 | 57 | def similarity_form(selected_imgs, selected_staged_imgs, data): 58 | st.write("Similarity Search") 59 | with st.form(widget_key("similarity", data)): 60 | subcol1, subcol2 = st.columns([1, 1]) 61 | with subcol1: 62 | st.write("Limit") 63 | limit = st.number_input("limit", min_value=None, max_value=None, value=25, label_visibility="collapsed") 64 | 65 | with subcol2: 66 | disabled = len(selected_imgs) and len(selected_staged_imgs) 67 | st.write("Selected: ", len(selected_imgs)) 68 | submit = st.form_submit_button("Search", disabled=disabled) 69 | if disabled: 70 | st.error("Cannot search from staging and dataset") 71 | if submit: 72 | find_similar_imgs(data, selected_imgs or selected_staged_imgs, limit) 73 | 74 | 75 | def staging_area_form(data, selected_imgs): 76 | st.write("Staging Area") 77 | with st.form(widget_key("staging_area", data)): 78 | col1, col2 = st.columns([1, 1]) 79 | staged_imgs = set(st.session_state[f"STAGED_IMGS"]) - set(selected_imgs) 80 | with col1: 81 | st.form_submit_button( 82 | ":wastebasket:", 83 | disabled=len(selected_imgs) == 0, 84 | on_click=update_state, 85 | args=("STAGED_IMGS", staged_imgs), 86 | ) 87 | with col2: 88 | st.form_submit_button("Clear", on_click=update_state, args=("STAGED_IMGS", set())) 89 | 90 | 91 | def selected_options_form(data, selected_imgs, selected_staged_imgs, total_staged_imgs): 92 | with 
st.form(widget_key("selected_options", data)): 93 | col1, col2 = st.columns([1, 1]) 94 | with col1: 95 | st.form_submit_button( 96 | "Add to Stage", 97 | # key=widget_key("staging", data), 98 | on_click=add_to_staging, 99 | args=("STAGED_IMGS", total_staged_imgs), 100 | disabled=not selected_imgs, 101 | ) 102 | 103 | with col2: 104 | if data == st.session_state["PRIMARY_DATASET"]: 105 | st.form_submit_button( 106 | ":wastebasket:", 107 | disabled=not selected_imgs or (len(selected_imgs) and len(selected_staged_imgs)), 108 | on_click=remove_imgs, 109 | args=(data, selected_imgs), 110 | ) 111 | 112 | else: 113 | st.form_submit_button( 114 | f"Add to {st.session_state['PRIMARY_DATASET']}", 115 | on_click=add_imgs, 116 | args=(data, selected_imgs), 117 | disabled=not selected_imgs, 118 | ) 119 | 120 | 121 | def persist_reset_form(): 122 | with st.form(widget_key("persist_reset", "PRIMARY_DATASET")): 123 | col1, col2 = st.columns([1, 1]) 124 | with col1: 125 | st.form_submit_button("Reset", on_click=reset) 126 | 127 | with col2: 128 | st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True)) 129 | 130 | 131 | def find_similar_imgs(data, imgs, limit=25, rerun=False): 132 | exp = st.session_state[f"EXPLORER_{data}"] 133 | _, idx = exp.get_similar_imgs(imgs, limit) 134 | paths = exp.table.to_pandas()["path"][idx].to_list() 135 | update_state(f"IMGS_{data}", paths) 136 | st.experimental_rerun() 137 | 138 | 139 | def run_sql_query(data, query): 140 | if query.rstrip().lstrip(): 141 | exp = st.session_state[f"EXPLORER_{data}"] 142 | df = exp.sql(query) 143 | update_state(f"IMGS_{data}", df["path"].to_list()) 144 | st.experimental_rerun() 145 | 146 | 147 | def add_to_staging(key, imgs): 148 | update_state(key, imgs) 149 | # st.experimental_rerun() 150 | 151 | 152 | def remove_imgs(data, imgs): 153 | exp = st.session_state[f"EXPLORER_{data}"] 154 | idxs = exp.table.to_pandas().set_index("path").loc[imgs]["id"].to_list() 155 | exp.remove_imgs(idxs) 156 | 
update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 157 | # st.experimental_rerun() 158 | 159 | 160 | def add_imgs(from_data, imgs): 161 | data = st.session_state["PRIMARY_DATASET"] 162 | exp = st.session_state[f"EXPLORER_{data}"] 163 | from_exp = st.session_state[f"EXPLORER_{from_data}"] 164 | idxs = from_exp.table.to_pandas().set_index("path").loc[imgs]["id"].to_list() 165 | exp.add_imgs(from_exp, idxs) 166 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 167 | update_state(f"SUCCESS_MSG", f"Added {len(imgs)} to {data}") 168 | 169 | 170 | def reset(): 171 | data = st.session_state["PRIMARY_DATASET"] 172 | exp = st.session_state[f"EXPLORER_{data}"] 173 | exp.reset() 174 | update_state("STAGED_IMGS", None) 175 | 176 | 177 | def persist_changes(): 178 | exp = _get_primary_dataset() 179 | log = None 180 | with st.spinner("Creating new dataset..."): 181 | log = exp.persist() 182 | st.success("Dataset created successfully!") 183 | st.code(log, language="shell") 184 | update_state("PERSISTING", False) 185 | st.button("Refresh", on_click=update_state, args=("STAGED_IMGS", None)) 186 | 187 | 188 | def rerender_button(data): 189 | col1, col2, col3 = st.columns([0.26, 0.3, 0.1]) 190 | with col1: 191 | pass 192 | with col2: 193 | st.button( 194 | "Render Imgs :arrows_counterclockwise:", 195 | key=widget_key("render_imgs", data), 196 | help=""" 197 | Imgs might not be rendered automatically in some cases to save memory when stage area is used. 198 | Click this button to force render imgs. 
199 | """, 200 | ) 201 | with col3: 202 | pass 203 | 204 | 205 | def layout(): 206 | st.set_page_config(layout="wide", initial_sidebar_state="collapsed") 207 | 208 | if st.session_state.get("PERSISTING"): 209 | persist_changes() 210 | return 211 | 212 | # staging area 213 | selected_staged_imgs = [] 214 | if st.session_state.get(f"STAGED_IMGS"): 215 | staged_imgs = st.session_state[f"STAGED_IMGS"] 216 | total_staged_imgs = len(staged_imgs) 217 | col1, col2 = st.columns([0.8, 0.2], gap="small") 218 | with col1: 219 | selected_staged_imgs = image_select( 220 | f"Staged samples: {total_staged_imgs}", images=list(staged_imgs), use_container_width=False 221 | ) 222 | with col2: 223 | staging_area_form(data="staging_area", selected_imgs=selected_staged_imgs) 224 | # Dataset tabs 225 | cfgs = _get_config() 226 | tabs = st.tabs([cfg["data"].split(".")[0] for cfg in cfgs]) 227 | for idx, tab in enumerate(tabs): 228 | with tab: 229 | data = cfgs[idx]["data"].split(".")[0] 230 | 231 | col1, col2 = st.columns([0.75, 0.25], gap="small") 232 | reset_to_init_state() 233 | 234 | imgs = st.session_state[f"IMGS_{data}"] 235 | total_imgs = len(imgs) 236 | with col1: 237 | subcol1, subcol2, subcol3, subcol4 = st.columns([0.2, 0.2, 0.3, 0.3]) 238 | with subcol1: 239 | num = st.number_input( 240 | "Max Images Displayed", 241 | min_value=0, 242 | max_value=total_imgs, 243 | value=min(250, total_imgs), 244 | key=widget_key("num_imgs_displayed", data), 245 | ) 246 | with subcol2: 247 | start_idx = st.number_input( 248 | "Start Index", min_value=0, max_value=total_imgs, value=0, key=widget_key("start_idx", data) 249 | ) 250 | with subcol3: 251 | select_all = st.checkbox("Select All", value=False, key=widget_key("select_all", data)) 252 | with subcol4: 253 | labels = st.checkbox( 254 | "Labels", 255 | value=False, 256 | key=widget_key("labels", data), 257 | ) 258 | 259 | query_form(data) 260 | selected_imgs = [] 261 | if total_imgs: 262 | imgs_displayed = imgs[start_idx : start_idx + num] 
263 | label_args = {"bboxes": None, "labels": None, "classes": None} 264 | if labels: 265 | table = st.session_state[f"EXPLORER_{data}"].table 266 | df = table.to_pandas().set_index("path").loc[imgs_displayed] 267 | label_args["bboxes"] = df["bboxes"].to_list() 268 | label_args["labels"] = df["labels"].to_list() 269 | label_args["classes"] = df["cls"].to_list() 270 | 271 | selected_imgs = image_select( 272 | f"Total samples: {total_imgs}", 273 | images=imgs_displayed, 274 | use_container_width=False, 275 | indices=[i for i in range(num)] if select_all else None, 276 | **label_args, 277 | ) 278 | if st.session_state.get(f"STAGED_IMGS"): 279 | rerender_button(data) 280 | 281 | with col2: 282 | similarity_form(selected_imgs, selected_staged_imgs, data) 283 | total_staged_imgs = set(st.session_state["STAGED_IMGS"]) 284 | total_staged_imgs.update(selected_imgs) 285 | 286 | selected_options_form(data, selected_imgs, selected_staged_imgs, total_staged_imgs) 287 | if data == st.session_state["PRIMARY_DATASET"]: 288 | persist_reset_form() 289 | 290 | 291 | def launch(): 292 | cmd = ["streamlit", "run", __file__, "--server.maxMessageSize", "1024"] 293 | try: 294 | subprocess.run(cmd, check=True) 295 | except Exception as e: 296 | print(e) 297 | 298 | 299 | if __name__ == "__main__": 300 | layout() 301 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/layout.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | import subprocess 4 | 5 | import streamlit as st 6 | from streamlit_dash import image_select 7 | from yoloexplorer import config 8 | from yoloexplorer.frontend.states import init_states, update_state, widget_key 9 | 10 | 11 | @st.cache_data 12 | def _get_config(): 13 | with open(config.TEMP_CONFIG_PATH) as json_file: 14 | data = json.load(json_file) 15 | return data 16 | 17 | 18 | @st.cache_data 19 | def _get_dataset(idx=0): 20 | from yoloexplorer 
import Explorer # function scope import 21 | 22 | config = _get_config()[idx] 23 | exp = Explorer(**config) 24 | exp.build_embeddings() 25 | 26 | return exp 27 | 28 | 29 | def _get_primary_dataset(): 30 | data = st.session_state["PRIMARY_DATASET"] 31 | exp = st.session_state[f"EXPLORER_{data}"] 32 | 33 | return exp 34 | 35 | 36 | def reset_to_init_state(): 37 | if st.session_state.get(f"STAGED_IMGS") is None: # if app is not initialized TODO: better check 38 | print("Initializing app...") 39 | cfgs = _get_config() 40 | init_states(cfgs) 41 | for idx, cfg in enumerate(cfgs): 42 | data = cfg["data"].split(".")[0] 43 | exp = _get_dataset(idx) 44 | update_state(f"EXPLORER_{data}", exp) 45 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 46 | 47 | 48 | def query_form(data): 49 | with st.form(widget_key("query", data)): 50 | col1, col2 = st.columns([0.8, 0.2]) 51 | with col1: 52 | query = st.text_input("Query", "", label_visibility="collapsed") 53 | with col2: 54 | submit = st.form_submit_button("Query") 55 | if submit: 56 | run_sql_query(data, query) 57 | 58 | 59 | def similarity_form(selected_imgs, selected_staged_imgs, data): 60 | st.write("Similarity Search") 61 | with st.form(widget_key("similarity", data)): 62 | subcol1, subcol2 = st.columns([1, 1]) 63 | with subcol1: 64 | st.write("Limit") 65 | limit = st.number_input("limit", min_value=None, max_value=None, value=25, label_visibility="collapsed") 66 | 67 | with subcol2: 68 | disabled = len(selected_imgs) and len(selected_staged_imgs) 69 | st.write("Selected: ", len(selected_imgs)) 70 | st.form_submit_button( 71 | "Search", 72 | disabled=disabled, 73 | on_click=find_similar_imgs, 74 | args=(data, selected_imgs or selected_staged_imgs, limit), 75 | ) 76 | if disabled: 77 | st.error("Cannot search from staging and dataset") 78 | 79 | 80 | def staging_area_form(data, selected_imgs): 81 | st.write("Staging Area") 82 | with st.form(widget_key("staging_area", data)): 83 | col1, col2 = 
st.columns([1, 1]) 84 | staged_imgs = set(st.session_state[f"STAGED_IMGS"]) - set(selected_imgs) 85 | with col1: 86 | st.form_submit_button( 87 | ":wastebasket:", 88 | disabled=len(selected_imgs) == 0, 89 | on_click=update_state, 90 | args=("STAGED_IMGS", staged_imgs), 91 | ) 92 | with col2: 93 | st.form_submit_button("Clear", on_click=update_state, args=("STAGED_IMGS", set())) 94 | 95 | 96 | def selected_options_form(data, selected_imgs, selected_staged_imgs, total_staged_imgs): 97 | with st.form(widget_key("selected_options", data)): 98 | col1, col2 = st.columns([1, 1]) 99 | with col1: 100 | st.form_submit_button( 101 | "Add to Staging", 102 | # key=widget_key("staging", data), 103 | disabled=not selected_imgs, 104 | on_click=add_to_staging, 105 | args=("STAGED_IMGS", total_staged_imgs), 106 | ) 107 | 108 | with col2: 109 | if data == st.session_state["PRIMARY_DATASET"]: 110 | st.form_submit_button( 111 | ":wastebasket:", 112 | on_click=remove_imgs, 113 | args=(data, selected_imgs), 114 | disabled=not selected_imgs or (len(selected_imgs) and len(selected_staged_imgs)), 115 | ) 116 | else: 117 | st.form_submit_button( 118 | f"Add to {st.session_state['PRIMARY_DATASET']}", 119 | on_click=add_imgs, 120 | args=(data, selected_imgs), 121 | disabled=not selected_imgs, 122 | ) 123 | 124 | 125 | def persist_reset_form(): 126 | with st.form(widget_key("persist_reset", "PRIMARY_DATASET")): 127 | col1, col2 = st.columns([1, 1]) 128 | with col1: 129 | st.form_submit_button("Reset", on_click=reset) 130 | 131 | with col2: 132 | st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True)) 133 | 134 | 135 | def find_similar_imgs(data, imgs, limit=25): 136 | exp = st.session_state[f"EXPLORER_{data}"] 137 | _, idx = exp.get_similar_imgs(imgs, limit) 138 | paths = exp.table.to_pandas()["path"][idx].to_list() 139 | update_state(f"IMGS_{data}", paths) 140 | # st.experimental_rerun() 141 | 142 | 143 | def run_sql_query(data, query): 144 | if 
query.rstrip().lstrip(): 145 | exp = st.session_state[f"EXPLORER_{data}"] 146 | df = exp.sql(query) 147 | update_state(f"IMGS_{data}", df["path"].to_list()) 148 | st.experimental_rerun() 149 | 150 | 151 | def add_to_staging(key, imgs): 152 | update_state(key, imgs) 153 | # st.experimental_rerun() 154 | 155 | 156 | def remove_imgs(data, imgs): 157 | exp = st.session_state[f"EXPLORER_{data}"] 158 | idxs = exp.table.to_pandas().set_index("path").loc[imgs]["id"].to_list() 159 | exp.remove_imgs(idxs) 160 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 161 | 162 | 163 | def add_imgs(from_data, imgs): 164 | data = st.session_state["PRIMARY_DATASET"] 165 | exp = st.session_state[f"EXPLORER_{data}"] 166 | from_exp = st.session_state[f"EXPLORER_{from_data}"] 167 | idxs = from_exp.table.to_pandas().set_index("path").loc[imgs]["id"].to_list() 168 | exp.add_imgs(from_exp, idxs) 169 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 170 | update_state(f"SUCCESS_MSG", f"Added {len(imgs)} to {data}") 171 | 172 | 173 | def reset(): 174 | data = st.session_state["PRIMARY_DATASET"] 175 | exp = st.session_state[f"EXPLORER_{data}"] 176 | exp.reset() 177 | update_state("STAGED_IMGS", None) 178 | 179 | 180 | def persist_changes(): 181 | exp = _get_primary_dataset() 182 | with st.spinner("Creating new dataset..."): 183 | exp.persist() 184 | st.success("Dataset created successfully! 
Auto-reload in 30 seconds...") 185 | update_state("PERSISTING", False) 186 | st.button("Refresh", on_click=update_state, args=("STAGED_IMGS", None)) 187 | 188 | 189 | def layout(): 190 | st.set_page_config(layout="wide", initial_sidebar_state="collapsed") 191 | 192 | if st.session_state.get("PERSISTING"): 193 | persist_changes() 194 | return 195 | 196 | # staging area 197 | selected_staged_imgs = [] 198 | if st.session_state.get(f"STAGED_IMGS"): 199 | staged_imgs = st.session_state[f"STAGED_IMGS"] 200 | total_staged_imgs = len(staged_imgs) 201 | col1, col2 = st.columns([0.8, 0.2], gap="small") 202 | with col1: 203 | selected_staged_imgs = image_select( 204 | f"Staged samples: {total_staged_imgs}", images=list(staged_imgs), use_container_width=False 205 | ) 206 | with col2: 207 | staging_area_form(data="staging_area", selected_imgs=selected_staged_imgs) 208 | 209 | # Dataset tabs 210 | cfgs = _get_config() 211 | tabs = st.tabs([cfg["data"].split(".")[0] for cfg in cfgs]) 212 | for idx, tab in enumerate(tabs): 213 | with tab: 214 | data = cfgs[idx]["data"].split(".")[0] 215 | 216 | col1, col2 = st.columns([0.75, 0.25], gap="small") 217 | reset_to_init_state() 218 | 219 | imgs = st.session_state[f"IMGS_{data}"] 220 | total_imgs = len(imgs) 221 | with col1: 222 | subcol1, subcol2, subcol3 = st.columns([0.2, 0.2, 0.6]) 223 | with subcol1: 224 | num = st.number_input( 225 | "Max Images Displayed", 226 | min_value=0, 227 | max_value=total_imgs, 228 | value=min(250, total_imgs), 229 | key=widget_key("num_imgs_displayed", data), 230 | ) 231 | with subcol2: 232 | start_idx = st.number_input( 233 | "Start Index", min_value=0, max_value=total_imgs, value=0, key=widget_key("start_idx", data) 234 | ) 235 | with subcol3: 236 | select_all = st.checkbox("Select All", value=False, key=widget_key("select_all", data)) 237 | 238 | query_form(data) 239 | if total_imgs: 240 | imgs_displayed = imgs[start_idx : start_idx + num] 241 | selected_imgs = image_select( 242 | f"Total samples: 
{total_imgs}", 243 | images=imgs_displayed, 244 | use_container_width=False, 245 | indices=[i for i in range(num)] if select_all else None, 246 | ) 247 | 248 | with col2: 249 | similarity_form(selected_imgs, selected_staged_imgs, data) 250 | total_staged_imgs = set(st.session_state["STAGED_IMGS"]) 251 | total_staged_imgs.update(selected_imgs) 252 | 253 | display_labels = st.checkbox("Labels", value=False, key=widget_key("labels", data)) 254 | selected_options_form(data, selected_imgs, selected_staged_imgs, total_staged_imgs) 255 | if data == st.session_state["PRIMARY_DATASET"]: 256 | persist_reset_form() 257 | 258 | 259 | def launch(): 260 | cmd = ["streamlit", "run", __file__, "--server.maxMessageSize", "1024"] 261 | try: 262 | subprocess.run(cmd, check=True) 263 | except Exception as e: 264 | print(e) 265 | 266 | 267 | if __name__ == "__main__": 268 | layout() 269 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/pages/1_table.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import streamlit as st 5 | from sklearn.manifold import TSNE 6 | from sklearn.decomposition import PCA 7 | import plotly.express as px 8 | 9 | from yoloexplorer import config 10 | from yoloexplorer.frontend.datasets import _get_primary_dataset 11 | 12 | 13 | @st.cache_resource 14 | def reduce_dim(df, alg): 15 | embeddings = np.array(df["vector"].to_list()) 16 | if alg == "TSNE": 17 | tsne = TSNE(n_components=2, random_state=0) 18 | embeddings = tsne.fit_transform(embeddings) 19 | elif alg == "PCA": 20 | pca = PCA(n_components=2) 21 | embeddings = pca.fit_transform(embeddings) 22 | return embeddings 23 | 24 | 25 | def embeddings(): 26 | exp = _get_primary_dataset() 27 | df = exp.table.to_pandas() 28 | col1, col2 = st.columns([0.5, 0.5]) 29 | with col1: 30 | st.dataframe(df, use_container_width=True) 31 | with col2: 32 | option = st.selectbox("Dim Reducer 
Algorithm", ("TSNE", "PCA", "UMAP (Coming soon)")) 33 | if option == "TSNE": 34 | embeddings = reduce_dim(df, "TSNE") 35 | elif option == "PCA": 36 | embeddings = reduce_dim(df, "TSNE") 37 | elif option == "UMAP (Coming soon)": 38 | st.write("Coming soon") 39 | 40 | fig = px.scatter(x=embeddings[:, 0], y=embeddings[:, 1]) 41 | st.plotly_chart(fig, use_container_width=True) 42 | 43 | 44 | if __name__ == "__main__": 45 | embeddings() 46 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/pages/2_analysis.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import streamlit as st 5 | 6 | from yoloexplorer import config 7 | from yoloexplorer.frontend.datasets import _get_primary_dataset 8 | 9 | INTEGRATION_IMPORT_ERROR = None 10 | 11 | try: 12 | import data_gradients # noqa 13 | 14 | import data_gradients.feature_extractors.object_detection as detection 15 | from data_gradients.datasets.detection import YoloFormatDetectionDataset 16 | except ImportError: 17 | INTEGRATION_IMPORT_ERROR = "data-gradients" 18 | 19 | 20 | @st.cache_data 21 | def _get_config(): 22 | with open(config.TEMP_CONFIG_PATH) as json_file: 23 | data = json.load(json_file) 24 | return data["analysis"] 25 | 26 | 27 | @st.cache_data 28 | def _get_task_from_data(data): 29 | # TODO: support more tasks 30 | return "detection" 31 | 32 | 33 | DETECTION = ( 34 | { 35 | "DetectionBoundingBoxArea": detection.DetectionBoundingBoxArea, 36 | "DetectionBoundingBoxPerImageCount": detection.DetectionBoundingBoxPerImageCount, 37 | "DetectionBoundingBoxSize": detection.DetectionBoundingBoxSize, 38 | "DetectionClassFrequency": detection.DetectionClassFrequency, 39 | "DetectionClassHeatmap": detection.DetectionClassHeatmap, 40 | "DetectionClassesPerImageCount": detection.DetectionClassesPerImageCount, 41 | "DetectionSampleVisualization": detection.DetectionSampleVisualization, 42 | 
"DetectionBoundingBoxIoU": detection.DetectionBoundingBoxIoU, 43 | } 44 | if INTEGRATION_IMPORT_ERROR is None 45 | else {} 46 | ) 47 | 48 | SEGMENTATION = {} if INTEGRATION_IMPORT_ERROR is None else {} 49 | 50 | TASK2MODULES = {"detection": DETECTION, "segmentation": SEGMENTATION} 51 | TASK2LABELS = {"detection": "bboxes", "segmentation": "masks"} 52 | 53 | 54 | @st.cache_resource 55 | def analyse_dataset(): 56 | exp = _get_primary_dataset() 57 | info = exp.dataset_info 58 | deci_ds = YoloFormatDetectionDataset( 59 | root_dir=info["path"], 60 | images_dir=info["image_dir"], 61 | ) 62 | 63 | 64 | def analysis(): 65 | if not _get_config(): 66 | st.error("Enable analysis by passing `analysis=True` when launching the dashboard.") 67 | return 68 | 69 | if INTEGRATION_IMPORT_ERROR: 70 | st.error( 71 | f"The following package(s) are required to run this module: `{INTEGRATION_IMPORT_ERROR}`. Please install them and try again." 72 | ) 73 | return 74 | 75 | task = _get_task_from_data(_get_primary_dataset()) 76 | modules = TASK2MODULES[task] 77 | results = [] 78 | for _, module in modules.items(): 79 | pass 80 | 81 | 82 | if __name__ == "__main__": 83 | analysis() 84 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/redirect.py: -------------------------------------------------------------------------------- 1 | """ 2 | TAKEN FROM - https://gist.github.com/schaumb/037f139035d93cff3ad9f4f7e5f739ce 3 | """ 4 | 5 | import streamlit as st 6 | import io 7 | import contextlib 8 | import sys 9 | import re 10 | 11 | 12 | class _Redirect: 13 | class IOStuff(io.StringIO): 14 | def __init__(self, trigger, max_buffer, buffer_separator, regex, dup=None): 15 | super().__init__() 16 | self._trigger = trigger 17 | self._max_buffer = max_buffer 18 | self._buffer_separator = buffer_separator 19 | self._regex = regex and re.compile(regex) 20 | self._dup = dup 21 | 22 | def write(self, __s: str) -> int: 23 | if self._max_buffer: 24 | 
concatenated_len = super().tell() + len(__s) 25 | if concatenated_len > self._max_buffer: 26 | rest = self.get_filtered_output()[concatenated_len - self._max_buffer :] 27 | if self._buffer_separator is not None: 28 | rest = rest.split(self._buffer_separator, 1)[-1] 29 | super().seek(0) 30 | super().write(rest) 31 | super().truncate(super().tell() + len(__s)) 32 | res = super().write(__s) 33 | if self._dup is not None: 34 | self._dup.write(__s) 35 | self._trigger(self.get_filtered_output()) 36 | return res 37 | 38 | def get_filtered_output(self): 39 | if self._regex is None or self._buffer_separator is None: 40 | return self.getvalue() 41 | 42 | return self._buffer_separator.join( 43 | filter(self._regex.search, self.getvalue().split(self._buffer_separator)) 44 | ) 45 | 46 | def print_at_end(self): 47 | self._trigger(self.get_filtered_output()) 48 | 49 | def __init__( 50 | self, 51 | stdout=None, 52 | stderr=False, 53 | format=None, 54 | to=None, 55 | max_buffer=None, 56 | buffer_separator="\n", 57 | regex=None, 58 | duplicate_out=False, 59 | ): 60 | self.io_args = { 61 | "trigger": self._write, 62 | "max_buffer": max_buffer, 63 | "buffer_separator": buffer_separator, 64 | "regex": regex, 65 | } 66 | self.redirections = [] 67 | self.st = None 68 | self.stderr = stderr is True 69 | self.stdout = stdout is True or (stdout is None and not self.stderr) 70 | self.format = format or "code" 71 | self.to = to 72 | self.fun = None 73 | self.duplicate_out = duplicate_out or None 74 | self.active_nested = None 75 | 76 | if not self.stdout and not self.stderr: 77 | raise ValueError("one of stdout or stderr must be True") 78 | 79 | if self.format not in ["text", "markdown", "latex", "code", "write"]: 80 | raise ValueError( 81 | f"format need oneof the following: {', '.join(['text', 'markdown', 'latex', 'code', 'write'])}" 82 | ) 83 | 84 | if self.to and (not hasattr(self.to, "text") or not hasattr(self.to, "empty")): 85 | raise ValueError(f"'to' is not a streamlit container 
object") 86 | 87 | def __enter__(self): 88 | if self.st is not None: 89 | if self.to is None: 90 | if self.active_nested is None: 91 | self.active_nested = self( 92 | format=self.format, 93 | max_buffer=self.io_args["max_buffer"], 94 | buffer_separator=self.io_args["buffer_separator"], 95 | regex=self.io_args["regex"], 96 | duplicate_out=self.duplicate_out, 97 | ) 98 | return self.active_nested.__enter__() 99 | else: 100 | raise Exception("Already entered") 101 | to = self.to or st 102 | 103 | to.text( 104 | f"Redirected output from " 105 | f"{'stdout and stderr' if self.stdout and self.stderr else 'stdout' if self.stdout else 'stderr'}" 106 | f"{' [' + self.io_args['regex'] + ']' if self.io_args['regex'] else ''}" 107 | f":" 108 | ) 109 | self.st = to.empty() 110 | self.fun = getattr(self.st, self.format) 111 | 112 | io_obj = None 113 | 114 | def redirect(to_duplicate): 115 | nonlocal io_obj 116 | io_obj = _Redirect.IOStuff(dup=self.duplicate_out and to_duplicate, **self.io_args) 117 | redirection = contextlib.redirect_stdout(io_obj) 118 | self.redirections.append((redirection, io_obj)) 119 | redirection.__enter__() 120 | 121 | if self.stderr: 122 | redirect(sys.stderr) 123 | if self.stdout: 124 | redirect(sys.stdout) 125 | 126 | return io_obj 127 | 128 | def __call__(self, to=None, format=None, max_buffer=None, buffer_separator="\n", regex=None, duplicate_out=False): 129 | return _Redirect( 130 | self.stdout, 131 | self.stderr, 132 | format=format, 133 | to=to, 134 | max_buffer=max_buffer, 135 | buffer_separator=buffer_separator, 136 | regex=regex, 137 | duplicate_out=duplicate_out, 138 | ) 139 | 140 | def __exit__(self, *exc): 141 | if self.active_nested is not None: 142 | nested = self.active_nested 143 | if nested.active_nested is None: 144 | self.active_nested = None 145 | return nested.__exit__(*exc) 146 | 147 | res = None 148 | for redirection, io_obj in reversed(self.redirections): 149 | res = redirection.__exit__(*exc) 150 | io_obj.print_at_end() 151 | 
        self.redirections = []
        self.st = None
        self.fun = None
        return res

    def _write(self, data):
        # Render the captured text with the Streamlit formatter chosen in
        # __enter__ (st.code / st.text / ...).
        self.fun(data)


# Ready-made redirectors; use directly as context managers, or call them to
# reconfigure (format/target/filter) per the usage example below.
stdout = _Redirect()
stderr = _Redirect(stderr=True)
stdouterr = _Redirect(stdout=True, stderr=True)

"""
# can be used as
import time
import sys
from random import getrandbits
import streamlit.redirect as rd
st.text('Suboutput:')
so = st.empty()
with rd.stdout, rd.stderr(format='markdown', to=st.sidebar):
    print("hello ")
    time.sleep(1)
    i = 5
    while i > 0:
        print("**M**izu? ", file=sys.stdout if getrandbits(1) else sys.stderr)
        i -= 1
        with rd.stdout(to=so):
            print(f" cica {i}")
        if i:
            time.sleep(1)
# """
--------------------------------------------------------------------------------
/yoloexplorer/frontend/states.py:
--------------------------------------------------------------------------------
import streamlit as st


def widget_key(action, data):
    """Return a stable session-state key for an action performed on a dataset."""
    return f"form_{action}_on_{data}"


def init_states(config_list):
    """Seed per-dataset and global Streamlit session-state entries.

    NOTE(review): whether the global keys (STAGED_IMGS, PRIMARY_DATASET, ...)
    sit inside or outside the per-dataset loop is ambiguous in this dump; the
    assignments are idempotent either way for non-empty config_list — confirm
    against the original file.
    """
    for config in config_list:
        # Dataset name without the file extension, used to namespace keys.
        data = config["data"].split(".")[0]
        st.session_state[f"EXPLORER_{data}"] = None
        st.session_state[f"IMGS_{data}"] = []
        st.session_state[f"SELECTED_IMGS_{data}"] = []
        st.session_state[f"SHOW_LABELS_{data}"] = False
        st.session_state["STAGED_IMGS"] = set()
        # The first configured dataset acts as the primary one.
        st.session_state["PRIMARY_DATASET"] = config_list[0]["data"].split(".")[0]
        st.session_state[f"SUCCESS_MSG"] = ""
        st.session_state["PERSISTING"] = False


def update_state(state, value):
    """Assign `value` to the given session-state key."""
    st.session_state[state] = value
--------------------------------------------------------------------------------
/yoloexplorer/frontend/streamlit_dash/__init__.py:
--------------------------------------------------------------------------------
import base64
import io
import os
from
pathlib import Path
import cv2

import numpy as np
import streamlit as st
import streamlit.components.v1 as components
from PIL import Image

from ultralytics.utils.plotting import Annotator, colors

# Dev toggle: when False the React frontend is served from the local dev
# server on :3001 instead of the prebuilt bundle under frontend/build.
_RELEASE = True

if not _RELEASE:
    _component_func = components.declare_component("image_select", url="http://localhost:3001")
else:
    path = (Path(__file__).parent / "frontend" / "build").resolve()
    _component_func = components.declare_component("image_select", path=path)


@st.cache_data
def _encode_file(img):
    """Read an image file from disk and return it as a base64 JPEG data URI."""
    with open(img, "rb") as img_file:
        encoded = base64.b64encode(img_file.read()).decode()
        return f"data:image/jpeg;base64, {encoded}"


@st.cache_data
def _encode_numpy(img):
    """Encode a numpy-array image as a base64 JPEG data URI via PIL."""
    pil_img = Image.fromarray(img)
    buffer = io.BytesIO()
    pil_img.save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode()
    return f"data:image/jpeg;base64, {encoded}"


def image_select(
    title: str,
    images: list,
    captions: list = None,
    indices: list = None,
    *,
    use_container_width: bool = True,
    return_value: str = "original",
    key: str = None,
    bboxes=None,
    labels=None,
    classes=None,
):
    """Shows several images and returns the image selected by the user.

    Args:
        title (str): The label shown above the images.
        images (list): The images to show. Allowed image formats are paths to local
            files, URLs, PIL images, and numpy arrays.
        captions (list of str): The captions to show below the images. Defaults to
            None, in which case no captions are shown.
        indices (list of int, optional): The indices of the images that are selected by default.
            Defaults to None.
        use_container_width (bool, optional): Whether to stretch the images to the
            width of the surrounding container. Defaults to True.
64 | return_value ("original" or "index", optional): Whether to return the 65 | original object passed into `images` or the index of the selected image. 66 | Defaults to "original". 67 | key (str, optional): The key of the component. Defaults to None. 68 | bboxes (list of list of float, optional): The bounding boxes to show on the 69 | images. Defaults to None. 70 | labels (list of str, optional): The labels to show on the bounding boxes. 71 | Defaults to None. 72 | classes (list of str, optional): The classes to show on the bounding boxes. 73 | Returns: 74 | (any): The image selected by the user (same object and type as passed to 75 | `images`). 76 | """ 77 | 78 | # Do some checks to verify the input. 79 | if len(images) < 1: 80 | raise ValueError("At least one image must be passed but `images` is empty.") 81 | if captions is not None and len(images) != len(captions): 82 | raise ValueError( 83 | "The number of images and captions must be equal but `captions` has " 84 | f"{len(captions)} elements and `images` has {len(images)} elements." 85 | ) 86 | if indices is None: 87 | indices = [] 88 | if isinstance(indices, int): 89 | indices = [indices] 90 | if not isinstance(indices, list): 91 | raise ValueError(f"`indices` must be a list of integers but it is {type(indices)}.") 92 | for i, index in enumerate(indices): 93 | if index >= len(images): 94 | raise ValueError( 95 | f"Image index at {i} must be smaller than the number of images ({len(images)}) " f"but it is {index}." 96 | ) 97 | 98 | # Encode local images/numpy arrays/PIL images to base64. 
    encoded_images = []
    for idx, img in enumerate(images):
        if bboxes:
            # Fall back to class names when no explicit labels were passed
            # (the rebinding persists for the remaining iterations).
            if labels is None:
                if classes is None:
                    raise ValueError("Labels or classes must be passed if bounding boxes are passed.")
                labels = classes
            # NOTE(review): this branch assumes `img` is a file path readable
            # by cv2.imread — numpy/PIL inputs combined with bboxes would fail
            # here. Confirm callers only pass paths when bboxes is set.
            img = cv2.imread(img)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            ann = Annotator(img)
            # Draw each box with a per-class color.
            for box, label, cls in zip(bboxes[idx], labels[idx], classes[idx]):
                ann.box_label(box, label, color=colors(cls, True))
            img = ann.result()

        if isinstance(img, (np.ndarray, Image.Image)):  # numpy array or PIL image
            img = _encode_numpy(np.asarray(img))
        elif os.path.exists(img):  # local file
            img = _encode_file(img)
        encoded_images.append(img)

    # Pass everything to the frontend.
    component_values = _component_func(
        label=title,
        images=encoded_images,
        captions=captions,
        indices=indices,
        use_container_width=use_container_width,
        key=key,
        default=indices,
    )

    # The frontend component returns the index of the selected image but we want to
    # return the actual image.
132 | if return_value == "original": 133 | return [images[component_value] for component_value in component_values] 134 | elif return_value == "index": 135 | return component_values 136 | else: 137 | raise ValueError("`return_value` must be either 'original' or 'index' " f"but is '{return_value}'.") 138 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "endOfLine": "lf", 3 | "semi": false, 4 | "trailingComma": "es5" 5 | } 6 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/asset-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": { 3 | "main.js": "./static/js/main.c396fd5a.chunk.js", 4 | "main.js.map": "./static/js/main.c396fd5a.chunk.js.map", 5 | "runtime-main.js": "./static/js/runtime-main.58369df8.js", 6 | "runtime-main.js.map": "./static/js/runtime-main.58369df8.js.map", 7 | "static/js/2.ea259f3e.chunk.js": "./static/js/2.ea259f3e.chunk.js", 8 | "static/js/2.ea259f3e.chunk.js.map": "./static/js/2.ea259f3e.chunk.js.map", 9 | "index.html": "./index.html", 10 | "precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js": "./precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js", 11 | "service-worker.js": "./service-worker.js", 12 | "static/js/2.ea259f3e.chunk.js.LICENSE.txt": "./static/js/2.ea259f3e.chunk.js.LICENSE.txt" 13 | }, 14 | "entrypoints": [ 15 | "static/js/runtime-main.58369df8.js", 16 | "static/js/2.ea259f3e.chunk.js", 17 | "static/js/main.c396fd5a.chunk.js" 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/index.html: -------------------------------------------------------------------------------- 1 | streamlit-image-select 2 | 
-------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js: -------------------------------------------------------------------------------- 1 | self.__precacheManifest = (self.__precacheManifest || []).concat([ 2 | { 3 | "revision": "87c83509714a5c6e22c524d2ea080b8d", 4 | "url": "./index.html" 5 | }, 6 | { 7 | "revision": "77469b56b388ef2477dc", 8 | "url": "./static/js/2.ea259f3e.chunk.js" 9 | }, 10 | { 11 | "revision": "b5321db7731dbb9a09b1fc4c60b61213", 12 | "url": "./static/js/2.ea259f3e.chunk.js.LICENSE.txt" 13 | }, 14 | { 15 | "revision": "8972a333194e8ffe8afc", 16 | "url": "./static/js/main.c396fd5a.chunk.js" 17 | }, 18 | { 19 | "revision": "18300b1ffba716d884c2", 20 | "url": "./static/js/runtime-main.58369df8.js" 21 | } 22 | ]); 23 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/service-worker.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Welcome to your Workbox-powered service worker! 3 | * 4 | * You'll need to register this file in your web app and you should 5 | * disable HTTP caching for this file too. 6 | * See https://goo.gl/nhQhGp 7 | * 8 | * The rest of the code is auto-generated. Please don't update this file 9 | * directly; instead, make changes to your Workbox build configuration 10 | * and re-run your build process. 
11 | * See https://goo.gl/2aRDsh 12 | */ 13 | 14 | importScripts("https://storage.googleapis.com/workbox-cdn/releases/4.3.1/workbox-sw.js"); 15 | 16 | importScripts( 17 | "./precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js" 18 | ); 19 | 20 | self.addEventListener('message', (event) => { 21 | if (event.data && event.data.type === 'SKIP_WAITING') { 22 | self.skipWaiting(); 23 | } 24 | }); 25 | 26 | workbox.core.clientsClaim(); 27 | 28 | /** 29 | * The workboxSW.precacheAndRoute() method efficiently caches and responds to 30 | * requests for URLs in the manifest. 31 | * See https://goo.gl/S9QRab 32 | */ 33 | self.__precacheManifest = [].concat(self.__precacheManifest || []); 34 | workbox.precaching.precacheAndRoute(self.__precacheManifest, {}); 35 | 36 | workbox.routing.registerNavigationRoute(workbox.precaching.getCacheKeyForURL("./index.html"), { 37 | 38 | blacklist: [/^\/_/,/\/[^/?]+\.[^/]+$/], 39 | }); 40 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/static/js/2.ea259f3e.chunk.js.LICENSE.txt: -------------------------------------------------------------------------------- 1 | /* 2 | object-assign 3 | (c) Sindre Sorhus 4 | @license MIT 5 | */ 6 | 7 | /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */ 8 | 9 | /** 10 | * @license 11 | * Copyright 2018-2021 Streamlit Inc. 12 | * 13 | * Licensed under the Apache License, Version 2.0 (the "License"); 14 | * you may not use this file except in compliance with the License. 15 | * You may obtain a copy of the License at 16 | * 17 | * http://www.apache.org/licenses/LICENSE-2.0 18 | * 19 | * Unless required by applicable law or agreed to in writing, software 20 | * distributed under the License is distributed on an "AS IS" BASIS, 21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
22 | * See the License for the specific language governing permissions and 23 | * limitations under the License. 24 | */ 25 | 26 | /** @license React v16.13.1 27 | * react-is.production.min.js 28 | * 29 | * Copyright (c) Facebook, Inc. and its affiliates. 30 | * 31 | * This source code is licensed under the MIT license found in the 32 | * LICENSE file in the root directory of this source tree. 33 | */ 34 | 35 | /** @license React v16.14.0 36 | * react.production.min.js 37 | * 38 | * Copyright (c) Facebook, Inc. and its affiliates. 39 | * 40 | * This source code is licensed under the MIT license found in the 41 | * LICENSE file in the root directory of this source tree. 42 | */ 43 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/static/js/main.c396fd5a.chunk.js: -------------------------------------------------------------------------------- 1 | (this["webpackJsonpstreamlit-dash"]=this["webpackJsonpstreamlit-dash"]||[]).push([[0],[,,,function(e,t,s){e.exports=s(4)},function(e,t,s){"use strict";s.r(t);var a=s(0),d=document.body.appendChild(document.createElement("label")),c=d.appendChild(document.createTextNode("")),n=document.body.appendChild(document.createElement("div"));n.classList.add("container");var i=[];a.a.events.addEventListener(a.a.RENDER_EVENT,(function(e){var t=e.detail;t.theme&&(d.style.font=t.theme.font,d.style.color=t.theme.textColor,"dark"===t.theme.base?document.body.querySelectorAll(".box, .caption").forEach((function(e){e.classList.add("dark")})):document.body.querySelectorAll(".box, .caption").forEach((function(e){e.classList.remove("dark")}))),c.textContent=t.args.label;var s=t.args.images,o=t.args.captions;0===n.childNodes.length&&(s.forEach((function(e,s){var d=n.appendChild(document.createElement("div"));d.classList.add("item"),!0===t.args.use_container_width&&d.classList.add("stretch");var 
c=d.appendChild(document.createElement("div"));c.classList.add("image-box");var l=c.appendChild(document.createElement("img"));if(l.classList.add("image"),l.src=e,o){var r=d.appendChild(document.createElement("div"));r.classList.add("caption"),r.textContent=o[s]}void 0!==t.args.indices&&t.args.indices.includes(s)&&(c.classList.add("selected"),l.classList.add("selected"),i.push(s)),l.onclick=function(){c.classList.contains("selected")?(i.splice(i.indexOf(s),1),c.classList.remove("selected"),l.classList.remove("selected")):(i.push(s),c.classList.add("selected"),l.classList.add("selected")),i.sort(),a.a.setComponentValue(i)}})),a.a.setComponentValue(i)),a.a.setFrameHeight()})),a.a.setComponentReady(),a.a.setFrameHeight()}],[[3,1,2]]]); 2 | //# sourceMappingURL=main.c396fd5a.chunk.js.map 3 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/static/js/main.c396fd5a.chunk.js.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"sources":["index.tsx"],"names":["labelDiv","document","body","appendChild","createElement","label","createTextNode","container","classList","add","selected_component_values","Streamlit","events","addEventListener","RENDER_EVENT","event","data","detail","theme","style","font","color","textColor","base","querySelectorAll","forEach","el","remove","textContent","args","images","captions","childNodes","length","image","i","item","box","img","src","caption","undefined","includes","push","onclick","contains","splice","indexOf","sort","setComponentValue","setFrameHeight","setComponentReady"],"mappings":"uJAAA,kBAEMA,EAAWC,SAASC,KAAKC,YAAYF,SAASG,cAAc,UAC5DC,EAAQL,EAASG,YAAYF,SAASK,eAAe,KACrDC,EAAYN,SAASC,KAAKC,YAAYF,SAASG,cAAc,QACnEG,EAAUC,UAAUC,IAAI,aACxB,IAAMC,EAAsC,GAuF5CC,IAAUC,OAAOC,iBAAiBF,IAAUG,cAhF5C,SAAkBC,GAEhB,IAAMC,EAAQD,EAAkCE,OAE5CD,EAAKE,QACPlB,EAASmB,MAAMC,KAAOJ,EAAKE,MAAME,KACjCpB,EAASmB,MAAME,MAAQL,EAAKE,MAAMI,UACV,SAApBN,EAAKE,MAAMK,KACbtB,SAASC,KAAKsB,iBAAiB,kBAAkBC,SAAQ,SAACC,GACxDA,EAAGlB,UAAUC,IAAI,WAGnBR,SAASC,KAAKsB,iBAAiB,kBAAkBC,SAAQ,SAACC,GACxDA,EAAGlB,UAAUmB,OAAO,YAO1BtB,EAAMuB,YAAcZ,EAAKa,KAAL,MACpB,IAAIC,EAASd,EAAKa,KAAL,OACTE,EAAWf,EAAKa,KAAL,SAGqB,IAAhCtB,EAAUyB,WAAWC,SACvBH,EAAOL,SAAQ,SAACS,EAAeC,GAC7B,IAAIC,EAAO7B,EAAUJ,YAAYF,SAASG,cAAc,QACxDgC,EAAK5B,UAAUC,IAAI,SACsB,IAArCO,EAAKa,KAAL,qBACFO,EAAK5B,UAAUC,IAAI,WAGrB,IAAI4B,EAAMD,EAAKjC,YAAYF,SAASG,cAAc,QAClDiC,EAAI7B,UAAUC,IAAI,aAElB,IAAI6B,EAAMD,EAAIlC,YAAYF,SAASG,cAAc,QAIjD,GAHAkC,EAAI9B,UAAUC,IAAI,SAClB6B,EAAIC,IAAML,EAENH,EAAU,CACZ,IAAIS,EAAUJ,EAAKjC,YAAYF,SAASG,cAAc,QACtDoC,EAAQhC,UAAUC,IAAI,WACtB+B,EAAQZ,YAAcG,EAASI,QAIJM,IAAzBzB,EAAKa,KAAL,SAAsCb,EAAKa,KAAL,QAAqBa,SAASP,KACtEE,EAAI7B,UAAUC,IAAI,YAClB6B,EAAI9B,UAAUC,IAAI,YAClBC,EAA0BiC,KAAKR,IAGjCG,EAAIM,QAAU,WAERP,EAAI7B,UAAUqC,SAAS,aACzBnC,EAA0BoC,OAAOpC,EAA0BqC,QAAQZ,GAAI,GACvEE,EAAI7B,UAAUmB,OAAO,YACrBW,EAAI9B,UAAUmB,OAAO,cAErBjB,EAA0BiC,KAAKR,GAC/BE,EAAI7B,UAAUC,IAAI,YAClB6B,EAAI9B,UAAUC,IAAI,aAEpBC,EAA0BsC,OAC1BrC
,IAAUsC,kBAAkBvC,OAIhCC,IAAUsC,kBAAkBvC,IAO9BC,IAAUuC,oBAQZvC,IAAUwC,oBAIVxC,IAAUuC,mB","file":"static/js/main.c396fd5a.chunk.js","sourcesContent":["import { Streamlit, RenderData } from \"streamlit-component-lib\"\n\nconst labelDiv = document.body.appendChild(document.createElement(\"label\"))\nconst label = labelDiv.appendChild(document.createTextNode(\"\"))\nconst container = document.body.appendChild(document.createElement(\"div\"))\ncontainer.classList.add(\"container\")\nconst selected_component_values: number[] = []\n\n/**\n * The component's render function. This will be called immediately after\n * the component is initially loaded, and then again every time the\n * component gets new data from Python.\n */\nfunction onRender(event: Event): void {\n // Get the RenderData from the event\n const data = (event as CustomEvent).detail\n\n if (data.theme) {\n labelDiv.style.font = data.theme.font\n labelDiv.style.color = data.theme.textColor\n if (data.theme.base === \"dark\") {\n document.body.querySelectorAll(\".box, .caption\").forEach((el) => {\n el.classList.add(\"dark\")\n })\n } else {\n document.body.querySelectorAll(\".box, .caption\").forEach((el) => {\n el.classList.remove(\"dark\")\n })\n }\n\n // TODO: Gray out the component if it's disabled.\n }\n\n label.textContent = data.args[\"label\"]\n let images = data.args[\"images\"]\n let captions = data.args[\"captions\"]\n // console.log(captions)\n\n if (container.childNodes.length === 0) {\n images.forEach((image: string, i: number) => {\n let item = container.appendChild(document.createElement(\"div\"))\n item.classList.add(\"item\")\n if (data.args[\"use_container_width\"] === true) {\n item.classList.add(\"stretch\")\n }\n\n let box = item.appendChild(document.createElement(\"div\"))\n box.classList.add(\"image-box\")\n\n let img = box.appendChild(document.createElement(\"img\"))\n img.classList.add(\"image\")\n img.src = image\n\n if (captions) {\n let caption = 
item.appendChild(document.createElement(\"div\"))\n caption.classList.add(\"caption\")\n caption.textContent = captions[i]\n }\n\n // check if i is in the index array\n if (data.args[\"indices\"] !== undefined && data.args[\"indices\"].includes(i)) {\n box.classList.add(\"selected\")\n img.classList.add(\"selected\")\n selected_component_values.push(i)\n }\n\n img.onclick = function () {\n // check if the image is already selected, then un-select it and remove it from the array\n if (box.classList.contains(\"selected\")) {\n selected_component_values.splice(selected_component_values.indexOf(i), 1)\n box.classList.remove(\"selected\")\n img.classList.remove(\"selected\")\n } else {\n selected_component_values.push(i)\n box.classList.add(\"selected\")\n img.classList.add(\"selected\")\n }\n selected_component_values.sort()\n Streamlit.setComponentValue(selected_component_values)\n }\n })\n // return selected_component_values\n Streamlit.setComponentValue(selected_component_values)\n }\n\n // We tell Streamlit to update our frameHeight after each render event, in\n // case it has changed. (This isn't strictly necessary for the example\n // because our height stays fixed, but this is a low-cost function, so\n // there's no harm in doing it redundantly.)\n Streamlit.setFrameHeight()\n}\n\n// Attach our `onRender` handler to Streamlit's render event.\nStreamlit.events.addEventListener(Streamlit.RENDER_EVENT, onRender)\n\n// Tell Streamlit we're ready to start receiving data. We won't get our\n// first RENDER_EVENT until we call this function.\nStreamlit.setComponentReady()\n\n// Finally, tell Streamlit to update our initial height. 
We omit the\n// `height` parameter here to have it default to our scrollHeight.\nStreamlit.setFrameHeight()\n"],"sourceRoot":""} 2 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/static/js/runtime-main.58369df8.js: -------------------------------------------------------------------------------- 1 | !function(e){function t(t){for(var n,l,a=t[0],i=t[1],f=t[2],c=0,s=[];c0.2%", "not dead", "not op_mini all"], 23 | "development": [ 24 | "last 1 chrome version", 25 | "last 1 firefox version", 26 | "last 1 safari version" 27 | ] 28 | }, 29 | "homepage": "." 30 | } 31 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | streamlit-image-select 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/public/styles.css: -------------------------------------------------------------------------------- 1 | *, 2 | ::after, 3 | ::before { 4 | box-sizing: border-box; 5 | } 6 | 7 | body { 8 | font-family: "Source Sans Pro", sans-serif; 9 | font-weight: 400; 10 | line-height: 1.6; 11 | text-size-adjust: 100%; 12 | margin: 0; 13 | } 14 | 15 | label { 16 | font-size: 14px; 17 | color: rgb(49, 51, 63); 18 | margin-bottom: 0.5rem; 19 | height: auto; 20 | min-height: 1.5rem; 21 | vertical-align: middle; 22 | display: flex; 23 | flex-direction: row; 24 | -webkit-box-align: center; 25 | align-items: center; 26 | } 27 | 28 | .container { 29 | width: 100%; 30 | display: flex; 31 | flex-direction: row; 32 | flex-wrap: wrap; 33 | gap: 0.5rem; 34 | } 35 | .item { 36 | width: 10rem; 37 | } 38 | .item.stretch { 39 | flex: 1; 40 | } 41 | 42 | .image-box { 43 | border: 1px solid rgba(49, 51, 63, 
0.2); 44 | border-radius: 0.25rem; 45 | padding: calc(0.25rem + 1px); 46 | height: 10rem; 47 | min-width: 10rem; 48 | } 49 | 50 | .image-box.dark { 51 | border-color: rgba(250, 250, 250, 0.2); 52 | background-color: rgb(19, 23, 32); 53 | } 54 | 55 | .image { 56 | width: 100%; 57 | height: 100%; 58 | object-fit: cover; 59 | opacity: 0.8; 60 | } 61 | 62 | .image-box:hover { 63 | border-color: var(--primary-color); 64 | cursor: pointer; 65 | } 66 | 67 | .image:hover { 68 | opacity: 1; 69 | /* filter: brightness(1.1); */ 70 | } 71 | 72 | 73 | .image-box.selected { 74 | border-color: var(--primary-color); 75 | border-width: 2px; 76 | padding: 0.25rem; 77 | /* box-shadow: rgb(255 75 75 / 50%) 0px 0px 0px 0.2rem; */ 78 | } 79 | 80 | .image.selected { 81 | opacity: 1; 82 | /* filter: brightness(1.1); */ 83 | } 84 | 85 | .caption { 86 | margin-top: 0.25rem; 87 | font-weight: 400; 88 | font-size: 14px; 89 | color: rgba(49, 51, 63, 0.6); 90 | } 91 | 92 | .caption.dark { 93 | color:rgba(250, 250, 250, 0.6); 94 | } 95 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/src/index.tsx: -------------------------------------------------------------------------------- 1 | import { Streamlit, RenderData } from "streamlit-component-lib" 2 | 3 | const labelDiv = document.body.appendChild(document.createElement("label")) 4 | const label = labelDiv.appendChild(document.createTextNode("")) 5 | const container = document.body.appendChild(document.createElement("div")) 6 | container.classList.add("container") 7 | const selected_component_values: number[] = [] 8 | 9 | /** 10 | * The component's render function. This will be called immediately after 11 | * the component is initially loaded, and then again every time the 12 | * component gets new data from Python. 
13 | */ 14 | function onRender(event: Event): void { 15 | // Get the RenderData from the event 16 | const data = (event as CustomEvent).detail 17 | 18 | if (data.theme) { 19 | labelDiv.style.font = data.theme.font 20 | labelDiv.style.color = data.theme.textColor 21 | if (data.theme.base === "dark") { 22 | document.body.querySelectorAll(".box, .caption").forEach((el) => { 23 | el.classList.add("dark") 24 | }) 25 | } else { 26 | document.body.querySelectorAll(".box, .caption").forEach((el) => { 27 | el.classList.remove("dark") 28 | }) 29 | } 30 | 31 | // TODO: Gray out the component if it's disabled. 32 | } 33 | 34 | label.textContent = data.args["label"] 35 | let images = data.args["images"] 36 | let captions = data.args["captions"] 37 | // console.log(captions) 38 | 39 | if (container.childNodes.length === 0) { 40 | images.forEach((image: string, i: number) => { 41 | let item = container.appendChild(document.createElement("div")) 42 | item.classList.add("item") 43 | if (data.args["use_container_width"] === true) { 44 | item.classList.add("stretch") 45 | } 46 | 47 | let box = item.appendChild(document.createElement("div")) 48 | box.classList.add("image-box") 49 | 50 | let img = box.appendChild(document.createElement("img")) 51 | img.classList.add("image") 52 | img.src = image 53 | 54 | if (captions) { 55 | let caption = item.appendChild(document.createElement("div")) 56 | caption.classList.add("caption") 57 | caption.textContent = captions[i] 58 | } 59 | 60 | // check if i is in the index array 61 | if (data.args["indices"] !== undefined && data.args["indices"].includes(i)) { 62 | box.classList.add("selected") 63 | img.classList.add("selected") 64 | selected_component_values.push(i) 65 | } 66 | 67 | img.onclick = function () { 68 | // check if the image is already selected, then un-select it and remove it from the array 69 | if (box.classList.contains("selected")) { 70 | selected_component_values.splice(selected_component_values.indexOf(i), 1) 71 | 
box.classList.remove("selected") 72 | img.classList.remove("selected") 73 | } else { 74 | selected_component_values.push(i) 75 | box.classList.add("selected") 76 | img.classList.add("selected") 77 | } 78 | selected_component_values.sort() 79 | Streamlit.setComponentValue(selected_component_values) 80 | } 81 | }) 82 | // return selected_component_values 83 | Streamlit.setComponentValue(selected_component_values) 84 | } 85 | 86 | // We tell Streamlit to update our frameHeight after each render event, in 87 | // case it has changed. (This isn't strictly necessary for the example 88 | // because our height stays fixed, but this is a low-cost function, so 89 | // there's no harm in doing it redundantly.) 90 | Streamlit.setFrameHeight() 91 | } 92 | 93 | // Attach our `onRender` handler to Streamlit's render event. 94 | Streamlit.events.addEventListener(Streamlit.RENDER_EVENT, onRender) 95 | 96 | // Tell Streamlit we're ready to start receiving data. We won't get our 97 | // first RENDER_EVENT until we call this function. 98 | Streamlit.setComponentReady() 99 | 100 | // Finally, tell Streamlit to update our initial height. We omit the 101 | // `height` parameter here to have it default to our scrollHeight. 
102 | Streamlit.setFrameHeight() 103 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/src/react-app-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "esModuleInterop": true, 8 | "allowSyntheticDefaultImports": true, 9 | "strict": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "noEmit": true, 16 | "jsx": "react" 17 | }, 18 | "include": ["src"] 19 | } 20 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/styles/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/frontend/styles/__init__.py -------------------------------------------------------------------------------- /yoloexplorer/yolo_predictor.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | from ultralytics.yolo.utils import ops, LOGGER 4 | from ultralytics.yolo.utils.torch_utils import smart_inference_mode 5 | from ultralytics.yolo.v8.detect.predict import DetectionPredictor 6 | 7 | 8 | class YOLOEmbeddingsPredictor(DetectionPredictor): 9 | def postprocess(self, preds, img, orig_imgs): 10 | embedding = preds[1] 11 | embedding = F.adaptive_avg_pool2d(embedding, 2).flatten(1) 12 | return embedding 13 | 14 
    @smart_inference_mode()
    def embed(self, source=None, model=None, verbose=True):
        """Run the model over `source` and return pooled embeddings.

        Lazily sets up the model and (re)configures the source on every call,
        warms the model up once, then processes the dataset.

        Note: this returns after the FIRST batch by design — see the trailing
        comment; callers drive this per image/batch rather than streaming.

        Args:
            source: Input source; falls back to self.args.source when None.
            model: Model to load if none has been set up yet.
            verbose: When True, log the path of each processed image.

        Returns:
            The embedding tensor produced by `postprocess` for the first batch.
        """
        # Setup model
        if not self.model:
            self.setup_model(model)
        # Setup source every time predict is called
        self.setup_source(source if source is not None else self.args.source)

        # Warmup model (only once per predictor lifetime).
        if not self.done_warmup:
            self.model.warmup(
                imgsz=(
                    1 if self.model.pt or self.model.triton else self.dataset.bs,
                    3,
                    *self.imgsz,
                )
            )
            self.done_warmup = True

        # Reset counters; profilers time preprocess / inference / postprocess.
        self.seen, self.windows, self.batch, profilers = (
            0,
            [],
            None,
            (ops.Profile(), ops.Profile(), ops.Profile()),
        )
        for batch in self.dataset:
            path, im0s, _, _ = batch
            if verbose:
                LOGGER.info(path[0])
            # Preprocess
            with profilers[0]:
                im = self.preprocess(im0s)

            # Inference (embed_from=-1 asks the model for last-layer features).
            with profilers[1]:
                preds = self.model(im, augment=self.args.augment, embed_from=-1)

            with profilers[2]:
                embeddings = self.postprocess(preds, im, im0s)

            return embeddings
            # yielding seems pointless as this is designed specifically to be used in for loops,
            # batching with embed_func would make things complex
--------------------------------------------------------------------------------