├── .github └── workflows │ └── main.yml ├── .gitignore ├── .pre-commit-config.yaml ├── MANIFEST.in ├── README.md ├── examples ├── add_remove_persist.ipynb └── intro.ipynb ├── pyproject.toml ├── requirements.txt ├── setup.py ├── tests └── test_explorer.py └── yoloexplorer ├── __init__.py ├── assets └── docs │ ├── dash_intro.gif │ ├── intro.gif │ ├── plotting.png │ ├── sim_index.png │ └── sim_plotting.png ├── config.py ├── dataset.py ├── explorer.py ├── frontend ├── __init__.py ├── datasets.py ├── layout.py ├── pages │ ├── 1_table.py │ └── 2_analysis.py ├── redirect.py ├── states.py ├── streamlit_dash │ ├── __init__.py │ └── frontend │ │ ├── .prettierrc │ │ ├── build │ │ ├── asset-manifest.json │ │ ├── index.html │ │ ├── precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js │ │ ├── service-worker.js │ │ ├── static │ │ │ └── js │ │ │ │ ├── 2.ea259f3e.chunk.js │ │ │ │ ├── 2.ea259f3e.chunk.js.LICENSE.txt │ │ │ │ ├── 2.ea259f3e.chunk.js.map │ │ │ │ ├── main.c396fd5a.chunk.js │ │ │ │ ├── main.c396fd5a.chunk.js.map │ │ │ │ ├── runtime-main.58369df8.js │ │ │ │ └── runtime-main.58369df8.js.map │ │ └── styles.css │ │ ├── package-lock.json │ │ ├── package.json │ │ ├── public │ │ ├── index.html │ │ └── styles.css │ │ ├── src │ │ ├── index.tsx │ │ └── react-app-env.d.ts │ │ └── tsconfig.json └── styles │ └── __init__.py └── yolo_predictor.py /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.9", "3.10", "3.11"] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 
| pip install pytest 27 | pip install -e . 28 | pip install pandas==2.0.3 29 | 30 | - name: Test with pytest 31 | run: | 32 | pytest tests 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### JetBrains ### 2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | # User-specific stuff 6 | .idea/**/workspace.xml 7 | .idea/**/tasks.xml 8 | .idea/**/usage.statistics.xml 9 | .idea/**/dictionaries 10 | .idea/**/shelf 11 | 12 | # AWS User-specific 13 | .idea/**/aws.xml 14 | 15 | # Generated files 16 | .idea/**/contentModel.xml 17 | 18 | # Sensitive or high-churn files 19 | .idea/**/dataSources/ 20 | .idea/**/dataSources.ids 21 | .idea/**/dataSources.local.xml 22 | .idea/**/sqlDataSources.xml 23 | .idea/**/dynamic.xml 24 | .idea/**/uiDesigner.xml 25 | .idea/**/dbnavigator.xml 26 | 27 | # Gradle 28 | .idea/**/gradle.xml 29 | .idea/**/libraries 30 | 31 | # Gradle and Maven with auto-import 32 | # When using Gradle or Maven with auto-import, you should exclude module files, 33 | # since they will be recreated, and may cause churn. Uncomment if using 34 | # auto-import. 
35 | # .idea/artifacts 36 | # .idea/compiler.xml 37 | # .idea/jarRepositories.xml 38 | # .idea/modules.xml 39 | # .idea/*.iml 40 | # .idea/modules 41 | # *.iml 42 | # *.ipr 43 | 44 | # CMake 45 | cmake-build-*/ 46 | 47 | # Mongo Explorer plugin 48 | .idea/**/mongoSettings.xml 49 | 50 | # File-based project format 51 | *.iws 52 | 53 | # IntelliJ 54 | out/ 55 | 56 | # mpeltonen/sbt-idea plugin 57 | .idea_modules/ 58 | 59 | # JIRA plugin 60 | atlassian-ide-plugin.xml 61 | 62 | # Cursive Clojure plugin 63 | .idea/replstate.xml 64 | 65 | # SonarLint plugin 66 | .idea/sonarlint/ 67 | 68 | # Crashlytics plugin (for Android Studio and IntelliJ) 69 | com_crashlytics_export_strings.xml 70 | crashlytics.properties 71 | crashlytics-build.properties 72 | fabric.properties 73 | 74 | # Editor-based Rest Client 75 | .idea/httpRequests 76 | 77 | # Android studio 3.1+ serialized cache file 78 | .idea/caches/build_file_checksums.ser 79 | 80 | ### JetBrains Patch ### 81 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 82 | 83 | # *.iml 84 | # modules.xml 85 | # .idea/misc.xml 86 | # *.ipr 87 | 88 | # Sonarlint plugin 89 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 90 | .idea/**/sonarlint/ 91 | 92 | # SonarQube Plugin 93 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 94 | .idea/**/sonarIssues.xml 95 | 96 | # Markdown Navigator plugin 97 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 98 | .idea/**/markdown-navigator.xml 99 | .idea/**/markdown-navigator-enh.xml 100 | .idea/**/markdown-navigator/ 101 | 102 | # Cache file creation bug 103 | # See https://youtrack.jetbrains.com/issue/JBR-2257 104 | .idea/$CACHE_FILE$ 105 | 106 | # CodeStream plugin 107 | # https://plugins.jetbrains.com/plugin/12206-codestream 108 | .idea/codestream.xml 109 | 110 | # Azure Toolkit for IntelliJ plugin 111 | # https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij 112 | 
.idea/**/azureSettings.xml 113 | 114 | ### Linux ### 115 | *~ 116 | 117 | # temporary files which can be created if a process still has a handle open of a deleted file 118 | .fuse_hidden* 119 | 120 | # KDE directory preferences 121 | .directory 122 | 123 | # Linux trash folder which might appear on any partition or disk 124 | .Trash-* 125 | 126 | # .nfs files are created when an open file is removed but is still being accessed 127 | .nfs* 128 | 129 | ### macOS ### 130 | # General 131 | .DS_Store 132 | .config 133 | *.egg-info/* 134 | 135 | .AppleDouble 136 | .LSOverride 137 | 138 | # Icon must end with two \r 139 | Icon 140 | 141 | 142 | # Thumbnails 143 | ._* 144 | 145 | # Files that might appear in the root of a volume 146 | .DocumentRevisions-V100 147 | .fseventsd 148 | .Spotlight-V100 149 | .TemporaryItems 150 | .Trashes 151 | .VolumeIcon.icns 152 | .com.apple.timemachine.donotpresent 153 | 154 | # Directories potentially created on remote AFP share 155 | .AppleDB 156 | .AppleDesktop 157 | Network Trash Folder 158 | Temporary Items 159 | .apdisk 160 | 161 | ### macOS Patch ### 162 | # iCloud generated files 163 | *.icloud 164 | 165 | ### Python ### 166 | # Byte-compiled / optimized / DLL files 167 | __pycache__/ 168 | *.py[cod] 169 | *$py.class 170 | 171 | # C extensions 172 | *.so 173 | 174 | # Distribution / packaging 175 | .Python 176 | #build/ 177 | develop-eggs/ 178 | dist/ 179 | downloads/ 180 | eggs/ 181 | .eggs/ 182 | lib/ 183 | lib64/ 184 | parts/ 185 | sdist/ 186 | var/ 187 | wheels/ 188 | share/python-wheels/ 189 | *.egg-info/ 190 | .installed.cfg 191 | *.egg 192 | MANIFEST 193 | 194 | # PyInstaller 195 | # Usually these files are written by a python script from a template 196 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
197 | *.manifest 198 | *.spec 199 | 200 | # Installer logs 201 | pip-log.txt 202 | pip-delete-this-directory.txt 203 | 204 | # Unit test / coverage reports 205 | htmlcov/ 206 | .tox/ 207 | .nox/ 208 | .coverage 209 | .coverage.* 210 | .cache 211 | nosetests.xml 212 | coverage.xml 213 | *.cover 214 | *.py,cover 215 | .hypothesis/ 216 | .pytest_cache/ 217 | cover/ 218 | 219 | # Translations 220 | *.mo 221 | *.pot 222 | 223 | # Django stuff: 224 | *.log 225 | local_settings.py 226 | db.sqlite3 227 | db.sqlite3-journal 228 | 229 | # Flask stuff: 230 | instance/ 231 | .webassets-cache 232 | 233 | # Scrapy stuff: 234 | .scrapy 235 | 236 | # Sphinx documentation 237 | docs/_build/ 238 | 239 | # PyBuilder 240 | .pybuilder/ 241 | target/ 242 | 243 | # Jupyter Notebook 244 | .ipynb_checkpoints 245 | 246 | # IPython 247 | profile_default/ 248 | ipython_config.py 249 | 250 | # pyenv 251 | # For a library or package, you might want to ignore these files since the code is 252 | # intended to run in multiple environments; otherwise, check them in: 253 | # .python-version 254 | 255 | # pipenv 256 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 257 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 258 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 259 | # install all needed dependencies. 260 | #Pipfile.lock 261 | 262 | # poetry 263 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 264 | # This is especially recommended for binary packages to ensure reproducibility, and is more 265 | # commonly ignored for libraries. 266 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 267 | #poetry.lock 268 | 269 | # pdm 270 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
271 | #pdm.lock 272 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 273 | # in version control. 274 | # https://pdm.fming.dev/#use-with-ide 275 | .pdm.toml 276 | 277 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 278 | __pypackages__/ 279 | 280 | # Celery stuff 281 | celerybeat-schedule 282 | celerybeat.pid 283 | 284 | # SageMath parsed files 285 | *.sage.py 286 | 287 | # Environments 288 | .env 289 | .venv 290 | env/ 291 | venv/ 292 | ENV/ 293 | env.bak/ 294 | venv.bak/ 295 | 296 | # Spyder project settings 297 | .spyderproject 298 | .spyproject 299 | 300 | # Rope project settings 301 | .ropeproject 302 | 303 | # mkdocs documentation 304 | /site 305 | 306 | # mypy 307 | .mypy_cache/ 308 | .dmypy.json 309 | dmypy.json 310 | 311 | # Pyre type checker 312 | .pyre/ 313 | 314 | # pytype static type analyzer 315 | .pytype/ 316 | 317 | # Cython debug symbols 318 | cython_debug/ 319 | 320 | # PyCharm 321 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 322 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 323 | # and can be added to the global gitignore or merged into this file. For a more nuclear 324 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
325 | #.idea/ 326 | 327 | ### Python Patch ### 328 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 329 | poetry.toml 330 | 331 | # ruff 332 | .ruff_cache/ 333 | 334 | # LSP config files 335 | pyrightconfig.json 336 | 337 | ### Windows ### 338 | # Windows thumbnail cache files 339 | Thumbs.db 340 | Thumbs.db:encryptable 341 | ehthumbs.db 342 | ehthumbs_vista.db 343 | 344 | # Dump file 345 | *.stackdump 346 | 347 | # Folder config file 348 | [Dd]esktop.ini 349 | 350 | # Recycle Bin used on file shares 351 | $RECYCLE.BIN/ 352 | 353 | # Windows Installer files 354 | *.cab 355 | *.msi 356 | *.msix 357 | *.msm 358 | *.msp 359 | 360 | # Windows shortcuts 361 | *.lnk 362 | 363 | run/ 364 | node_modules/ 365 | *.pt 366 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | 4 | default_language_version: 5 | python: python3.8 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.4.0 9 | hooks: 10 | - id: check-added-large-files 11 | - id: check-toml 12 | - id: check-yaml 13 | args: 14 | - --unsafe 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | - repo: https://github.com/asottile/pyupgrade 18 | rev: v3.10.1 19 | hooks: 20 | - id: pyupgrade 21 | args: 22 | - --py3-plus 23 | - --keep-runtime-typing 24 | - repo: https://github.com/astral-sh/ruff-pre-commit 25 | rev: v0.0.282 26 | hooks: 27 | - id: ruff 28 | args: 29 | - --fix 30 | - repo: https://github.com/psf/black 31 | rev: 23.7.0 32 | hooks: 33 | - id: black 34 | ci: 35 | autofix_prs: true 36 | autoupdate_schedule: monthly 37 | autofix_commit_msg: "fix(pre_commit): 🎨 auto format pre-commit hooks" 38 | autoupdate_commit_msg: "fix(pre_commit): ⬆ pre_commit autoupdate" 39 | 
 -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOExplorer 2 | 3 | Explore, manipulate and iterate on Computer Vision datasets with precision using simple APIs. 4 | Supports SQL filters, vector similarity search, native interface with Pandas and more. 5 | 6 | 7 | * Analyse your datasets with powerful custom queries 8 | * Find and remove bad images (duplicates, out of domain data and more) 9 | * Enrich datasets by adding more examples from other datasets 10 | * And more 11 | 12 | 🌟 NEW: Supports GUI Dashboard, Pythonic and notebook workflows 13 | ### Dashboard Workflows 14 | <details open>
 15 | Multiple dataset support 16 | You can now explore multiple datasets, search across them, add/remove images across multiple datasets to enrich bad examples. Start training on a new dataset within seconds. 17 | Here's an example of using VOC, coco128 and coco8 datasets together with VOC being the primary. 18 | <pre>
 19 | from yoloexplorer import Explorer
 20 | 
 21 | exp = Explorer("VOC.yaml")
 22 | exp.build_embeddings()
 23 | 
 24 | coco_exp = Explorer("coco128.yaml")
 25 | coco_exp.build_embeddings()
 26 |  #Init coco8 similarly
 27 | 
 28 | exp.dash([coco_exp, coco8])
 29 | #Automatic analysis coming soon with dash(..., analysis=True)
 30 | 
 31 | 
32 | 33 | ![ezgif com-optimize (3)](https://github.com/lancedb/yoloexplorer/assets/15766192/3422a536-138a-4fce-af2c-cef97f171aed) 34 | 35 |
36 | 37 | 38 |
39 | Multiple model support 40 | 41 | You can now explore multiple pretrained models listed 42 | `"resnet18", "resnet50", "efficientnet_b0", "efficientnet_v2_s", "googlenet", "mobilenet_v3_small"` for extracting better features out of images to improve searching across multiple datasets.
 43 | from yoloexplorer import Explorer
 44 | 
 45 | exp = Explorer("coco128.yaml", model="resnet50")
 46 | exp.build_embeddings()
 47 | 
 48 | coco_exp = Explorer("coco128.yaml", model="mobilenet_v3_small")
 49 | coco_exp.build_embeddings()
 50 | 
 51 | #Use force=True as a parameter in build_embedding if embeddings already exists
 52 | 
 53 | exp.dash([coco_exp, coco8])
 54 | #Automatic analysis coming soon with dash(..., analysis=True)
 55 | 
56 | 57 |
58 | Query using SQL and semantic search, View dataset as pandas DF and explore embeddings 59 | 60 | ![ezgif com-optimize (4)](https://github.com/lancedb/yoloexplorer/assets/15766192/b786e2f1-dc8e-411e-b13b-84b26ec50d41) 61 | 62 | ![ezgif com-optimize (5)](https://github.com/lancedb/yoloexplorer/assets/15766192/38d42a38-810e-48f3-89ea-1ccf304a1047) 63 | 64 |
65 | 66 |
67 | Try an example colab Open In Colab 68 | 69 | Colab / Notebook 70 | 71 |
72 | 73 | ### Installation 74 | ``` 75 | pip install yoloexplorer 76 | ``` 77 | Install from source branch 78 | ``` 79 | pip install git+https://github.com/lancedb/yoloexplorer.git 80 | ``` 81 | Pypi installation coming soon 82 | 83 | ## Quickstart 84 | YOLOExplorer can be used to rapidly generate new versions of CV datasets trainable on [Ultralytics YOLO, SAM, FAST-SAM, RT-DETR](https://github.com/ultralytics/ultralytics) and more models. 85 | 86 | Start exploring your Datasets in 2 simple steps 87 | * Select a supported dataset or bring your own. Supports all Ultralytics YOLO datasets currently 88 | ```python 89 | from yoloexplorer import Explorer 90 | 91 | coco_exp = Explorer("coco128.yaml") 92 | ``` 93 | * Build the LanceDB table to allow querying 94 | ```python 95 | coco_exp.build_embeddings() 96 | coco_exp.dash() # Launch the GUI dashboard 97 | ``` 98 |
 99 | Querying Basics 100 | 101 | You can get the schema of your dataset once the table is built 102 | ``` 103 | schema = coco_exp.table.schema 104 | ``` 105 | You can use this schema to run queries 106 | 107 | SQL query<br/>
 108 | Let's try this query and print 4 results - Select instances that contain one or more 'person' and 'cat' 109 | ```python 110 | df = coco_exp.sql("SELECT * from 'table' WHERE labels like '%person%' and labels LIKE '%cat%'") 111 | coco_exp.plot_imgs(ids=df["id"][0:4].to_list()) 112 | ``` 113 | Result 114 | 115 | <img width="1120" alt="Screenshot 2023-07-21 at 5 48 33 PM" src=
 116 | The above is equivalent to plotting directly with a query: 117 | ```python 118 | voc_exp.plot_imgs(query=query, n=4) 119 | ``` 120 | 121 | Querying by similarity<br/>
 122 | Now let's say your model confuses certain classes (cat & dog for example) so you want to find images similar to the ones above to investigate. 123 | 124 | The id of the first image in this case was 117 125 | ```python 126 | imgs, ids = coco_exp.get_similar_imgs(117, n=6) # accepts ids/idx, Path, or img blob 127 | voc_exp.plot_imgs(ids) 128 | ``` 129 | <img width="1120" alt="Screenshot 2023-07-21 at 6 06 05 PM" src=
 130 | The above is equivalent to directly calling `plot_similar_imgs` 131 | ```python 132 | voc_exp.plot_similar_imgs(117, n=6) 133 | ``` 134 | NOTE: You can also pass any image file for similarity search, even the ones that are not in the dataset 135 | 136 | 137 | Similarity Search with SQL Filter (Coming Soon)<br/>
138 | Soon you'll be able to have a finer control over the queries by pre-filtering your table 139 | ``` 140 | coco_exp.get_similar_imgs(..., query="WHERE labels LIKE '%motorbike%'") 141 | coco_exp.plot_similar_imgs(query="WHERE labels LIKE '%motorbike%'") 142 | ``` 143 |
144 | 145 |
146 | Plotting 147 | 148 | | Visualization Method | Description | Arguments | 149 | |---|---|---| 150 | | `plot_imgs(ids, query, n=10)` | Plots the given `ids` or the result of the SQL query. One of the 2 must be provided. | `ids`: A list of image IDs or a SQL query. `n`: The number of images to plot. | 151 | | `plot_similar_imgs(img/idx, n=10)` | Plots `n` top similar images to the given img. Accepts img idx from the dataset, Path to imgs or encoded/binary img | `img/idx`: The image to plot similar images for. `n`: The number of similar images to plot. | 152 | | `plot_similarity_index(top_k=0.01, sim_thres=0.90, reduce=False, sorted=False)` | Plots the similarity index of the dataset. This gives measure of how similar an img is when compared to all the imgs of the dataset. | `top_k`: The percentage of images to keep for the similarity index. `sim_thres`: The similarity threshold. `reduce`: Whether to reduce the dimensionality of the similarity index. `sorted`: Whether to sort the similarity index. | 153 | 154 | **Additional Details** 155 | 156 | * The `plot_imgs` method can be used to visualize a subset of images from the dataset. The `ids` argument can be a list of image IDs, or a SQL query that returns a list of image IDs. The `n` argument specifies the number of images to plot. 157 | * The `plot_similar_imgs` method can be used to visualize the top `n` similar images to a given image. The `img/idx` argument can be the index of the image in the dataset, the path to the image file, or the encoded/binary representation of the image. 158 | * The `plot_similarity_index` method can be used to visualize the similarity index of the dataset. The similarity index is a measure of how similar each image is to all the other images in the dataset. The `top_k` argument specifies the percentage of images to keep for the similarity index. The `sim_thres` argument specifies the similarity threshold. 
The `reduce` argument specifies whether to reduce the dimensionality of embeddings before calculating the index. The `sorted` argument specifies whether to sort the similarity index. 159 | 160 | 161 |
162 | 163 |
164 | Add, remove, merge parts of datasets, persist new Datasets, and start training! 165 | Once you've found the right images that you'd like to add or remove, you can simply add/remove them from your dataset and generate the updated version. 166 | 167 | Removing data
168 | You can simply remove images by passing a list of `ids` from the table. 169 | ``` 170 | coco_exp.remove_imgs([100,120,300..n]) # Removes images at the given ids. 171 | ``` 172 | 173 | Adding data
174 | For adding data from another dataset, you need an explorer object of that dataset with embeddings built. You can then pass that object along with the ids of the imgs that you'd like to add from that dataset. 175 | ``` 176 | coco_exp.add_imgs(exp, idxs) # 177 | ``` 178 | Note: You can use SQL querying and/or similarity searches to get the desired ids from the datasets. 179 | 180 | Persisting the Table: Create new dataset and start training
181 | After making the desired changes, you can persist the table to create the new dataset. 182 | ``` 183 | coco_exp.persist() 184 | ``` 185 | This creates a new dataset and outputs the training command that you can simply paste in your terminal to train a new model! 186 | 187 | Resetting the Table
188 | You can reset the table to its original or last persisted state (whichever is latest) 189 | ``` 190 | coco_exp.reset() 191 | ``` 192 |
193 | 194 |
 195 | (Advanced querying) Getting insights from Similarity index 196 | The `plot_similarity_index` method can be used to visualize the similarity index of the dataset. The similarity index is a measure of how similar each image is to all the other images in the dataset. 197 | Let's see the similarity index of the VOC dataset keeping all the default settings 198 | 199 | ```python 200 | voc_exp.plot_similarity_index() 201 | ``` 202 | 203 | <img width="1120" alt="Screenshot 2023-07-21 at 9 42 46 PM" src=
 204 | You can also get the similarity index as a numpy array to perform advanced queries. 205 | 206 | ```python 207 | sim = voc_exp.get_similarity_index() 208 | ``` 209 | Now you can combine the similarity index with other querying options discussed above to create even more powerful queries. Here's an example: 210 | 211 | "Let's say you've created a list of candidates you wish to remove from the dataset. Now, you want to filter out the images that have similarity index less than 250, i.e., remove the images that are 90% (`sim_thres`) or more similar to more than 250 images in the dataset. 212 | " 213 | ```python 214 | ids = [...] # filtered ids list 215 | filter = np.where(sim > 250) 216 | final_ids = np.intersect1d(ids, filter) # intersect both arrays 217 | 218 | exp.remove_imgs(final_ids) 219 | ``` 220 | </details>
221 | 222 |

Coming Soon

223 | 224 | Pre-filtering 225 | * To allow adding filter to searches. 226 | * Have a finer control over embeddings search space 227 | 228 | Pre-filtering will enable powerful queries like - "Show me images similar to and include only ones that contain one or more(or exactly one) person, 2 cars and 1 horse"
229 | 230 | * Automatically find potential duplicate images 231 | 232 | * Better embedding plotting and analytics insights 233 | 234 | * Better dashboard for visualizing imgs 235 |
236 | 237 | Notes: 238 | * The API will have some minor changes going from dev to minor release 239 | * For all practical purposes the ids are same as row number and is reset after every addition or removal 240 | -------------------------------------------------------------------------------- /examples/add_remove_persist.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "f5fc3d97", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "from yoloexplorer import Explorer" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "0992199f", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 24 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 25 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/VOC/labels/train2007.cache... 16551 images, 0 backgroun\n", 26 | "LanceDB embedding space already exists. Attempting to reuse it. Use force=True to overwrite.\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "voc_exp = Explorer(\"VOC.yaml\")\n", 32 | "voc_exp.build_embeddings()" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 4, 38 | "id": "9bb61d27", 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stderr", 43 | "output_type": "stream", 44 | "text": [ 45 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 46 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 47 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/coco8/labels/train.cache... 
4 images, 0 backgrounds, 0 corrupt: 100%|█████████\n" 48 | ] 49 | }, 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "WARNING: rate limit only support up to 3.10, proceeding without rate limiter\n" 55 | ] 56 | }, 57 | { 58 | "name": "stderr", 59 | "output_type": "stream", 60 | "text": [ 61 | "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 16.75it/s]\n", 62 | "\u001b[34m\u001b[1mLanceDB:\u001b[0m Embedding space built successfully.\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "coco_exp = Explorer(\"coco8.yaml\")\n", 68 | "coco_exp.build_embeddings()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "id": "d2f17222", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stderr", 79 | "output_type": "stream", 80 | "text": [ 81 | "\n", 82 | "|-----------------------------------------------|\n", 83 | "\t Number of images: 16555\n", 84 | "|------------------------------------------------|\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "voc_exp.add_imgs(coco_exp, [0,1,2,3])" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 8, 95 | "id": "fbca050d", 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/html": [ 101 | "
\n", 102 | "\n", 115 | "\n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | "
idpathclslabelsbboxesimgvector
1655016550/Users/ayushchaurasia/Documents/ultralytics/da...[0][aeroplane][[0.4880000352859497, 0.40942928194999695, 0.8...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.17438583, -0.17087273, -0.13651106, -0.177...
165510/Users/ayushchaurasia/Documents/ultralytics/da...[45, 45, 45, 49, 49, 49, 49, 50][bowl, bowl, bowl, orange, orange, orange, ora...[[0.4794920086860657, 0.6887710094451904, 0.95...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.02985644, 0.015207137, 0.22520831, 0.13441...
165521/Users/ayushchaurasia/Documents/ultralytics/da...[23, 23][giraffe, giraffe][[0.7703359723091125, 0.4896950125694275, 0.33...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.2151032, -0.15736936, -0.08698822, -0.0328...
165532/Users/ayushchaurasia/Documents/ultralytics/da...[58, 75][potted plant, vase][[0.5192189812660217, 0.4511209726333618, 0.39...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1577483, -0.07757879, 0.1852913, 0.0405876...
165543/Users/ayushchaurasia/Documents/ultralytics/da...[22][zebra][[0.3462109863758087, 0.4932590126991272, 0.68...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.14650379, -0.2218233, -0.10506437, -0.1519...
\n", 181 | "
" 182 | ], 183 | "text/plain": [ 184 | " id path \\\n", 185 | "16550 16550 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 186 | "16551 0 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 187 | "16552 1 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 188 | "16553 2 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 189 | "16554 3 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 190 | "\n", 191 | " cls \\\n", 192 | "16550 [0] \n", 193 | "16551 [45, 45, 45, 49, 49, 49, 49, 50] \n", 194 | "16552 [23, 23] \n", 195 | "16553 [58, 75] \n", 196 | "16554 [22] \n", 197 | "\n", 198 | " labels \\\n", 199 | "16550 [aeroplane] \n", 200 | "16551 [bowl, bowl, bowl, orange, orange, orange, ora... \n", 201 | "16552 [giraffe, giraffe] \n", 202 | "16553 [potted plant, vase] \n", 203 | "16554 [zebra] \n", 204 | "\n", 205 | " bboxes \\\n", 206 | "16550 [[0.4880000352859497, 0.40942928194999695, 0.8... \n", 207 | "16551 [[0.4794920086860657, 0.6887710094451904, 0.95... \n", 208 | "16552 [[0.7703359723091125, 0.4896950125694275, 0.33... \n", 209 | "16553 [[0.5192189812660217, 0.4511209726333618, 0.39... \n", 210 | "16554 [[0.3462109863758087, 0.4932590126991272, 0.68... \n", 211 | "\n", 212 | " img \\\n", 213 | "16550 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 214 | "16551 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 215 | "16552 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 216 | "16553 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 217 | "16554 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 218 | "\n", 219 | " vector \n", 220 | "16550 [-0.17438583, -0.17087273, -0.13651106, -0.177... \n", 221 | "16551 [-0.02985644, 0.015207137, 0.22520831, 0.13441... \n", 222 | "16552 [-0.2151032, -0.15736936, -0.08698822, -0.0328... \n", 223 | "16553 [-0.1577483, -0.07757879, 0.1852913, 0.0405876... \n", 224 | "16554 [-0.14650379, -0.2218233, -0.10506437, -0.1519... 
" 225 | ] 226 | }, 227 | "execution_count": 8, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "voc_exp.table.to_pandas().tail()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 9, 239 | "id": "0652d847", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stderr", 244 | "output_type": "stream", 245 | "text": [ 246 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 247 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 248 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrounds, 0 corrupt: 100%|█\n", 249 | "LanceDB embedding space already exists. Attempting to reuse it. Use force=True to overwrite.\n", 250 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 251 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 252 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/coco8/labels/train.cache... 4 images, 0 backgrounds, 0 corrupt: 100%|█████████\n", 253 | "LanceDB embedding space already exists. Attempting to reuse it. 
Use force=True to overwrite.\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "coco128_exp = Explorer(\"coco128.yaml\")\n", 259 | "coco128_exp.build_embeddings()\n", 260 | "\n", 261 | "coco8_exp = Explorer(\"coco8.yaml\")\n", 262 | "coco8_exp.build_embeddings()\n" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 10, 268 | "id": "4211c07e", 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stderr", 273 | "output_type": "stream", 274 | "text": [ 275 | "\n", 276 | "|-----------------------------------------------|\n", 277 | "\t Number of images: 130\n", 278 | "|------------------------------------------------|\n" 279 | ] 280 | } 281 | ], 282 | "source": [ 283 | "coco128_exp.add_imgs(coco8_exp, [2,3])" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 11, 289 | "id": "e9210257", 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "data": { 294 | "text/html": [ 295 | "
\n", 296 | "\n", 309 | "\n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | "
idpathclslabelsbboxesimgvector
125125/Users/ayushchaurasia/Documents/ultralytics/da...[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5][person, person, person, person, person, perso...[[0.912320077419281, 0.5608879923820496, 0.017...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1441657, -0.18747765, -0.16826846, -0.1800...
126126/Users/ayushchaurasia/Documents/ultralytics/da...[39, 39, 45, 62, 64, 73, 73, 73, 73, 73, 73, 7...[bottle, bottle, bowl, tv, mouse, book, book, ...[[0.18036000430583954, 0.8277199864387512, 0.0...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.10117926, -0.09878854, 0.06405701, -0.0227...
127127/Users/ayushchaurasia/Documents/ultralytics/da...[2, 15][car, cat][[0.5018590092658997, 0.8207259178161621, 0.99...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.13692749, -0.14139369, 0.24011154, 0.07854...
1282/Users/ayushchaurasia/Documents/ultralytics/da...[58, 75][potted plant, vase][[0.5192189812660217, 0.4511209726333618, 0.39...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1577483, -0.07757879, 0.1852913, 0.0405876...
1293/Users/ayushchaurasia/Documents/ultralytics/da...[22][zebra][[0.3462109863758087, 0.4932590126991272, 0.68...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.14650379, -0.2218233, -0.10506437, -0.1519...
\n", 375 | "
" 376 | ], 377 | "text/plain": [ 378 | " id path \\\n", 379 | "125 125 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 380 | "126 126 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 381 | "127 127 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 382 | "128 2 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 383 | "129 3 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 384 | "\n", 385 | " cls \\\n", 386 | "125 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5] \n", 387 | "126 [39, 39, 45, 62, 64, 73, 73, 73, 73, 73, 73, 7... \n", 388 | "127 [2, 15] \n", 389 | "128 [58, 75] \n", 390 | "129 [22] \n", 391 | "\n", 392 | " labels \\\n", 393 | "125 [person, person, person, person, person, perso... \n", 394 | "126 [bottle, bottle, bowl, tv, mouse, book, book, ... \n", 395 | "127 [car, cat] \n", 396 | "128 [potted plant, vase] \n", 397 | "129 [zebra] \n", 398 | "\n", 399 | " bboxes \\\n", 400 | "125 [[0.912320077419281, 0.5608879923820496, 0.017... \n", 401 | "126 [[0.18036000430583954, 0.8277199864387512, 0.0... \n", 402 | "127 [[0.5018590092658997, 0.8207259178161621, 0.99... \n", 403 | "128 [[0.5192189812660217, 0.4511209726333618, 0.39... \n", 404 | "129 [[0.3462109863758087, 0.4932590126991272, 0.68... \n", 405 | "\n", 406 | " img \\\n", 407 | "125 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 408 | "126 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 409 | "127 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 410 | "128 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 411 | "129 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 412 | "\n", 413 | " vector \n", 414 | "125 [-0.1441657, -0.18747765, -0.16826846, -0.1800... \n", 415 | "126 [-0.10117926, -0.09878854, 0.06405701, -0.0227... \n", 416 | "127 [-0.13692749, -0.14139369, 0.24011154, 0.07854... \n", 417 | "128 [-0.1577483, -0.07757879, 0.1852913, 0.0405876... 
\n", 418 | "129 [-0.14650379, -0.2218233, -0.10506437, -0.1519... " 419 | ] 420 | }, 421 | "execution_count": 11, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "coco128_exp.table.to_pandas().tail()" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 12, 433 | "id": "d0f8e343", 434 | "metadata": {}, 435 | "outputs": [ 436 | { 437 | "data": { 438 | "text/html": [ 439 | "
\n", 440 | "\n", 453 | "\n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | "
idpathclslabelsbboxesimgvector
00/Users/ayushchaurasia/Documents/ultralytics/da...[45, 45, 45, 49, 49, 49, 49, 50][bowl, bowl, bowl, orange, orange, orange, ora...[[0.4794920086860657, 0.6887710094451904, 0.95...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.02985644, 0.015207137, 0.22520831, 0.13441...
11/Users/ayushchaurasia/Documents/ultralytics/da...[23, 23][giraffe, giraffe][[0.7703359723091125, 0.4896950125694275, 0.33...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.2151032, -0.15736936, -0.08698822, -0.0328...
22/Users/ayushchaurasia/Documents/ultralytics/da...[58, 75][potted plant, vase][[0.5192189812660217, 0.4511209726333618, 0.39...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1577483, -0.07757879, 0.1852913, 0.0405876...
33/Users/ayushchaurasia/Documents/ultralytics/da...[22][zebra][[0.3462109863758087, 0.4932590126991272, 0.68...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.14650379, -0.2218233, -0.10506437, -0.1519...
\n", 509 | "
" 510 | ], 511 | "text/plain": [ 512 | " id path \\\n", 513 | "0 0 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 514 | "1 1 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 515 | "2 2 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 516 | "3 3 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 517 | "\n", 518 | " cls \\\n", 519 | "0 [45, 45, 45, 49, 49, 49, 49, 50] \n", 520 | "1 [23, 23] \n", 521 | "2 [58, 75] \n", 522 | "3 [22] \n", 523 | "\n", 524 | " labels \\\n", 525 | "0 [bowl, bowl, bowl, orange, orange, orange, ora... \n", 526 | "1 [giraffe, giraffe] \n", 527 | "2 [potted plant, vase] \n", 528 | "3 [zebra] \n", 529 | "\n", 530 | " bboxes \\\n", 531 | "0 [[0.4794920086860657, 0.6887710094451904, 0.95... \n", 532 | "1 [[0.7703359723091125, 0.4896950125694275, 0.33... \n", 533 | "2 [[0.5192189812660217, 0.4511209726333618, 0.39... \n", 534 | "3 [[0.3462109863758087, 0.4932590126991272, 0.68... \n", 535 | "\n", 536 | " img \\\n", 537 | "0 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 538 | "1 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 539 | "2 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 540 | "3 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 541 | "\n", 542 | " vector \n", 543 | "0 [-0.02985644, 0.015207137, 0.22520831, 0.13441... \n", 544 | "1 [-0.2151032, -0.15736936, -0.08698822, -0.0328... \n", 545 | "2 [-0.1577483, -0.07757879, 0.1852913, 0.0405876... \n", 546 | "3 [-0.14650379, -0.2218233, -0.10506437, -0.1519... 
" 547 | ] 548 | }, 549 | "execution_count": 12, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": [ 555 | "coco8_exp.table.to_pandas()" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": 13, 561 | "id": "733c19b6", 562 | "metadata": {}, 563 | "outputs": [ 564 | { 565 | "name": "stderr", 566 | "output_type": "stream", 567 | "text": [ 568 | "Persisting changes to the dataset...\n", 569 | "\n", 570 | "|-----------------------------------------------|\n", 571 | "\t Number of images: 130\n", 572 | "|------------------------------------------------|\n", 573 | "100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 130/130 [00:00<00:00, 17542.05it/s]\n", 574 | "Changes persisted to the dataset.\n", 575 | "\u001b[34m\u001b[1mLanceDB: \u001b[0mNew dataset created successfully! Run the following command to train a model:\n", 576 | "yolo train data=run/coco_updated epochs=10\n" 577 | ] 578 | } 579 | ], 580 | "source": [ 581 | "coco128_exp.persist(\"coco_updated\")" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 15, 587 | "id": "612c7464", 588 | "metadata": { 589 | "scrolled": true 590 | }, 591 | "outputs": [ 592 | { 593 | "name": "stderr", 594 | "output_type": "stream", 595 | "text": [ 596 | "Ultralytics YOLOv8.0.120 🚀 Python-3.11.4 torch-2.0.1 CPU\n", 597 | "YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients\n", 598 | "Scanning /Users/ayushchaurasia/Documents/ultralytics/datasets/coco128/labels/train2017... 
128 images, 2 backgrounds, 0 corrupt: 100%|███████\n", 599 | "New cache created: /Users/ayushchaurasia/Documents/ultralytics/datasets/coco128/labels/train2017.cache\n" 600 | ] 601 | }, 602 | { 603 | "name": "stdout", 604 | "output_type": "stream", 605 | "text": [ 606 | "WARNING: rate limit only support up to 3.10, proceeding without rate limiter\n" 607 | ] 608 | }, 609 | { 610 | "name": "stderr", 611 | "output_type": "stream", 612 | "text": [ 613 | "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 130/130 [00:06<00:00, 21.50it/s]\n", 614 | "\u001b[34m\u001b[1mLanceDB:\u001b[0m Embedding space built successfully.\n" 615 | ] 616 | } 617 | ], 618 | "source": [ 619 | "coco_updated = Explorer(\"coco_updated/coco_updated.yaml\")\n", 620 | "coco_updated.build_embeddings()" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 23, 626 | "id": "1f2ec47e", 627 | "metadata": {}, 628 | "outputs": [ 629 | { 630 | "data": { 631 | "text/html": [ 632 | "
\n", 633 | "\n", 646 | "\n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | "
idpathclslabelsbboxesimgvector
07/Users/ayushchaurasia/Documents/ultralytics/da...[0, 0, 0, 20, 20][person, person, person, elephant, elephant][[0.44568800926208496, 0.48061496019363403, 0....[255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,...[-0.16883952915668488, -0.19498196244239807, -...
145/Users/ayushchaurasia/Documents/ultralytics/da...[20, 20][elephant, elephant][[0.6323270201683044, 0.6266880035400391, 0.73...[255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,...[-0.13958469033241272, -0.14195983111858368, 0...
251/Users/ayushchaurasia/Documents/ultralytics/da...[20, 20, 20, 20, 20, 20][elephant, elephant, elephant, elephant, eleph...[[0.18141399323940277, 0.6764050126075745, 0.3...[255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,...[-0.19354629516601562, -0.1750415414571762, -0...
3114/Users/ayushchaurasia/Documents/ultralytics/da...[20, 20, 20, 20, 20, 20, 20][elephant, elephant, elephant, elephant, eleph...[[0.5598670244216919, 0.7241129279136658, 0.06...[255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,...[-0.18665747344493866, -0.1859922856092453, -0...
\n", 702 | "
" 703 | ], 704 | "text/plain": [ 705 | " id path \\\n", 706 | "0 7 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 707 | "1 45 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 708 | "2 51 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 709 | "3 114 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 710 | "\n", 711 | " cls \\\n", 712 | "0 [0, 0, 0, 20, 20] \n", 713 | "1 [20, 20] \n", 714 | "2 [20, 20, 20, 20, 20, 20] \n", 715 | "3 [20, 20, 20, 20, 20, 20, 20] \n", 716 | "\n", 717 | " labels \\\n", 718 | "0 [person, person, person, elephant, elephant] \n", 719 | "1 [elephant, elephant] \n", 720 | "2 [elephant, elephant, elephant, elephant, eleph... \n", 721 | "3 [elephant, elephant, elephant, elephant, eleph... \n", 722 | "\n", 723 | " bboxes \\\n", 724 | "0 [[0.44568800926208496, 0.48061496019363403, 0.... \n", 725 | "1 [[0.6323270201683044, 0.6266880035400391, 0.73... \n", 726 | "2 [[0.18141399323940277, 0.6764050126075745, 0.3... \n", 727 | "3 [[0.5598670244216919, 0.7241129279136658, 0.06... \n", 728 | "\n", 729 | " img \\\n", 730 | "0 [255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,... \n", 731 | "1 [255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,... \n", 732 | "2 [255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,... \n", 733 | "3 [255, 216, 255, 224, 0, 16, 74, 70, 73, 70, 0,... \n", 734 | "\n", 735 | " vector \n", 736 | "0 [-0.16883952915668488, -0.19498196244239807, -... \n", 737 | "1 [-0.13958469033241272, -0.14195983111858368, 0... \n", 738 | "2 [-0.19354629516601562, -0.1750415414571762, -0... \n", 739 | "3 [-0.18665747344493866, -0.1859922856092453, -0... 
" 740 | ] 741 | }, 742 | "execution_count": 23, 743 | "metadata": {}, 744 | "output_type": "execute_result" 745 | } 746 | ], 747 | "source": [ 748 | "# Remove data containing elephant\n", 749 | "coco_updated.sql(\"SELECT * FROM 'table' WHERE labels LIKE '%elephant, elephant%'\")" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 24, 755 | "id": "a9d676f9", 756 | "metadata": {}, 757 | "outputs": [ 758 | { 759 | "name": "stderr", 760 | "output_type": "stream", 761 | "text": [ 762 | "\n", 763 | "|-----------------------------------------------|\n", 764 | "\t Number of images: 126\n", 765 | "|------------------------------------------------|\n" 766 | ] 767 | } 768 | ], 769 | "source": [ 770 | "coco_updated.remove_imgs([7, 45, 51, 114])" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": 18, 776 | "id": "060fb7bf", 777 | "metadata": {}, 778 | "outputs": [ 779 | { 780 | "name": "stdout", 781 | "output_type": "stream", 782 | "text": [ 783 | "130\n" 784 | ] 785 | }, 786 | { 787 | "data": { 788 | "text/html": [ 789 | "
\n", 790 | "\n", 803 | "\n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | "
idpathclslabelsbboxesimgvector
125125/Users/ayushchaurasia/Documents/ultralytics/da...[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5][person, person, person, person, person, perso...[[0.912320077419281, 0.5608879923820496, 0.017...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1441657, -0.18747765, -0.16826846, -0.1800...
126126/Users/ayushchaurasia/Documents/ultralytics/da...[39, 39, 45, 62, 64, 73, 73, 73, 73, 73, 73, 7...[bottle, bottle, bowl, tv, mouse, book, book, ...[[0.18036000430583954, 0.8277199864387512, 0.0...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.10117926, -0.09878854, 0.06405701, -0.0227...
127127/Users/ayushchaurasia/Documents/ultralytics/da...[2, 15][car, cat][[0.5018590092658997, 0.8207259178161621, 0.99...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.13692749, -0.14139369, 0.24011154, 0.07854...
128128/Users/ayushchaurasia/Documents/ultralytics/da...[58, 75][potted plant, vase][[0.5192189812660217, 0.4511209726333618, 0.39...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.1577483, -0.07757879, 0.1852913, 0.0405876...
129129/Users/ayushchaurasia/Documents/ultralytics/da...[22][zebra][[0.3462109863758087, 0.4932590126991272, 0.68...b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00...[-0.14650379, -0.2218233, -0.10506437, -0.1519...
\n", 869 | "
" 870 | ], 871 | "text/plain": [ 872 | " id path \\\n", 873 | "125 125 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 874 | "126 126 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 875 | "127 127 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 876 | "128 128 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 877 | "129 129 /Users/ayushchaurasia/Documents/ultralytics/da... \n", 878 | "\n", 879 | " cls \\\n", 880 | "125 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 5] \n", 881 | "126 [39, 39, 45, 62, 64, 73, 73, 73, 73, 73, 73, 7... \n", 882 | "127 [2, 15] \n", 883 | "128 [58, 75] \n", 884 | "129 [22] \n", 885 | "\n", 886 | " labels \\\n", 887 | "125 [person, person, person, person, person, perso... \n", 888 | "126 [bottle, bottle, bowl, tv, mouse, book, book, ... \n", 889 | "127 [car, cat] \n", 890 | "128 [potted plant, vase] \n", 891 | "129 [zebra] \n", 892 | "\n", 893 | " bboxes \\\n", 894 | "125 [[0.912320077419281, 0.5608879923820496, 0.017... \n", 895 | "126 [[0.18036000430583954, 0.8277199864387512, 0.0... \n", 896 | "127 [[0.5018590092658997, 0.8207259178161621, 0.99... \n", 897 | "128 [[0.5192189812660217, 0.4511209726333618, 0.39... \n", 898 | "129 [[0.3462109863758087, 0.4932590126991272, 0.68... \n", 899 | "\n", 900 | " img \\\n", 901 | "125 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 902 | "126 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 903 | "127 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 904 | "128 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 905 | "129 b'\\xff\\xd8\\xff\\xe0\\x00\\x10JFIF\\x00\\x01\\x01\\x00... \n", 906 | "\n", 907 | " vector \n", 908 | "125 [-0.1441657, -0.18747765, -0.16826846, -0.1800... \n", 909 | "126 [-0.10117926, -0.09878854, 0.06405701, -0.0227... \n", 910 | "127 [-0.13692749, -0.14139369, 0.24011154, 0.07854... \n", 911 | "128 [-0.1577483, -0.07757879, 0.1852913, 0.0405876... 
\n", 912 | "129 [-0.14650379, -0.2218233, -0.10506437, -0.1519... " 913 | ] 914 | }, 915 | "execution_count": 18, 916 | "metadata": {}, 917 | "output_type": "execute_result" 918 | } 919 | ], 920 | "source": [ 921 | "print(len(coco_updated.table)) #should be updated\n", 922 | "coco_updated.table.to_pandas().tail()" 923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "execution_count": 4, 928 | "id": "6313c9a8", 929 | "metadata": {}, 930 | "outputs": [ 931 | { 932 | "name": "stderr", 933 | "output_type": "stream", 934 | "text": [ 935 | "100%|██████████████████████████████████████████████████████████████████████████| 16551/16551 [11:06<00:00, 24.83it/s]\n" 936 | ] 937 | } 938 | ], 939 | "source": [ 940 | "sim = voc_exp.get_similarity_index()" 941 | ] 942 | }, 943 | { 944 | "cell_type": "code", 945 | "execution_count": 6, 946 | "id": "135a6953", 947 | "metadata": {}, 948 | "outputs": [ 949 | { 950 | "name": "stderr", 951 | "output_type": "stream", 952 | "text": [ 953 | "100%|██████████████████████████████████████████████████████████████████████████| 16551/16551 [11:07<00:00, 24.81it/s]\n" 954 | ] 955 | }, 956 | { 957 | "data": { 958 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1yElEQVR4nO3de3RU1f3+8WdCyIVLEgJmkiiEKFaIgNwkxAv4LSnhUhGlVmyqESlYG6RcRKEVKKgFaQWrpVBdFnRJFalIW6RoDCAqMWA03E1RI6CQpDUmA3JJSPbvD3+ZMiRgRuZ+3q+1Zq3knD2Tz96zz8yTM+ecsRljjAAAACwszN8FAAAA+BuBCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWF64vwsIBvX19Tp8+LDatm0rm83m73IAAEAzGGN09OhRJScnKyzs/PuACETNcPjwYXXs2NHfZQAAgO/g0KFDuuSSS87bhkDUDG3btpX0zYDGxMT4uRoAANAcDodDHTt2dL6Pnw+BqBkaPiaLiYkhEAEAEGSac7gLB1UDAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADLIxABAADL82sg2rJli2688UYlJyfLZrNp7dq1LuuNMZo9e7aSkpIUHR2tzMxM7d+/36VNZWWlsrOzFRMTo7i4OI0bN07Hjh1zabNz505df/31ioqKUseOHbVw4UJvdw0AAAQRvwair7/+WldddZWWLFnS5PqFCxfqySef1LJly1RYWKjWrVsrKytLJ0+edLbJzs7Wnj17lJeXp3Xr1mnLli2aMGGCc73D4dCQIUOUkpKioqIi/e53v9NvfvMbPf30017vHwAACA42Y4zxdxGSZLPZ9Oqrr2rUqFGSvtk7lJycrGnTpun++++XJFVXV8tut2vFihUaM2aM9u3bp7S0NG3fvl39+vWTJG3YsEHDhw/X559/ruTkZC1dulS//vWvVVZWpoiICEnSjBkztHbtWn300UdN1nLq1CmdOnXK+bvD4VDHjh1VXV2tmJgYL44CAADwFIfDodjY2Ga9fwfsMUSlpaUqKytTZmamc1lsbKzS09NVUFAgSSooKFBcXJwzDElSZmamwsLCVFhY6GwzcOBAZxiSpKysLJWUlOirr75q8m/Pnz9fsbGxzlvHjh290UUAABAgAjYQlZWVSZLsdrvLcrvd7lxXVlamhIQEl/Xh4eGKj493adPUY5z5N842c+ZMVVdXO2+HDh268A4BAICAFe7vAgJRZGSkIiMj/V0GAADwkYDdQ5SYmChJKi8vd1leXl7uXJeYmKiKigqX9adPn1ZlZaVLm6Ye48y/AQAArC1gA1FqaqoSExOVn5/vXOZwOFRYWKiMjAxJUkZGhqqqqlRUVORss3HjRtXX1ys9Pd3ZZsuWLaqtrXW2ycvL0xVXXKF27dr5qDcAACCQ+TUQHTt2TMXFxSouLpb0zYHUxcXFOnjwoGw2myZPnqxHHnlE//jHP7Rr1y7deeedSk5Odp6J1q1bNw0dOlTjx4/Xtm3b9O6772rixIkaM2aMkpOTJUk/+clPFBERoXHjxmnPnj1atWqV/vCHP2jq1Kl+6jUAAAg4xo82bdpkJDW65eTkGGOMqa+vN7NmzTJ2u91ERkaawYMHm5KSEpfH+PLLL83tt99u2rRpY2JiYszYsWPN0aNHXdrs2LHDXHfddSYyMtJcfPHFZsGCBW7VWV1dbSSZ6urqC+ovAADwHXfevwPmOkSBzJ3rGAAAgMAQEtchAgA
A8BUCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAOK/OM17zdwleRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWRyACAACWF9CBqK6uTrNmzVJqaqqio6N12WWX6eGHH5YxxtnGGKPZs2crKSlJ0dHRyszM1P79+10ep7KyUtnZ2YqJiVFcXJzGjRunY8eO+bo7AAAgQAV0IHrssce0dOlS/fGPf9S+ffv02GOPaeHChXrqqaecbRYuXKgnn3xSy5YtU2FhoVq3bq2srCydPHnS2SY7O1t79uxRXl6e1q1bpy1btmjChAn+6BIAAAhANnPm7pYA88Mf/lB2u13PPvusc9no0aMVHR2tF154QcYYJScna9q0abr//vslSdXV1bLb7VqxYoXGjBmjffv2KS0tTdu3b1e/fv0kSRs2bNDw4cP1+eefKzk5+VvrcDgcio2NVXV1tWJiYrzTWQAAAlTnGa/pswUj/F2G29x5/w7oPUTXXHON8vPz9e9//1uStGPHDr3zzjsaNmyYJKm0tFRlZWXKzMx03ic2Nlbp6ekqKCiQJBUUFCguLs4ZhiQpMzNTYWFhKiwsbPLvnjp1Sg6Hw+UGAABCV7i/CzifGTNmyOFwqGvXrmrRooXq6ur06KOPKjs7W5JUVlYmSbLb7S73s9vtznVlZWVKSEhwWR8eHq74+Hhnm7PNnz9fc+fO9XR3AABAgAroPUQvv/yyVq5cqb/+9a/64IMP9Nxzz+n3v/+9nnvuOa/+3ZkzZ6q6utp5O3TokFf/HgAA8K+A3kM0ffp0zZgxQ2PGjJEk9ejRQwcOHND8+fOVk5OjxMRESVJ5ebmSkpKc9ysvL1evXr0kSYmJiaqoqHB53NOnT6uystJ5/7NFRkYqMjLSCz0CAACBKKD3EB0/flxhYa4ltmjRQvX19ZKk1NRUJSYmKj8/37ne4XCosLBQGRkZkqSMjAxVVVWpqKjI2Wbjxo2qr69Xenq6D3oBAAACXUDvIbrxxhv16KOPqlOnTrryyiv14YcfatGiRbr77rslSTabTZMnT9Yjjzyiyy+/XKmpqZo1a5aSk5M1atQoSVK3bt00dOhQjR8/XsuWLVNtba0mTpyoMWPGNOsMMwAAEPoCOhA99dRTmjVrln7xi1+ooqJCycnJuueeezR79mxnmwceeEBff/21JkyYoKqqKl133XXasGGDoqKinG1WrlypiRMnavDgwQoLC9Po0aP15JNP+qNLAAAgAAX0dYgCBdchAgBYGdchAgAAsAACEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDy3A9Hzzz+vU6dONVpeU1Oj559/3iNFAQAA+JLbgWjs2LGqrq5utPzo0aMaO3asR4oCAADwJbcDkTFGNput0fLPP/9csbGxHikKAADAl8Kb27B3796y2Wyy2WwaPHiwwsP/d9e6ujqVlpZq6NChXikSAADAm5odiEaNGiVJKi4uVlZWltq0aeNcFxERoc6dO2v06NEeLxAAAMDbmh2I5syZI0nq3LmzbrvtNkVFRXmtKAA
AAF9qdiBqkJOTI+mbs8oqKipUX1/vsr5Tp06eqQwAAMBH3A5E+/fv1913362tW7e6LG842Lqurs5jxQEAAPiC24HorrvuUnh4uNatW6ekpKQmzzgDAAAIJm4HouLiYhUVFalr167eqAcAAMDn3L4OUVpamv773/96oxYAAAC/cDsQPfbYY3rggQe0efNmffnll3I4HC43AACAYOP2R2aZmZmSpMGDB7ss56BqAAAQrNwORJs2bfJGHQAAAH7jdiAaNGiQN+oAAADwG7cD0ZYtW867fuDAgd+5GAAAAH9wOxDdcMMNjZadeS0ijiECAADBxu2zzL766iuXW0VFhTZs2KCrr75ab7zxhjdqBAAA8Cq39xDFxsY2WvaDH/xAERERmjp1qoqKijxSGAAAgK+4vYfoXOx2u0pKSjz1cAAAAD7j9h6inTt3uvxujNGRI0e0YMEC9erVy1N1AQAA+IzbgahXr16y2WwyxrgsHzBggP7yl794rDAAAABfcTsQlZaWuvweFhamiy66SFFRUR4rCgAAwJfcDkQpKSneqAMAAMBvvtNB1W+99ZZuvPFGdenSRV26dNHIkSP19ttve7o2AAAAn3A7EL3wwgvKzMxUq1atNGnSJE2aNEnR0dEaPHiw/vrXv3qjRgAAAK+ymbOPjv4W3bp104QJEzRlyhSX5YsWLdIzzzyjffv2ebTAQOBwOBQbG6vq6mrFxMT4uxwAAHyq84zX9NmCEf4uw23uvH+7vYfo008/1Y033tho+ciRIxsdcA0AABAM3A5EHTt2VH5+fqPlb775pjp27OiRogAAAHzJ7bPMpk2bpkmTJqm4uFjXXHONJOndd9/VihUr9Ic//MHjBQIAAHib24Ho3nvvVWJioh5//HG9/PLLkr45rmjVqlW66aabPF4gAACAt7kdiCTp5ptv1s033+zpWgAAAPzC7WOItm/frsLCwkbLCwsL9f7773ukKAAAAF9yOxDl5ubq0KFDjZZ/8cUXys3N9UhRAAAAvuR2INq7d6/69OnTaHnv3r21d+9ejxQFAADgS24HosjISJWXlzdafuTIEYWHf6dDks7riy++0E9/+lO1b99e0dHR6tGjh8tHc8YYzZ49W0lJSYqOjlZmZqb279/v8hiVlZXKzs5WTEyM4uLiNG7cOB07dszjtQIAgODkdiAaMmSIZs6cqerqaueyqqoq/epXv9IPfvADjxb31Vdf6dprr1XLli31r3/9S3v37tXjjz+udu3aOdssXLhQTz75pJYtW6bCwkK1bt1aWVlZOnnypLNNdna29uzZo7y8PK1bt05btmzRhAkTPForAAAIXm5/dccXX3yhgQMH6ssvv1Tv3r0lScXFxbLb7crLy/PoxRlnzJihd99995xfHGuMUXJysqZNm6b7779fklRdXS273a4VK1ZozJgx2rdvn9LS0rR9+3b169dPkrRhwwYNHz5cn3/+uZKTk7+1Dr66AwBgZXx1RxMuvvhi7dy5UwsXLlRaWpr69u2rP/zhD9q1a5fHr1T9j3/8Q/369dOtt96qhIQE9e7dW88884xzfWlpqcrKypSZmelcFhsbq/T0dBUUFEiSCgoKFBcX5wxDkpSZmamwsLAmz5aTpFOnTsnhcLjcAABA6PpOB/20bt3aJx85ffrpp1q6dKmmTp2qX/3qV9q+fbsmTZqkiIgI5eTkqKysTJJkt9td7me3253rysrKlJCQ4LI+PDxc8fHxzjZnmz9/vubOneuFHgEAgEDk9h4iX6qvr1efPn3029/+Vr1799aECRM0fvx4LVu2zKt/t+EYqYZbU5cZAAAAoSOgA1FSUpLS0tJclnXr1k0HDx6UJCUmJkpSo7PeysvLnesSExNVUVHhsv706dOqrKx0tjlbZGSkYmJiXG4AACB0BXQguvbaa1VSUuKy7N///rdSUlIkSampqUpMTFR+fr5zvcPhUGFhoTIyMiRJGRkZqqqqUlFRkbPNxo0bVV9fr/T0dB/0AgA
A93Se8Zq/S7Acz184yIOmTJmia665Rr/97W/14x//WNu2bdPTTz+tp59+WpJks9k0efJkPfLII7r88suVmpqqWbNmKTk5WaNGjZL0zR6loUOHOj9qq62t1cSJEzVmzJhmnWEGAABCn9t7iHJycrRlyxZv1NLI1VdfrVdffVUvvviiunfvrocfflhPPPGEsrOznW0eeOAB3XfffZowYYKuvvpqHTt2TBs2bFBUVJSzzcqVK9W1a1cNHjxYw4cP13XXXecMVQAAAG5fh2jUqFFav369UlJSNHbsWOXk5Ojiiy/2Vn0BgesQAQB8KdCu+xNo9TSXV69DtHbtWn3xxRe69957tWrVKnXu3FnDhg3T3/72N9XW1n7nogEAAPzlOx1UfdFFF2nq1KnasWOHCgsL1aVLF91xxx1KTk7WlClTGn2XGAAAQCC7oLPMjhw5ory8POXl5alFixYaPny4du3apbS0NC1evNhTNQIAAHiV24GotrZWr7zyin74wx8qJSVFq1ev1uTJk3X48GE999xzevPNN/Xyyy9r3rx53qgXAADA49w+7T4pKUn19fW6/fbbtW3bNvXq1atRm//7v/9TXFycB8oDAADwPrcD0eLFi3Xrrbe6nNZ+tri4OJWWll5QYQAuXLCeGQIAvub2R2abNm1q8myyr7/+WnfffbdHigIAAPAltwPRc889pxMnTjRafuLECT3//PMeKQoAAMCXmv2RmcPhkDFGxhgdPXrU5SOzuro6rV+/XgkJCV4pEgAAwJuaHYji4uJks9lks9n0ve99r9F6m82muXPnerQ4AAAAX2h2INq0aZOMMfr+97+vV155RfHx8c51ERERSklJ4ctSAQBAUGp2IBo0aJAkqbS0VJ06dZLNZvNaUQAAAL7UrEC0c+dOde/eXWFhYaqurtauXbvO2bZnz54eKw4AAMAXmhWIevXqpbKyMiUkJKhXr16y2WwyxjRqZ7PZVFdX5/Eigaa4e40drskDADiXZgWi0tJSXXTRRc6fAQAAQkmzAlFKSoqkb77HbO7cuZo1a5ZSU1O9WhgAAICvuHVhxpYtW+qVV17xVi0AAAB+4faVqkeNGqW1a9d6oRQAAAD/cPvLXS+//HLNmzdP7777rvr27avWrVu7rJ80aZLHigMAAPAFtwPRs88+q7i4OBUVFamoqMhlnc1mIxABAICg43Yg4iwzAICvcdkMeJvbxxABAACEGrf3EEnS559/rn/84x86ePCgampqXNYtWrTII4UBAAD4ituBKD8/XyNHjtSll16qjz76SN27d9dnn30mY4z69OnjjRoBAAC8yu2PzGbOnKn7779fu3btUlRUlF555RUdOnRIgwYN0q233uqNGgEAALzK7UC0b98+3XnnnZKk8PBwnThxQm3atNG8efP02GOPebxAAAAAb3M7ELVu3dp53FBSUpI++eQT57r//ve/nqsMAADAR9w+hmjAgAF655131K1bNw0fPlzTpk3Trl27tGbNGg0YMMAbNQIAAHiV24Fo0aJFOnbsmCRp7ty5OnbsmFatWqXLL7+cM8wAAEBQcjsQXXrppc6fW7durWXLlnm0IAAAAF/jwowAAMDymrWHqF27drLZbM16wMrKygsqCAAAwNeaFYieeOIJL5cBAADgP80KRDk5Od6uAwAAwG+aFYgcDodiYmKcP59PQzsAAIBg0exjiI4cOaKEhATFxcU1eTyRMUY2m011dXUeLxIAGnSe8Zo+WzDC32UACDHNCkQbN25UfHy8JGnTpk1eLQgAAMDXmhWIBg0a1OTPAAAAocDtCzNK0smTJ7Vz505VVFSovr7eZd3IkSM9UhgQiPi4BgC+EWqvh24Hog0bNujOO+9s8otcOYYIAAAEI7evVH3ffffp1ltv1ZEjR1RfX+9yIwwBAIBg5HYgKi8v19SpU2W3271RDwAAgM+5HYh+9KMfafPmzV4oBQAAwD/cPoboj3/8o2699Va9/fbb6tGjh1q2bOmyftK
kSR4rDgAAwBfcDkQvvvii3njjDUVFRWnz5s0uF2m02WwEIgAAEHTcDkS//vWvNXfuXM2YMUNhYW5/4gYAABBw3E40NTU1uu222whDAAAgZLidanJycrRq1Spv1AKEhM4zXvN3CT5nxT4DCC1uf2RWV1enhQsX6vXXX1fPnj0bHVS9aNEijxUHAADOL9SuGO0vbgeiXbt2qXfv3pKk3bt3u6w78wBrAACAYOF2IOLb7tEc/McCAAgmHBkNAAAsr1l7iG655RatWLFCMTExuuWWW87bds2aNR4pDAAAwFeaFYhiY2OdxwfFxsZ6tSAAAABfa1YgWr58eZM/AwCA8wumYyqDqVZPc/sYohMnTuj48ePO3w8cOKAnnnhCb7zxhkcLa8qCBQtks9k0efJk57KTJ08qNzdX7du3V5s2bTR69GiVl5e73O/gwYMaMWKEWrVqpYSEBE2fPl2nT5/2er0AAN/heli4EG4HoptuuknPP/+8JKmqqkr9+/fX448/rptuuklLly71eIENtm/frj//+c/q2bOny/IpU6bon//8p1avXq233npLhw8fdjnOqa6uTiNGjFBNTY22bt2q5557TitWrNDs2bO9VisAAAgubgeiDz74QNdff70k6W9/+5sSExN14MABPf/883ryySc9XqAkHTt2TNnZ2XrmmWfUrl075/Lq6mo9++yzWrRokb7//e+rb9++Wr58ubZu3ar33ntPkvTGG29o7969euGFF9SrVy8NGzZMDz/8sJYsWaKamhqv1AsgeLBXAaGOOd48bgei48ePq23btpK+CRu33HKLwsLCNGDAAB04cMDjBUpSbm6uRowYoczMTJflRUVFqq2tdVnetWtXderUSQUFBZKkgoIC9ejRQ3a73dkmKytLDodDe/bsafLvnTp1Sg6Hw+UGAJ7EmxQQWNwORF26dNHatWt16NAhvf766xoyZIgkqaKiQjExMR4v8KWXXtIHH3yg+fPnN1pXVlamiIgIxcXFuSy32+0qKytztjkzDDWsb1jXlPnz5ys2NtZ569ixowd6AgAAApXbgWj27Nm6//771blzZ6WnpysjI0PSN3uLGr7Sw1MOHTqkX/7yl1q5cqWioqI8+tjnM3PmTFVXVztvhw4d8tnfBgAAvuf2V3f86Ec/0nXXXacjR47oqquuci4fPHiwbr75Zo8WV1RUpIqKCvXp08e5rK6uTlu2bNEf//hHvf7666qpqVFVVZXLXqLy8nIlJiZKkhITE7Vt2zaXx204C62hzdkiIyMVGRnp0b4AAIDA9Z2+uiMxMVG9e/dWWNj/7t6/f3917drVY4VJ34SsXbt2qbi42Hnr16+fsrOznT+3bNlS+fn5zvuUlJTo4MGDzj1XGRkZ2rVrlyoqKpxt8vLyFBMTo7S0NI/WCwAAgpPbe4h8qW3bturevbvLstatW6t9+/bO5ePGjdPUqVMVHx+vmJgY3XfffcrIyNCAAQMkSUOGDFFaWpruuOMOLVy4UGVlZXrooYeUm5vLXiAAQcvKF9ALNDwXoSGgA1FzLF68WGFhYRo9erROnTqlrKws/elPf3Kub9GihdatW6d7771XGRkZat26tXJycjRv3jw/Vg0AAAJJ0AWizZs3u/weFRWlJUuWaMmSJee8T0pKitavX+/lygAAQLD6TscQAQAAhBICEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEQAAsDwCEUJO5xmv+bsEACGO15nQQyACAACWRyDCBeG/JAAILrxuN41ABAAALI9ABAAALI9ABABBho88EAoCbR4TiAAAfhFob4iwNgIRgIDAmyMAfyIQAQAAyyMQAQAAyyMQAQAAyyMQAQAAyyMQAQAAyyMQAUGMM7MAwDMIRAAAwPIIRAAAwPIIRAAAwPIIRPA5jnuxDp5rAMGCQAQ
AACyPQAQA34I9XUDoIxABQAggtAEXhkCEoMKLPgDAGwhEAADA8ghEgBewJwuAv/E65B4CEQAAsDwCEQD4AP+tA4GNQAQAACyPQAQAABqx2l5NAhEAwOes9maLwEcgAgAAlkcgAgAAlkcgAgAAlkcgAgAAlkcgAgDABziQPLARiAAAgOURiAAENP6rBuALBCIAfhWqgSdU+wWEKgIRACCoEDbhDQQiAABgeQEdiObPn6+rr75abdu2VUJCgkaNGqWSkhKXNidPnlRubq7at2+vNm3aaPTo0SovL3dpc/DgQY0YMUKtWrVSQkKCpk+frtOnT/uyKwAAIIAFdCB66623lJubq/fee095eXmqra3VkCFD9PXXXzvbTJkyRf/85z+1evVqvfXWWzp8+LBuueUW5/q6ujqNGDFCNTU12rp1q5577jmtWLFCs2fP9keXAABohI8B/S+gA9GGDRt011136corr9RVV12lFStW6ODBgyoqKpIkVVdX69lnn9WiRYv0/e9/X3379tXy5cu1detWvffee5KkN954Q3v37tULL7ygXr16adiwYXr44Ye1ZMkS1dTU+LN7AJrAGwM8hbkEdwR0IDpbdXW1JCk+Pl6SVFRUpNraWmVmZjrbdO3aVZ06dVJBQYEkqaCgQD169JDdbne2ycrKksPh0J49e5r8O6dOnZLD4XC5AbhwvEEBCFRBE4jq6+s1efJkXXvtterevbskqaysTBEREYqLi3Npa7fbVVZW5mxzZhhqWN+wrinz589XbGys89axY0cP9wYIHIQUXKjOM17z2zxi/sJTgiYQ5ebmavfu3XrppZe8/rdmzpyp6upq5+3QoUNe/5u+1JwXEE+1AQB4Fq+93hEUgWjixIlat26dNm3apEsuucS5PDExUTU1NaqqqnJpX15ersTERGebs886a/i9oc3ZIiMjFRMT43KD//EiAAQeT26XbOPwp4AORMYYTZw4Ua+++qo2btyo1NRUl/V9+/ZVy5YtlZ+f71xWUlKigwcPKiMjQ5KUkZGhXbt2qaKiwtkmLy9PMTExSktL801HfIwXFXgacwrBhPmK7yLc3wWcT25urv7617/q73//u9q2bes85ic2NlbR0dGKjY3VuHHjNHXqVMXHxysmJkb33XefMjIyNGDAAEnSkCFDlJaWpjvuuEMLFy5UWVmZHnroIeXm5ioyMtKf3QMAAAEioPcQLV26VNXV1brhhhuUlJTkvK1atcrZZvHixfrhD3+o0aNHa+DAgUpMTNSaNWuc61u0aKF169apRYsWysjI0E9/+lPdeeedmjdvnj+6hO+A//b8g3EHYCUBHYiMMU3e7rrrLmebqKgoLVmyRJWVlfr666+1Zs2aRscGpaSkaP369Tp+/Lj+85//6Pe//73CwwN659g5hcqblDf6ESpjg+AWCPMwEGoIFYyldQR0III1+fsFyN9/P9AxPgBCEYEIAAD4RCD/Q0UgAgAAlkcgAgAAlkcgClKBvNsxmDGuQHBi28WFIhABAYQXdWvx1fMdDPMqGGpEaCMQAQg4vDkC8DUCEXyCN7jQxXMLIBQQiAB8Z74MQwQvAN5EIAKAANMQ/giBgO8QiEIML6De54kxPvsxeN7gD8w7SMyDBgQiIMjw4uVdjK/vMeYIBAQiAD4Ram96odYfb2O8EOgIRICX8AbwP982FoEyVt+ljvPdx1P9CpTxAUIZgSiI8KIIf2DehR6eU88IpnE8V63u9iGY+uwuAhHcFsobBNCAeY7virkTnAhEOC82bACAFRCIAD8hbIYuntv/6TzjNcYDQTEHCEQB7NteSIJhggVDjc0RKv0A3MXch1UQiAA/88UbTlN/gzc6wH/Y/gIPgQhe4+lTmP0lEGsCcGECYbtuTg2BUOeFCpY+EIiAIBQsLzCBiLHD2bzxdTxonkAaNwIR8P8F0obpK566NgmCF8/1/zAW1kYgAgAEFIIJ/IFABAQY3gwAV2duE2wf8BYCUQDx5IbOi0b
gudDnJNgvwYDAYMW5YpU+W6Wf3kIgAr6Dc/3HygsSENi8+Y9JKLJSfwlEIcBKE9Zd/h4bf//9QBZKYxNKfUHTeI5DH4EIAM6BN0HAOghEFsMLPIJpDoTCcXXBNN4I/Ocr0OsLZgSiEBcMG08w1OhrjElw+q7PG8834H8EIgQF3jAaY0wA62L79zwCEbyODfe7C9SxC9S64H++nBvMQ3gSgQhBjxdF+Foozzm+1yt0+eN5Caa5QCCyAC7o9+0Yh9AVbM9tsNUbaAJl/PiewOBDIELA4oXDPYyXNXnzebf6m7qv+2mVcQ1UBCI4hdLGGEp9AUKFFbfLUOhzKPShOQhEIcIbE9YXG4FVNjTgTE3N+0DeFgKltkCpA+cXrM8TgcjCgnXSAmg+tnOgeQhEQDPxxoJAxxz9H8bCt0JhvAlEQShYJ16w1n0+odinYGb1g4DPZtV+n0uwfVQZyC5k3AJ1zAlEASZQJ0qw43goV8FUKwD4AoEIbgmU01B5Q28+K46VFfvsa6E8xsHSt2CpM1gQiNAsbHjBh+cM+B+2B3wbAlGAYuMFfMtX2xzbtnVY5bkOlWOzCERB4rtMrrPv460JGggTPxBq8IXm9DOUn+dz4QtFL0wo9ilYXGiYCNZr0AUiApFFuROWQm3j4AsOvcvfffX3mwnOL1jGPNhfE4OhxkBDIEIjbEjBj+ewaYwLJP/PA398/xy+HYEoADGhPStYx5Nd4e7xZN9CeZwQnJiT3kcggqX46riqUMH4+I+njxfzx/FnzB//4zloPgIRPCqYNr5gP0bAG6za7+bw19jwnCBU5kCg94NAFAACfZLAc3iuPYexBOBJBCIEBN7czo3r4yAQBMpV6v0pEGvyFH9e0iNQEIgAeEyov2CGev++jdX772tWGu9A6CuBCPCxQNjwfcEXx2gF21gGW72AlVgqEC1ZskSdO3dWVFSU0tPTtW3bNn+XhADGm1fg4Ln4Hz5CDWzBMm5c1qMxywSiVatWaerUqZozZ44++OADXXXVVcrKylJFRYW/SwsZwb4xwDuYF57DWALeY5lAtGjRIo0fP15jx45VWlqali1bplatWukvf/mLv0sD0Ex8LQcAbwn3dwG+UFNTo6KiIs2cOdO5LCwsTJmZmSooKGjU/tSpUzp16pTz9+rqakmSw+HwSn31p47L4XCo/tTxJtc3d50vH+PsdqH0GOdqx2OEzvPXacpq7Z6bFbRj35zH6DRltVt/K1Cev7PXdZ/zutuPca52PEZgzwFvvMc2PKYx5tsbGwv44osvjCSzdetWl+XTp083/fv3b9R+zpw5RhI3bty4cePGLQRuhw4d+tasYIk9RO6aOXOmpk6d6vy9vr5elZWVat++vWw2m0f/lsPhUMeOHXXo0CHFxMR49LGDEePhivFwxXi4YjwaY0xcWX08jDE6evSokpOTv7WtJQJRhw4d1KJFC5WXl7ssLy8vV2JiYqP2kZGRioyMdFkWFxfnzRIVExNjycl6LoyHK8bDFePhivFojDFxZeXxiI2NbVY7SxxUHRERob59+yo/P9+5rL6+Xvn5+crIyPBjZQAAIBBYYg+RJE2dOlU5OTnq16+f+vfvryeeeEJff/21xo4d6+/SAACAn1kmEN122236z3/+o9mzZ6usrEy9evXShg0bZLfb/VpXZGSk5syZ0+gjOqtiPFwxHq4YD1eMR2OMiSvGo/lsxjTnXDQAAIDQZYljiAAAAM6HQAQAACyPQAQAACyPQAQAACyPQORHS5YsUefOnRUVFaX09HRt27bN3yV5xPz583X11Verbdu2SkhI0KhRo1RSUuLS5oYbbpDNZnO5/fznP3dpc/DgQY0YMUKtWrVSQkKCpk+frtOnT7u02bx5s/r06aPIyEh16dJFK1as8Hb33Pab3/ymUV+7du3qXH/y5Enl5uaqffv2atOmjUaPHt3oIqKhMhaS1Llz50bjYbPZlJubKyn058aWLVt04403Kjk5WTabTWvXrnV
Zb4zR7NmzlZSUpOjoaGVmZmr//v0ubSorK5Wdna2YmBjFxcVp3LhxOnbsmEubnTt36vrrr1dUVJQ6duyohQsXNqpl9erV6tq1q6KiotSjRw+tX7/e4/39Nucbj9raWj344IPq0aOHWrdureTkZN155506fPiwy2M0NacWLFjg0iYUxkOS7rrrrkZ9HTp0qEubUJofPuWRLwuD21566SUTERFh/vKXv5g9e/aY8ePHm7i4OFNeXu7v0i5YVlaWWb58udm9e7cpLi42w4cPN506dTLHjh1zthk0aJAZP368OXLkiPNWXV3tXH/69GnTvXt3k5mZaT788EOzfv1606FDBzNz5kxnm08//dS0atXKTJ061ezdu9c89dRTpkWLFmbDhg0+7e+3mTNnjrnyyitd+vqf//zHuf7nP/+56dixo8nPzzfvv/++GTBggLnmmmuc60NpLIwxpqKiwmUs8vLyjCSzadMmY0zoz43169ebX//612bNmjVGknn11Vdd1i9YsMDExsaatWvXmh07dpiRI0ea1NRUc+LECWeboUOHmquuusq899575u233zZdunQxt99+u3N9dXW1sdvtJjs72+zevdu8+OKLJjo62vz5z392tnn33XdNixYtzMKFC83evXvNQw89ZFq2bGl27drl9TE40/nGo6qqymRmZppVq1aZjz76yBQUFJj+/fubvn37ujxGSkqKmTdvnsucOfP1JlTGwxhjcnJyzNChQ136WllZ6dImlOaHLxGI/KR///4mNzfX+XtdXZ1JTk428+fP92NV3lFRUWEkmbfeesu5bNCgQeaXv/zlOe+zfv16ExYWZsrKypzLli5damJiYsypU6eMMcY88MAD5sorr3S532233WaysrI824ELNGfOHHPVVVc1ua6qqsq0bNnSrF692rls3759RpIpKCgwxoTWWDTll7/8pbnssstMfX29McZac+PsN7z6+nqTmJhofve73zmXVVVVmcjISPPiiy8aY4zZu3evkWS2b9/ubPOvf/3L2Gw288UXXxhjjPnTn/5k2rVr5xwPY4x58MEHzRVXXOH8/cc//rEZMWKESz3p6enmnnvu8Wgf3dFUADjbtm3bjCRz4MAB57KUlBSzePHic94nlMYjJyfH3HTTTee8TyjPD2/jIzM/qKmpUVFRkTIzM53LwsLClJmZqYKCAj9W5h3V1dWSpPj4eJflK1euVIcOHdS9e3fNnDlTx48fd64rKChQjx49XC6cmZWVJYfDoT179jjbnDmGDW0CcQz379+v5ORkXXrppcrOztbBgwclSUVFRaqtrXXpR9euXdWpUydnP0JtLM5UU1OjF154QXfffbfLFydbaW6cqbS0VGVlZS61x8bGKj093WU+xMXFqV+/fs42mZmZCgsLU2FhobPNwIEDFRER4WyTlZWlkpISffXVV842wThG1dXVstlsjb5fcsGCBWrfvr169+6t3/3udy4foYbaeGzevFkJCQm64oordO+99+rLL790rrP6/LgQlrlSdSD573//q7q6ukZXybbb7froo4/8VJV31NfXa/Lkybr22mvVvXt35/Kf/OQnSklJUXJysnbu3KkHH3xQJSUlWrNmjSSprKysyfFpWHe+Ng6HQydOnFB0dLQ3u9Zs6enpWrFiha644godOXJEc+fO1fXXX6/du3errKxMERERjV7c7Xb7t/azYd352gTaWJxt7dq1qqqq0l133eVcZqW5cbaG+puq/cy+JSQkuKwPDw9XfHy8S5vU1NRGj9Gwrl27ducco4bHCEQnT57Ugw8+qNtvv93li0onTZqkPn36KD4+Xlu3btXMmTN15MgRLVq0SFJojcfQoUN1yy23KDU1VZ988ol+9atfadiwYSooKFCLFi0sPT8uFIEIXpWbm6vdu3frnXfecVk+YcIE5889evRQUlKSBg8erE8++USXXXaZr8v0qmHDhjl/7tmzp9LT05WSkqKXX345YN+YfeXZZ5/VsGHDlJy
c7FxmpbmB5qutrdWPf/xjGWO0dOlSl3VTp051/tyzZ09FRETonnvu0fz580PuKyvGjBnj/LlHjx7q2bOnLrvsMm3evFmDBw/2Y2XBj4/M/KBDhw5q0aJFozOJysvLlZiY6KeqPG/ixIlat26dNm3apEsuueS8bdPT0yVJH3/8sSQpMTGxyfFpWHe+NjExMQEdNOLi4vS9731PH3/8sRITE1VTU6OqqiqXNmfOhVAdiwMHDujNN9/Uz372s/O2s9LcaKj/fK8NiYmJqqiocFl/+vRpVVZWemTOBOJrUEMYOnDggPLy8lz2DjUlPT1dp0+f1meffSYp9MbjTJdeeqk6dOjgsn1YbX54CoHIDyIiItS3b1/l5+c7l9XX1ys/P18ZGRl+rMwzjDGaOHGiXn31VW3cuLHRrtmmFBcXS5KSkpIkSRkZGdq1a5fLht3wQpiWluZsc+YYNrQJ9DE8duyYPvnkEyUlJalv375q2bKlSz9KSkp08OBBZz9CdSyWL1+uhIQEjRgx4rztrDQ3UlNTlZiY6FK7w+FQYWGhy3yoqqpSUVGRs83GjRtVX1/vDI8ZGRnasmWLamtrnW3y8vJ0xRVXqF27ds42wTBGDWFo//79evPNN9W+fftvvU9xcbHCwsKcHx2F0nic7fPPP9eXX37psn1YaX54lL+P6raql156yURGRpoVK1aYvXv3mgkTJpi4uDiXM2eC1b333mtiY2PN5s2bXU4NPX78uDHGmI8//tjMmzfPvP/++6a0tNT8/e9/N5deeqkZOHCg8zEaTq0eMmSIKS4uNhs2bDAXXXRRk6dWT58+3ezbt88sWbIkYE6tPtO0adPM5s2bTWlpqXn33XdNZmam6dChg6moqDDGfHPafadOnczGjRvN+++/bzIyMkxGRobz/qE0Fg3q6upMp06dzIMPPuiy3Apz4+jRo+bDDz80H374oZFkFi1aZD788EPnWVMLFiwwcXFx5u9//7vZuXOnuemmm5o87b53796msLDQvPPOO+byyy93Oa26qqrK2O12c8cdd5jdu3ebl156ybRq1arRadXh4eHm97//vdm3b5+ZM2eOX06rPt941NTUmJEjR5pLLrnEFBcXu7yeNJwhtXXrVrN48WJTXFxsPvnkE/PCCy+Yiy66yNx5550hNx5Hjx41999/vykoKDClpaXmzTffNH369DGXX365OXnypPMxQml++BKByI+eeuop06lTJxMREWH69+9v3nvvPX+X5BGSmrwtX77cGGPMwYMHzcCBA018fLyJjIw0Xbp0MdOnT3e51owxxnz22Wdm2LBhJjo62nTo0MFMmzbN1NbWurTZtGmT6dWrl4mIiDCXXnqp828Ekttuu80kJSWZiIgIc/HFF5vbbrvNfPzxx871J06cML/4xS9Mu3btTKtWrczNN99sjhw54vIYoTIWDV5//XUjyZSUlLgst8Lc2LRpU5PbR05OjjHmm1PvZ82aZex2u4mMjDSDBw9uNE5ffvmluf32202bNm1MTEyMGTt2rDl69KhLmx07dpjrrrvOREZGmosvvtgsWLCgUS0vv/yy+d73vmciIiLMlVdeaV577TWv9ftczjcepaWl53w9abhuVVFRkUlPTzexsbEmKirKdOvWzfz2t791CQjGhMZ4HD9+3AwZMsRcdNFFpmXLliYlJcWMHz++0T/SoTQ/fMlmjDE+2BEFAAAQsDiGCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCAAAWB6BCEBIu+GGGzR58uRzru/cubOeeOIJn9UDIDCF+7sAAPCmNWvWqGXLlv4uA0CAIxABCGnx8fH+LgFAEOAjMwAh7cyPzCoqKnTjjTcqOjpaqampWrlypUvbzZs3KyIiQm+//bZz2cKFC5WQkKDy8nJflg3Ax9hDBMAy7rrrLh0+fFibNm1Sy5YtNWnSJFVUVDjXN4SnO+64Qzt27NCnn36qWbNmafXq1bL
b7X6sHIC3EYgAWMK///1v/etf/9K2bdt09dVXS5KeffZZdevWzaXdI488ory8PE2YMEG7d+9WTk6ORo4c6Y+SAfgQgQiAJezbt0/h4eHq27evc1nXrl0VFxfn0i4iIkIrV65Uz549lZKSosWLF/u4UgD+wDFEAHCWrVu3SpIqKytVWVnp52oA+AKBCIAldO3aVadPn1ZRUZFzWUlJiaqqqlzaffLJJ5oyZYqeeeYZpaenKycnR/X19T6uFoCvEYgAWMIVV1yhoUOH6p577lFhYaGKior0s5/9TNHR0c42dXV1+ulPf6qsrCyNHTtWy5cv186dO/X444/7sXIAvkAgAmAZy5cvV3JysgYNGqRbbrlFEyZMUEJCgnP9o48+qgMHDujPf/6zJCkpKUlPP/20HnroIe3YscNfZQPwAZsxxvi7CAAAAH9iDxEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALA8AhEAALC8/wct0sTYYZ2urgAAAABJRU5ErkJggg==", 959 | "text/plain": [ 960 | "
" 961 | ] 962 | }, 963 | "metadata": {}, 964 | "output_type": "display_data" 965 | } 966 | ], 967 | "source": [ 968 | "voc_exp.plot_similirity_index()" 969 | ] 970 | }, 971 | { 972 | "cell_type": "code", 973 | "execution_count": null, 974 | "id": "731cad56", 975 | "metadata": {}, 976 | "outputs": [], 977 | "source": [ 978 | "sim.where(sim>500)" 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "execution_count": null, 984 | "id": "91122255", 985 | "metadata": {}, 986 | "outputs": [], 987 | "source": [] 988 | } 989 | ], 990 | "metadata": { 991 | "kernelspec": { 992 | "display_name": "Python 3 (ipykernel)", 993 | "language": "python", 994 | "name": "python3" 995 | }, 996 | "language_info": { 997 | "codemirror_mode": { 998 | "name": "ipython", 999 | "version": 3 1000 | }, 1001 | "file_extension": ".py", 1002 | "mimetype": "text/x-python", 1003 | "name": "python", 1004 | "nbconvert_exporter": "python", 1005 | "pygments_lexer": "ipython3", 1006 | "version": "3.11.4" 1007 | } 1008 | }, 1009 | "nbformat": 4, 1010 | "nbformat_minor": 5 1011 | } 1012 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | # Enable the pycodestyle (`E`) and Pyflakes (`F`) rules by default. 3 | # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or 4 | # McCabe complexity (`C901`) by default. 5 | select = ["E9", "F63", "F7", "F82"] 6 | ignore = [] 7 | 8 | # Allow autofix for all enabled rules (when `--fix`) is provided. 9 | fixable = ["ALL"] 10 | unfixable = [] 11 | 12 | # Exclude a variety of commonly ignored directories. 
13 | exclude = [ 14 | ".bzr", 15 | ".direnv", 16 | ".eggs", 17 | ".git", 18 | ".git-rewrite", 19 | ".hg", 20 | ".mypy_cache", 21 | ".nox", 22 | ".pants.d", 23 | ".pytype", 24 | ".ruff_cache", 25 | ".svn", 26 | ".tox", 27 | ".venv", 28 | "__pypackages__", 29 | "_build", 30 | "buck-out", 31 | "build", 32 | "dist", 33 | "node_modules", 34 | "venv", 35 | ] 36 | per-file-ignores = {} 37 | line-length = 120 38 | 39 | # Allow unused variables when underscore-prefixed. 40 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 41 | 42 | # Assume Python 3.8 43 | target-version = "py38" 44 | 45 | [tool.black] 46 | line-length = 120 47 | include = '\.pyi?$' 48 | exclude = ''' 49 | /( 50 | \.git 51 | | \.hg 52 | | \.mypy_cache 53 | | \.tox 54 | | \.venv 55 | | _build 56 | | buck-out 57 | | build 58 | | dist 59 | )/ 60 | ''' 61 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | lancedb 2 | duckdb 3 | scikit-learn 4 | streamlit 5 | plotly 6 | ultralytics 7 | pandas==2.0.3 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from setuptools import find_packages, setup 3 | 4 | # import pkg_resources as pkg 5 | 6 | # Settings 7 | FILE = Path(__file__).resolve() 8 | PARENT = FILE.parent # root directory 9 | README = (PARENT / "README.md").read_text(encoding="utf-8") 10 | REQUIREMENTS = (PARENT / "requirements.txt").read_text(encoding="utf-8").splitlines() 11 | 12 | 13 | def get_version(): 14 | return "0.0.1" 15 | 16 | 17 | setup( 18 | name="yoloexplorer", 19 | version=get_version(), 20 | python_requires=">=3.8", 21 | description="", 22 | # long_description=README, 23 | install_requires=REQUIREMENTS, 24 | long_description_content_type="text/markdown", 25 | author="dev@lance", 26 | 
author_email="contact@lancedb.com", 27 | packages=find_packages(), # required 28 | package_data={"yoloexplorer": ["frontend/streamlit_dash/frontend/**"]}, 29 | include_package_data=True, 30 | ) 31 | -------------------------------------------------------------------------------- /tests/test_explorer.py: -------------------------------------------------------------------------------- 1 | from yoloexplorer import Explorer 2 | 3 | 4 | class TestExplorer: 5 | def test_embeddings_creation(self): 6 | coco_exp = Explorer("coco8.yaml") 7 | coco_exp.build_embeddings(force=True) 8 | assert coco_exp.table_name == "coco8.yaml", "the table name should be coco8.yaml" 9 | assert len(coco_exp.table) == 4, "the length of the embeddings table should be 8" 10 | 11 | def test_sim_idx(self): 12 | coco_exp = Explorer("coco8.yaml") 13 | coco_exp.build_embeddings() 14 | 15 | idx = coco_exp.get_similarity_index(0, 1) # get all imgs 16 | assert len(idx) == 4, "the length of the similar index should be 8" 17 | 18 | def test_operations(self): 19 | coco_exp = Explorer("coco8.yaml") 20 | coco_exp.build_embeddings("yolov8n.pt") 21 | 22 | sim = coco_exp.get_similarity_index() 23 | assert sim.shape[0] == 4, "the length of the embeddings table should be 1" 24 | 25 | _, ids = coco_exp.get_similar_imgs(3, 10) 26 | coco_exp.remove_imgs(ids[0]) 27 | coco_exp.reset() 28 | coco_exp.log_status() 29 | coco_exp.remove_imgs([0, 1]) 30 | coco_exp.remove_imgs([0]) 31 | assert len(coco_exp.table.to_arrow()) == 1, "the length of the embeddings table should be 1" 32 | coco_exp.persist() 33 | assert len(coco_exp.table.to_arrow()) == 1, "the length of the embeddings table should be 1" 34 | 35 | def test_add_imgs(self): 36 | coco_exp = Explorer("coco8.yaml") 37 | coco_exp.build_embeddings() 38 | coco128_exp = Explorer("coco128.yaml") 39 | coco128_exp.build_embeddings() 40 | 41 | coco_exp.add_imgs(coco128_exp, [i for i in range(4)]) 42 | assert len(coco_exp.table) == 8, "the length of the embeddings table should be 
8" 43 | 44 | def test_sql(self): 45 | coco_exp = Explorer("coco8.yaml") 46 | coco_exp.build_embeddings() 47 | result = coco_exp.sql("SELECT id FROM 'table' LIMIT 2") 48 | 49 | assert result["id"].to_list() == [ 50 | 0, 51 | 1, 52 | ], f'the result of the sql query should be [0,1] found {result["id"].to_list}' 53 | 54 | def test_id_reassignment(self): 55 | coco_exp = Explorer("coco128.yaml") 56 | coco_exp.build_embeddings(force=True) 57 | 58 | coco8_exp = Explorer("coco8.yaml") 59 | coco8_exp.build_embeddings(force=True) 60 | # test removal 61 | for i in range(4): 62 | coco_exp.remove_imgs([i]) 63 | df = coco_exp.table.to_pandas() 64 | assert df["id"].to_list() == [idx for idx in range(len(df))], "the ids should be reassigned" 65 | 66 | # test addition 67 | coco_exp.add_imgs(coco8_exp, [i for i in range(4)]) 68 | df = coco_exp.table.to_pandas() 69 | assert df["id"].to_list() == [idx for idx in range(len(df))], "the ids should be reassigned" 70 | 71 | # test reset 72 | coco_exp.reset() 73 | df = coco_exp.table.to_pandas() 74 | assert df["id"].to_list() == [idx for idx in range(128)], "the ids should be reassigned" 75 | 76 | def test_sim_search(self): 77 | coco_exp = Explorer("coco8.yaml") 78 | coco_exp.build_embeddings() 79 | coco_exp.get_similar_imgs(0, 10) 80 | 81 | paths = coco_exp.table.to_pandas()["path"].to_list() 82 | coco_exp.get_similar_imgs(paths, 10) 83 | 84 | """ 85 | # Not supported yet 86 | def test_copy_embeddings_from_table(self): 87 | project = 'runs/test/temp/' 88 | ds = Explorer('coco8.yaml', project=project) 89 | ds.build_embeddings() 90 | 91 | table = project + ds.table_name + '.lance' 92 | ds2 = Explorer(table=table) 93 | assert ds2.table_name == 'coco8.yaml', 'the table name should be coco8.yaml' 94 | """ 95 | -------------------------------------------------------------------------------- /yoloexplorer/__init__.py: -------------------------------------------------------------------------------- 1 | from .explorer import Explorer 2 | from 
.frontend import datasets 3 | from . import config 4 | 5 | __all__ = ["Explorer", "datasets", "config"] 6 | -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/dash_intro.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/dash_intro.gif -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/intro.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/intro.gif -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/plotting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/plotting.png -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/sim_index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/sim_index.png -------------------------------------------------------------------------------- /yoloexplorer/assets/docs/sim_plotting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/assets/docs/sim_plotting.png -------------------------------------------------------------------------------- /yoloexplorer/config.py: -------------------------------------------------------------------------------- 1 | 
TEMP_CONFIG_PATH = ".config/last_config.json" 2 | -------------------------------------------------------------------------------- /yoloexplorer/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | from ultralytics.data.dataset import YOLODataset 6 | from ultralytics.data.augment import Format 7 | from ultralytics.data.utils import check_det_dataset 8 | 9 | 10 | def get_dataset_info(data="coco128.yaml", task="detect"): 11 | # TODO: handle other tasks 12 | data = check_det_dataset(data) 13 | 14 | return data 15 | 16 | 17 | def get_relative_path(path1, path2): 18 | """Gets the relative path of `path1` to `path2`. 19 | 20 | Args: 21 | path1: The absolute path of the first file. 22 | path2: The absolute path of the second file. 23 | 24 | Returns: 25 | The relative path of `path1` to `path2`. 26 | """ 27 | 28 | relative_path = os.path.relpath(path1, os.path.dirname(path2)) 29 | 30 | return relative_path 31 | 32 | 33 | class Dataset(YOLODataset): 34 | def __init__(self, *args, data=None, **kwargs): 35 | super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs) 36 | 37 | # NOTE: Load the image directly without any resize operations. 
38 | def load_image(self, i): 39 | """Loads 1 image from dataset index 'i', returns (im, resized hw).""" 40 | im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i] 41 | if im is None: # not cached in RAM 42 | if fn.exists(): # load npy 43 | im = np.load(fn) 44 | else: # read image 45 | im = cv2.imread(f) # BGR 46 | if im is None: 47 | raise FileNotFoundError(f"Image Not Found {f}") 48 | h0, w0 = im.shape[:2] # orig hw 49 | return im, (h0, w0), im.shape[:2] 50 | 51 | return self.ims[i], self.im_hw0[i], self.im_hw[i] 52 | 53 | def build_transforms(self, hyp=None): 54 | transforms = Format( 55 | bbox_format="xyxy", 56 | normalize=False, 57 | return_mask=self.use_segments, 58 | return_keypoint=self.use_keypoints, 59 | batch_idx=True, 60 | mask_ratio=hyp.mask_ratio, 61 | mask_overlap=hyp.overlap_mask, 62 | ) 63 | return transforms 64 | -------------------------------------------------------------------------------- /yoloexplorer/explorer.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from collections import defaultdict 3 | import math 4 | import json 5 | 6 | import pandas as pd 7 | import cv2 8 | import duckdb 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | import yaml 12 | from tqdm import tqdm 13 | from ultralytics.utils import LOGGER, colorstr 14 | from ultralytics.utils.plotting import Annotator, colors 15 | from torch import Tensor 16 | import lancedb 17 | import pyarrow as pa 18 | from lancedb.embeddings import with_embeddings 19 | from sklearn.decomposition import PCA 20 | 21 | from yoloexplorer.dataset import get_dataset_info, Dataset 22 | from yoloexplorer.frontend import launch 23 | from yoloexplorer.config import TEMP_CONFIG_PATH 24 | 25 | import torch 26 | import torchvision.models as models 27 | from torchvision import datasets, transforms 28 | from PIL import Image 29 | import sys 30 | 31 | SCHEMA = [ 32 | "id", 33 | # "img", # Make this optional; disabled by 
default. Not feasible unless we can have row_id/primary key to index 34 | "path", 35 | "cls", 36 | "labels", 37 | "bboxes", 38 | "segments", 39 | "keypoints", 40 | "meta", 41 | ] # + "vector" with embeddings 42 | 43 | 44 | def encode(img_path): 45 | img = cv2.imread(img_path) 46 | ext = Path(img_path).suffix 47 | img_encoded = cv2.imencode(ext, img)[1].tobytes() 48 | 49 | return img_encoded 50 | 51 | 52 | def decode(img_encoded): 53 | nparr = np.frombuffer(img_encoded, np.byte) 54 | img = cv2.imdecode(nparr, cv2.IMREAD_ANYCOLOR) 55 | 56 | return img 57 | 58 | 59 | class Explorer: 60 | """ 61 | Dataset explorer 62 | """ 63 | 64 | def __init__(self, data, device="", model="resnet18", batch_size=64, project="run") -> None: 65 | """ 66 | Args: 67 | data (str, optional): path to dataset file 68 | table (str, optional): path to LanceDB table to load embeddings Table from. 69 | model (str, optional): path to model. Defaults to None. 70 | device (str, optional): device to use. Defaults to ''. If empty, uses the default device. 71 | project (str, optional): path to project. Defaults to "runs/dataset". 
72 | """ 73 | self.data = data 74 | self.table = None 75 | self.model = model 76 | self.device = device 77 | self.batch_size = batch_size 78 | self.project = project 79 | self.dataset_info = None 80 | self.predictor = None 81 | self.trainset = None 82 | self.removed_img_count = 0 83 | self.verbose = False # For embedding function 84 | self._sim_index = None 85 | self.version = None 86 | 87 | self.table_name = Path(data).name 88 | self.temp_table_name = self.table_name + "_temp" 89 | 90 | self.model_arch_supported = [ 91 | "resnet18", 92 | "resnet50", 93 | "efficientnet_b0", 94 | "efficientnet_v2_s", 95 | "googlenet", 96 | "mobilenet_v3_small", 97 | ] 98 | 99 | if model: 100 | self.predictor = self._setup_predictor(model, device) 101 | if data: 102 | self.dataset_info = get_dataset_info(self.data) 103 | 104 | self.transform = transforms.Compose( 105 | [ 106 | transforms.Resize((224, 224)), 107 | transforms.ToTensor(), 108 | ] 109 | ) 110 | 111 | def build_embeddings(self, verbose=False, force=False, store_imgs=False): 112 | """ 113 | Builds the dataset in LanceDB table format 114 | 115 | Args: 116 | batch (int, optional): batch size. Defaults to 1000. 117 | verbose (bool, optional): verbose. Defaults to False. 118 | force (bool, optional): force rebuild. Defaults to False. 119 | """ 120 | trainset = self.dataset_info["train"] 121 | trainset = trainset if isinstance(trainset, list) else [trainset] 122 | self.trainset = trainset 123 | self.verbose = verbose 124 | 125 | dataset = Dataset(img_path=trainset, data=self.dataset_info, augment=False, cache=False) 126 | batch_size = self.batch_size # TODO: fix this hardcoding 127 | db = self._connect() 128 | if not force and self.table_name in db.table_names(): 129 | LOGGER.info("LanceDB embedding space already exists. Attempting to reuse it. 
Use force=True to overwrite.") 130 | self.table = self._open_table(self.table_name) 131 | self.version = self.table.version 132 | if len(self.table) == dataset.ni: 133 | return 134 | else: 135 | self.table = None 136 | LOGGER.info("Table length does not match the number of images in the dataset. Building embeddings...") 137 | 138 | table_data = defaultdict(list) 139 | for idx, batch in enumerate(dataset): 140 | batch["id"] = idx 141 | batch["cls"] = batch["cls"].flatten().int().tolist() 142 | box_cls_pair = sorted(zip(batch["bboxes"].tolist(), batch["cls"]), key=lambda x: x[1]) 143 | batch["bboxes"] = [box for box, _ in box_cls_pair] 144 | batch["cls"] = [cls for _, cls in box_cls_pair] 145 | batch["labels"] = [self.dataset_info["names"][i] for i in batch["cls"]] 146 | batch["path"] = batch["im_file"] 147 | # batch["cls"] = batch["cls"].tolist() 148 | keys = (key for key in SCHEMA if key in batch) 149 | for key in keys: 150 | val = batch[key] 151 | if isinstance(val, Tensor): 152 | val = val.tolist() 153 | table_data[key].append(val) 154 | 155 | table_data["img"].append(encode(batch["im_file"])) if store_imgs else None 156 | 157 | if len(table_data[key]) == batch_size or idx == dataset.ni - 1: 158 | df = pd.DataFrame(table_data) 159 | df = with_embeddings(self._embedding_func, df, "path", batch_size=batch_size) 160 | if self.table: 161 | self.table.add(df) 162 | else: 163 | self.table = self._create_table(self.table_name, data=df, mode="overwrite") 164 | self.version = self.table.version 165 | table_data = defaultdict(list) 166 | 167 | LOGGER.info(f'{colorstr("LanceDB:")} Embedding space built successfully.') 168 | 169 | def plot_embeddings(self): 170 | """ 171 | Projects the embedding space to 2D using PCA 172 | 173 | Args: 174 | n_components (int, optional): number of components. Defaults to 2. 175 | """ 176 | if self.table is None: 177 | LOGGER.error("No embedding space found. 
Please build the embedding space first.") 178 | return None 179 | pca = PCA(n_components=2) 180 | embeddings = np.array(self.table.to_arrow()["vector"].to_pylist()) 181 | embeddings = pca.fit_transform(embeddings) 182 | plt.scatter(embeddings[:, 0], embeddings[:, 1]) 183 | plt.show() 184 | 185 | def get_similar_imgs(self, img, n=10): 186 | """ 187 | Returns the n most similar images to the given image 188 | 189 | Args: 190 | img (int, str, Path): index of image in the table, or path to image 191 | n (int, optional): number of similar images to return. Defaults to 10. 192 | 193 | Returns: 194 | tuple: (list of paths, list of ids) 195 | """ 196 | embeddings = None 197 | if self.table is None: 198 | LOGGER.error("No embedding space found. Please build the embedding space first.") 199 | return None 200 | if isinstance(img, int): 201 | embeddings = self.table.to_pandas()["vector"][img] 202 | elif isinstance(img, (str, Path)): 203 | img = img 204 | elif isinstance(img, bytes): 205 | img = decode(img) 206 | elif isinstance(img, list): # exceptional case for batch search from dash 207 | df = self.table.to_pandas().set_index("path") 208 | array = None 209 | try: 210 | array = df.loc[img]["vector"].to_list() 211 | embeddings = np.array(array) 212 | except KeyError: 213 | pass 214 | else: 215 | LOGGER.error("img should be index from the table(int), path of an image (str or Path), or bytes") 216 | return 217 | 218 | if embeddings is None: 219 | if isinstance(img, list): 220 | embeddings = np.array( 221 | [self.predictor(self._image_encode(i)).squeeze().cpu().detach().numpy() for i in img] 222 | ) 223 | else: 224 | embeddings = self.predictor(self._image_encode(img)).squeeze().cpu().detach().numpy() 225 | 226 | if len(embeddings.shape) > 1: 227 | embeddings = np.mean(embeddings, axis=0) 228 | 229 | sim = self.table.search(embeddings).limit(n).to_df() 230 | return sim["path"].to_list(), sim["id"].to_list() 231 | 232 | def plot_similar_imgs(self, img, n=10): 233 | """ 234 | Plots 
the n most similar images to the given image 235 | 236 | Args: 237 | img (int, str, Path): index of image in the table, or path to image. 238 | n (int, optional): number of similar images to return. Defaults to 10. 239 | """ 240 | _, ids = self.get_similar_imgs(img, n) 241 | self.plot_imgs(ids) 242 | 243 | def plot_imgs(self, ids=None, query=None, labels=True): 244 | if ids is None and query is None: 245 | ValueError("ids or query must be provided") 246 | 247 | # Resize the images to the minimum and maximum width and height 248 | resized_images = [] 249 | df = self.sql(query) if query else self.table.to_pandas().iloc[ids] 250 | for _, row in df.iterrows(): 251 | img = cv2.imread(row["path"]) 252 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 253 | if labels: 254 | ann = Annotator(img) 255 | for box, label, cls in zip(row["bboxes"], row["labels"], row["cls"]): 256 | ann.box_label(box, label, color=colors(cls, True)) 257 | 258 | img = ann.result() 259 | resized_images.append(img) 260 | 261 | if not resized_images: 262 | LOGGER.error("No images found") 263 | return 264 | # Create a grid of the images 265 | 266 | cols = 10 if len(resized_images) > 10 else max(2, len(resized_images)) 267 | rows = max(1, math.ceil(len(resized_images) / cols)) 268 | fig, axes = plt.subplots(nrows=rows, ncols=cols) 269 | fig.subplots_adjust(hspace=0, wspace=0) 270 | for i, ax in enumerate(axes.ravel()): 271 | if i < len(resized_images): 272 | ax.imshow(resized_images[i]) 273 | ax.axis("off") 274 | # Display the grid of images 275 | plt.show() 276 | 277 | def get_similarity_index(self, top_k=0.01, sim_thres=0.90, reduce=False, sorted=False): 278 | """ 279 | 280 | Args: 281 | sim_thres (float, optional): Similarity threshold to set the minimum similarity. Defaults to 0.9. 282 | top_k (float, optional): Top k fraction of the similar embeddings to apply the threshold on. Default 0.1. 283 | dim (int, optional): Dimension of the reduced embedding space. Defaults to 256. 
284 | sorted (bool, optional): Sort the embeddings by similarity. Defaults to False. 285 | Returns: 286 | np.array: Similarity index 287 | """ 288 | if self.table is None: 289 | LOGGER.error("No embedding space found. Please build the embedding space first.") 290 | return None 291 | if top_k > 1.0: 292 | LOGGER.warning("top_k should be between 0 and 1. Setting top_k to 1.0") 293 | top_k = 1.0 294 | if top_k < 0.0: 295 | LOGGER.warning("top_k should be between 0 and 1. Setting top_k to 0.0") 296 | top_k = 0.0 297 | if sim_thres is not None: 298 | if sim_thres > 1.0: 299 | LOGGER.warning("sim_thres should be between 0 and 1. Setting sim_thres to 1.0") 300 | sim_thres = 1.0 301 | if sim_thres < 0.0: 302 | LOGGER.warning("sim_thres should be between 0 and 1. Setting sim_thres to 0.0") 303 | sim_thres = 0.0 304 | embs = np.array(self.table.to_arrow()["vector"].to_pylist()) 305 | self._sim_index = np.zeros(len(embs)) 306 | limit = max(int(len(embs) * top_k), 1) 307 | 308 | # create a new table with reduced dimensionality to speedup the search 309 | self._search_table = self.table 310 | if reduce: 311 | dim = min(256, embs.shape[1]) # TODO: make this configurable 312 | pca = PCA(n_components=min(dim, len(embs))) 313 | embs = pca.fit_transform(embs) 314 | dim = embs.shape[1] 315 | values = pa.array(embs.reshape(-1), type=pa.float32()) 316 | table_data = pa.FixedSizeListArray.from_arrays(values, dim) 317 | table = pa.table([table_data, self.table.to_arrow()["id"]], names=["vector", "id"]) 318 | self._search_table = self._create_table("reduced_embs", data=table, mode="overwrite") 319 | 320 | # with multiprocessing.Pool() as pool: # multiprocessing doesn't do much. 
Need to revisit 321 | # list(tqdm(pool.imap(build_index, iterable))) 322 | 323 | for _, emb in enumerate(tqdm(embs)): 324 | df = self._search_table.search(emb).metric("cosine").limit(limit).to_df() 325 | if sim_thres is not None: 326 | df = df.query(f"_distance >= {1.0 - sim_thres}") 327 | for idx in df["id"][1:]: 328 | self._sim_index[idx] += 1 329 | self._drop_table("reduced_embs") if reduce else None 330 | 331 | return self._sim_index if not sorted else np.sort(self._sim_index) 332 | 333 | def plot_similarity_index(self, sim_thres=0.90, top_k=0.01, reduce=False, sorted=False): 334 | """ 335 | Plots the similarity index 336 | 337 | Args: 338 | threshold (float, optional): Similarity threshold to set the minimum similarity. Defaults to 0.9. 339 | top_k (float, optional): Top k fraction of the similar embeddings to apply the threshold on. Default 0.1. 340 | dim (int, optional): Dimension of the reduced embedding space. Defaults to 256. 341 | sorted (bool, optional): Whether to sort the index or not. Defaults to False. 342 | """ 343 | index = self.get_similarity_index(top_k, sim_thres, reduce) 344 | if sorted: 345 | index = np.sort(index) 346 | plt.bar([i for i in range(len(index))], index) 347 | plt.xlabel("idx") 348 | plt.ylabel("similarity count") 349 | plt.show() 350 | 351 | def remove_imgs(self, idxs): 352 | """ 353 | Works on temporary table. To apply the changes to the main table, call `persist()` 354 | 355 | Args: 356 | idxs (int or list): Index of the image to remove from the dataset. 357 | """ 358 | if isinstance(idxs, int): 359 | idxs = [idxs] 360 | 361 | pa_table = self.table.to_arrow() 362 | mask = [True for _ in range(len(pa_table))] 363 | for idx in idxs: 364 | mask[idx] = False 365 | 366 | self.removed_img_count += len(idxs) 367 | 368 | table = pa_table.filter(mask) 369 | ids = [i for i in range(len(table))] 370 | table = table.set_column(0, "id", [ids]) # TODO: Revisit this. 
This is a hack to fix the ids==dix 371 | self.table = self._create_table(self.temp_table_name, data=table, mode="overwrite") # work on a temporary table 372 | 373 | self.log_status() 374 | 375 | def add_imgs(self, exp, idxs): 376 | """ 377 | Works on temporary table. To apply the changes to the main table, call `persist()` 378 | 379 | Args: 380 | data (pd.DataFrame or pa.Table): Table rows to add to the dataset. 381 | """ 382 | table_df = self.table.to_pandas() 383 | data = exp.table.to_pandas().iloc[idxs] 384 | assert len(table_df["vector"].iloc[0]) == len(data["vector"].iloc[0]), "Vector dimension mismatch" 385 | table_df = pd.concat([table_df, data], ignore_index=True) 386 | ids = [i for i in range(len(table_df))] 387 | table_df["id"] = ids 388 | self.table = self._create_table( 389 | self.temp_table_name, data=table_df, mode="overwrite" 390 | ) # work on a temporary table 391 | self.log_status() 392 | 393 | def reset(self): 394 | """ 395 | Resets the dataset table to its original state or to the last persisted state. 396 | """ 397 | if self.table is None: 398 | LOGGER.info("No changes made to the dataset.") 399 | return 400 | 401 | db = self._connect() 402 | if self.temp_table_name in db.table_names(): 403 | self._drop_table(self.temp_table_name) 404 | 405 | self.table = self._open_table(self.table_name) 406 | self.removed_img_count = 0 407 | # self._sim_index = None # Not sure if we should reset this as computing the index is expensive 408 | LOGGER.info("Dataset reset to original state.") 409 | 410 | def persist(self, name=None): 411 | """ 412 | Persists the changes made to the dataset. Available only if data is provided in the constructor. 413 | 414 | Args: 415 | name (str, optional): Name of the new dataset. Defaults to `data_updated.yaml`. 
416 | """ 417 | db = self._connect() 418 | if self.table is None or self.temp_table_name not in db.table_names(): 419 | LOGGER.info("No changes made to the dataset.") 420 | return 421 | 422 | LOGGER.info("Persisting changes to the dataset...") 423 | self.log_status() 424 | 425 | if not name: 426 | name = self.data.split(".")[0] + "_updated" 427 | datafile_name = name + ".yaml" 428 | train_txt = "train_updated.txt" 429 | 430 | path = Path(name).resolve() # add new train.txt file in the dataset parent path 431 | path.mkdir(parents=True, exist_ok=True) 432 | if (path / train_txt).exists(): 433 | (path / train_txt).unlink() # remove existing 434 | 435 | for img in tqdm(self.table.to_pandas()["path"].to_list()): 436 | with open(path / train_txt, "a") as f: 437 | f.write(f"{img}" + "\n") # add image to txt file 438 | 439 | new_dataset_info = self.dataset_info.copy() 440 | new_dataset_info.pop("yaml_file") 441 | new_dataset_info.pop("path") # relative paths will get messed up when merging datasets 442 | new_dataset_info.pop("download") # Assume all files are present offline, there is no way to store metadata yet 443 | new_dataset_info["train"] = (path / train_txt).resolve().as_posix() 444 | for key, value in new_dataset_info.items(): 445 | if isinstance(value, Path): 446 | new_dataset_info[key] = value.as_posix() 447 | 448 | yaml.dump(new_dataset_info, open(path / datafile_name, "w")) # update dataset.yaml file 449 | 450 | # TODO: not sure if this should be called data_final to prevent overwriting the original data? 
451 | self.table = self._create_table(datafile_name, data=self.table.to_arrow(), mode="overwrite") 452 | db.drop_table(self.temp_table_name) 453 | 454 | LOGGER.info("Changes persisted to the dataset.") 455 | log = self._log_training_cmd(Path(path / datafile_name).relative_to(Path.cwd()).as_posix()) 456 | 457 | return log 458 | 459 | def log_status(self): 460 | # TODO: Pretty print log status 461 | LOGGER.info("\n|-----------------------------------------------|") 462 | LOGGER.info(f"\t Number of images: {len(self.table.to_arrow())}") 463 | LOGGER.info("|------------------------------------------------|") 464 | 465 | def sql(self, query: str): 466 | """ 467 | Executes a SQL query on the dataset table. 468 | 469 | Args: 470 | query (str): SQL query to execute. 471 | """ 472 | if self.table is None: 473 | LOGGER.info("No table found. Please provide a dataset to work on.") 474 | return 475 | 476 | table = self.table.to_arrow() # noqa 477 | result = duckdb.sql(query).to_df() 478 | 479 | return result 480 | 481 | def dash(self, exps=None, analysis=False): 482 | """ 483 | Launches a dashboard to visualize the dataset. 484 | """ 485 | config = {} 486 | Path(TEMP_CONFIG_PATH).parent.mkdir(exist_ok=True, parents=True) 487 | with open(TEMP_CONFIG_PATH, "w+") as file: 488 | config_exp = [self.config] 489 | if exps: 490 | for exp in exps: 491 | config_exp.append(exp.config) 492 | config["exps"] = config_exp 493 | config["analysis"] = analysis 494 | 495 | json.dump(config, file) 496 | 497 | launch() 498 | 499 | @property 500 | def config(self): 501 | return {"project": self.project, "model": self.model, "device": self.device, "data": self.data} 502 | 503 | def _log_training_cmd(self, data_path): 504 | success_log = ( 505 | f'{colorstr("LanceDB: ") }New dataset created successfully! 
Run the following command to train a model:' 506 | ) 507 | train_cmd = f"yolo train model={self.model} data={data_path} epochs=10" 508 | success_log = success_log + "\n" + train_cmd 509 | LOGGER.info(success_log) 510 | 511 | return train_cmd 512 | 513 | def _connect(self): 514 | db = lancedb.connect(self.project) 515 | 516 | return db 517 | 518 | def _create_table(self, name, data=None, mode="overwrite"): 519 | db = lancedb.connect(self.project) 520 | table = db.create_table(name, data=data, mode=mode) 521 | 522 | return table 523 | 524 | def _open_table(self, name): 525 | db = lancedb.connect(self.project) 526 | table = db.open_table(name) if name in db.table_names() else None 527 | if table is None: 528 | raise ValueError(f'{colorstr("LanceDB: ") }Table not found.') 529 | return table 530 | 531 | def _drop_table(self, name): 532 | db = lancedb.connect(self.project) 533 | if name in db.table_names(): 534 | db.drop_table(name) 535 | return True 536 | 537 | return False 538 | 539 | def _copy_table_to_project(self, table_path): 540 | if not table_path.endswith(".lance"): 541 | raise ValueError(f"{colorstr('LanceDB: ')} Table must be a .lance file") 542 | 543 | LOGGER.info(f"Copying table from {table_path}") 544 | path = Path(table_path).parent 545 | name = Path(table_path).stem # lancedb doesn't need .lance extension 546 | db = lancedb.connect(path) 547 | table = db.open_table(name) 548 | return self._create_table(self.table_name, data=table.to_arrow(), mode="overwrite") 549 | 550 | def _image_encode(self, img): 551 | image = Image.open(img) 552 | n_channels = np.array(image).ndim 553 | if n_channels == 2: 554 | image = image.convert(mode="RGB") 555 | 556 | img_tensor = self.transform(image) 557 | trans_img = img_tensor.unsqueeze(0) 558 | return trans_img 559 | 560 | def _embedding_func(self, imgs): 561 | embeddings = [] 562 | for img in tqdm(imgs): 563 | encod_img = self._image_encode(img) 564 | 
embeddings.append(self.predictor(encod_img).squeeze().cpu().detach().numpy()) 565 | 566 | return embeddings 567 | 568 | def _setup_predictor(self, model_arch, device=""): 569 | if model_arch in self.model_arch_supported: 570 | load_model = getattr(models, model_arch) 571 | model = load_model(pretrained=True) 572 | predictor = torch.nn.Sequential(*list(model.children())[:-1]) 573 | return predictor 574 | 575 | else: 576 | LOGGER.error(f"Supported for {model_arch} is not added yet") 577 | sys.exit(1) 578 | 579 | def create_index(self): 580 | # TODO: create index 581 | pass 582 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import launch 2 | 3 | __all__ = ["launch"] 4 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/datasets.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | 4 | import streamlit as st 5 | from yoloexplorer import config 6 | from yoloexplorer.frontend.states import init_states, update_state, widget_key 7 | from yoloexplorer.frontend.streamlit_dash import image_select 8 | 9 | 10 | @st.cache_data 11 | def _get_config(): 12 | with open(config.TEMP_CONFIG_PATH) as json_file: 13 | data = json.load(json_file) 14 | return data["exps"] 15 | 16 | 17 | @st.cache_data 18 | def _get_dataset(idx=0): 19 | from yoloexplorer import Explorer # function scope import 20 | 21 | config = _get_config()[idx] 22 | exp = Explorer(**config) 23 | exp.build_embeddings() 24 | 25 | return exp 26 | 27 | 28 | def _get_primary_dataset(): 29 | data = st.session_state["PRIMARY_DATASET"] 30 | exp = st.session_state[f"EXPLORER_{data}"] 31 | 32 | return exp 33 | 34 | 35 | def reset_to_init_state(): 36 | if st.session_state.get(f"STAGED_IMGS") is None: # if app is not initialized TODO: 
better check 37 | cfgs = _get_config() 38 | init_states(cfgs) 39 | for idx, cfg in enumerate(cfgs): 40 | data = cfg["data"].split(".")[0] 41 | exp = _get_dataset(idx) 42 | update_state(f"EXPLORER_{data}", exp) 43 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 44 | 45 | 46 | def query_form(data): 47 | with st.form(widget_key("query", data)): 48 | col1, col2 = st.columns([0.8, 0.2]) 49 | with col1: 50 | query = st.text_input("Query", "", label_visibility="collapsed") 51 | with col2: 52 | submit = st.form_submit_button("Query") 53 | if submit: 54 | run_sql_query(data, query) 55 | 56 | 57 | def similarity_form(selected_imgs, selected_staged_imgs, data): 58 | st.write("Similarity Search") 59 | with st.form(widget_key("similarity", data)): 60 | subcol1, subcol2 = st.columns([1, 1]) 61 | with subcol1: 62 | st.write("Limit") 63 | limit = st.number_input("limit", min_value=None, max_value=None, value=25, label_visibility="collapsed") 64 | 65 | with subcol2: 66 | disabled = len(selected_imgs) and len(selected_staged_imgs) 67 | st.write("Selected: ", len(selected_imgs)) 68 | submit = st.form_submit_button("Search", disabled=disabled) 69 | if disabled: 70 | st.error("Cannot search from staging and dataset") 71 | if submit: 72 | find_similar_imgs(data, selected_imgs or selected_staged_imgs, limit) 73 | 74 | 75 | def staging_area_form(data, selected_imgs): 76 | st.write("Staging Area") 77 | with st.form(widget_key("staging_area", data)): 78 | col1, col2 = st.columns([1, 1]) 79 | staged_imgs = set(st.session_state[f"STAGED_IMGS"]) - set(selected_imgs) 80 | with col1: 81 | st.form_submit_button( 82 | ":wastebasket:", 83 | disabled=len(selected_imgs) == 0, 84 | on_click=update_state, 85 | args=("STAGED_IMGS", staged_imgs), 86 | ) 87 | with col2: 88 | st.form_submit_button("Clear", on_click=update_state, args=("STAGED_IMGS", set())) 89 | 90 | 91 | def selected_options_form(data, selected_imgs, selected_staged_imgs, total_staged_imgs): 92 | with 
st.form(widget_key("selected_options", data)): 93 | col1, col2 = st.columns([1, 1]) 94 | with col1: 95 | st.form_submit_button( 96 | "Add to Stage", 97 | # key=widget_key("staging", data), 98 | on_click=add_to_staging, 99 | args=("STAGED_IMGS", total_staged_imgs), 100 | disabled=not selected_imgs, 101 | ) 102 | 103 | with col2: 104 | if data == st.session_state["PRIMARY_DATASET"]: 105 | st.form_submit_button( 106 | ":wastebasket:", 107 | disabled=not selected_imgs or (len(selected_imgs) and len(selected_staged_imgs)), 108 | on_click=remove_imgs, 109 | args=(data, selected_imgs), 110 | ) 111 | 112 | else: 113 | st.form_submit_button( 114 | f"Add to {st.session_state['PRIMARY_DATASET']}", 115 | on_click=add_imgs, 116 | args=(data, selected_imgs), 117 | disabled=not selected_imgs, 118 | ) 119 | 120 | 121 | def persist_reset_form(): 122 | with st.form(widget_key("persist_reset", "PRIMARY_DATASET")): 123 | col1, col2 = st.columns([1, 1]) 124 | with col1: 125 | st.form_submit_button("Reset", on_click=reset) 126 | 127 | with col2: 128 | st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True)) 129 | 130 | 131 | def find_similar_imgs(data, imgs, limit=25, rerun=False): 132 | exp = st.session_state[f"EXPLORER_{data}"] 133 | _, idx = exp.get_similar_imgs(imgs, limit) 134 | paths = exp.table.to_pandas()["path"][idx].to_list() 135 | update_state(f"IMGS_{data}", paths) 136 | st.experimental_rerun() 137 | 138 | 139 | def run_sql_query(data, query): 140 | if query.rstrip().lstrip(): 141 | exp = st.session_state[f"EXPLORER_{data}"] 142 | df = exp.sql(query) 143 | update_state(f"IMGS_{data}", df["path"].to_list()) 144 | st.experimental_rerun() 145 | 146 | 147 | def add_to_staging(key, imgs): 148 | update_state(key, imgs) 149 | # st.experimental_rerun() 150 | 151 | 152 | def remove_imgs(data, imgs): 153 | exp = st.session_state[f"EXPLORER_{data}"] 154 | idxs = exp.table.to_pandas().set_index("path").loc[imgs]["id"].to_list() 155 | exp.remove_imgs(idxs) 156 | 
update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 157 | # st.experimental_rerun() 158 | 159 | 160 | def add_imgs(from_data, imgs): 161 | data = st.session_state["PRIMARY_DATASET"] 162 | exp = st.session_state[f"EXPLORER_{data}"] 163 | from_exp = st.session_state[f"EXPLORER_{from_data}"] 164 | idxs = from_exp.table.to_pandas().set_index("path").loc[imgs]["id"].to_list() 165 | exp.add_imgs(from_exp, idxs) 166 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 167 | update_state(f"SUCCESS_MSG", f"Added {len(imgs)} to {data}") 168 | 169 | 170 | def reset(): 171 | data = st.session_state["PRIMARY_DATASET"] 172 | exp = st.session_state[f"EXPLORER_{data}"] 173 | exp.reset() 174 | update_state("STAGED_IMGS", None) 175 | 176 | 177 | def persist_changes(): 178 | exp = _get_primary_dataset() 179 | log = None 180 | with st.spinner("Creating new dataset..."): 181 | log = exp.persist() 182 | st.success("Dataset created successfully!") 183 | st.code(log, language="shell") 184 | update_state("PERSISTING", False) 185 | st.button("Refresh", on_click=update_state, args=("STAGED_IMGS", None)) 186 | 187 | 188 | def rerender_button(data): 189 | col1, col2, col3 = st.columns([0.26, 0.3, 0.1]) 190 | with col1: 191 | pass 192 | with col2: 193 | st.button( 194 | "Render Imgs :arrows_counterclockwise:", 195 | key=widget_key("render_imgs", data), 196 | help=""" 197 | Imgs might not be rendered automatically in some cases to save memory when stage area is used. 198 | Click this button to force render imgs. 
199 | """, 200 | ) 201 | with col3: 202 | pass 203 | 204 | 205 | def layout(): 206 | st.set_page_config(layout="wide", initial_sidebar_state="collapsed") 207 | 208 | if st.session_state.get("PERSISTING"): 209 | persist_changes() 210 | return 211 | 212 | # staging area 213 | selected_staged_imgs = [] 214 | if st.session_state.get(f"STAGED_IMGS"): 215 | staged_imgs = st.session_state[f"STAGED_IMGS"] 216 | total_staged_imgs = len(staged_imgs) 217 | col1, col2 = st.columns([0.8, 0.2], gap="small") 218 | with col1: 219 | selected_staged_imgs = image_select( 220 | f"Staged samples: {total_staged_imgs}", images=list(staged_imgs), use_container_width=False 221 | ) 222 | with col2: 223 | staging_area_form(data="staging_area", selected_imgs=selected_staged_imgs) 224 | # Dataset tabs 225 | cfgs = _get_config() 226 | tabs = st.tabs([cfg["data"].split(".")[0] for cfg in cfgs]) 227 | for idx, tab in enumerate(tabs): 228 | with tab: 229 | data = cfgs[idx]["data"].split(".")[0] 230 | 231 | col1, col2 = st.columns([0.75, 0.25], gap="small") 232 | reset_to_init_state() 233 | 234 | imgs = st.session_state[f"IMGS_{data}"] 235 | total_imgs = len(imgs) 236 | with col1: 237 | subcol1, subcol2, subcol3, subcol4 = st.columns([0.2, 0.2, 0.3, 0.3]) 238 | with subcol1: 239 | num = st.number_input( 240 | "Max Images Displayed", 241 | min_value=0, 242 | max_value=total_imgs, 243 | value=min(250, total_imgs), 244 | key=widget_key("num_imgs_displayed", data), 245 | ) 246 | with subcol2: 247 | start_idx = st.number_input( 248 | "Start Index", min_value=0, max_value=total_imgs, value=0, key=widget_key("start_idx", data) 249 | ) 250 | with subcol3: 251 | select_all = st.checkbox("Select All", value=False, key=widget_key("select_all", data)) 252 | with subcol4: 253 | labels = st.checkbox( 254 | "Labels", 255 | value=False, 256 | key=widget_key("labels", data), 257 | ) 258 | 259 | query_form(data) 260 | selected_imgs = [] 261 | if total_imgs: 262 | imgs_displayed = imgs[start_idx : start_idx + num] 
263 | label_args = {"bboxes": None, "labels": None, "classes": None} 264 | if labels: 265 | table = st.session_state[f"EXPLORER_{data}"].table 266 | df = table.to_pandas().set_index("path").loc[imgs_displayed] 267 | label_args["bboxes"] = df["bboxes"].to_list() 268 | label_args["labels"] = df["labels"].to_list() 269 | label_args["classes"] = df["cls"].to_list() 270 | 271 | selected_imgs = image_select( 272 | f"Total samples: {total_imgs}", 273 | images=imgs_displayed, 274 | use_container_width=False, 275 | indices=[i for i in range(num)] if select_all else None, 276 | **label_args, 277 | ) 278 | if st.session_state.get(f"STAGED_IMGS"): 279 | rerender_button(data) 280 | 281 | with col2: 282 | similarity_form(selected_imgs, selected_staged_imgs, data) 283 | total_staged_imgs = set(st.session_state["STAGED_IMGS"]) 284 | total_staged_imgs.update(selected_imgs) 285 | 286 | selected_options_form(data, selected_imgs, selected_staged_imgs, total_staged_imgs) 287 | if data == st.session_state["PRIMARY_DATASET"]: 288 | persist_reset_form() 289 | 290 | 291 | def launch(): 292 | cmd = ["streamlit", "run", __file__, "--server.maxMessageSize", "1024"] 293 | try: 294 | subprocess.run(cmd, check=True) 295 | except Exception as e: 296 | print(e) 297 | 298 | 299 | if __name__ == "__main__": 300 | layout() 301 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/layout.py: -------------------------------------------------------------------------------- 1 | import time 2 | import json 3 | import subprocess 4 | 5 | import streamlit as st 6 | from streamlit_dash import image_select 7 | from yoloexplorer import config 8 | from yoloexplorer.frontend.states import init_states, update_state, widget_key 9 | 10 | 11 | @st.cache_data 12 | def _get_config(): 13 | with open(config.TEMP_CONFIG_PATH) as json_file: 14 | data = json.load(json_file) 15 | return data 16 | 17 | 18 | @st.cache_data 19 | def _get_dataset(idx=0): 20 | from yoloexplorer 
import Explorer # function scope import 21 | 22 | config = _get_config()[idx] 23 | exp = Explorer(**config) 24 | exp.build_embeddings() 25 | 26 | return exp 27 | 28 | 29 | def _get_primary_dataset(): 30 | data = st.session_state["PRIMARY_DATASET"] 31 | exp = st.session_state[f"EXPLORER_{data}"] 32 | 33 | return exp 34 | 35 | 36 | def reset_to_init_state(): 37 | if st.session_state.get(f"STAGED_IMGS") is None: # if app is not initialized TODO: better check 38 | print("Initializing app...") 39 | cfgs = _get_config() 40 | init_states(cfgs) 41 | for idx, cfg in enumerate(cfgs): 42 | data = cfg["data"].split(".")[0] 43 | exp = _get_dataset(idx) 44 | update_state(f"EXPLORER_{data}", exp) 45 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 46 | 47 | 48 | def query_form(data): 49 | with st.form(widget_key("query", data)): 50 | col1, col2 = st.columns([0.8, 0.2]) 51 | with col1: 52 | query = st.text_input("Query", "", label_visibility="collapsed") 53 | with col2: 54 | submit = st.form_submit_button("Query") 55 | if submit: 56 | run_sql_query(data, query) 57 | 58 | 59 | def similarity_form(selected_imgs, selected_staged_imgs, data): 60 | st.write("Similarity Search") 61 | with st.form(widget_key("similarity", data)): 62 | subcol1, subcol2 = st.columns([1, 1]) 63 | with subcol1: 64 | st.write("Limit") 65 | limit = st.number_input("limit", min_value=None, max_value=None, value=25, label_visibility="collapsed") 66 | 67 | with subcol2: 68 | disabled = len(selected_imgs) and len(selected_staged_imgs) 69 | st.write("Selected: ", len(selected_imgs)) 70 | st.form_submit_button( 71 | "Search", 72 | disabled=disabled, 73 | on_click=find_similar_imgs, 74 | args=(data, selected_imgs or selected_staged_imgs, limit), 75 | ) 76 | if disabled: 77 | st.error("Cannot search from staging and dataset") 78 | 79 | 80 | def staging_area_form(data, selected_imgs): 81 | st.write("Staging Area") 82 | with st.form(widget_key("staging_area", data)): 83 | col1, col2 = 
st.columns([1, 1]) 84 | staged_imgs = set(st.session_state[f"STAGED_IMGS"]) - set(selected_imgs) 85 | with col1: 86 | st.form_submit_button( 87 | ":wastebasket:", 88 | disabled=len(selected_imgs) == 0, 89 | on_click=update_state, 90 | args=("STAGED_IMGS", staged_imgs), 91 | ) 92 | with col2: 93 | st.form_submit_button("Clear", on_click=update_state, args=("STAGED_IMGS", set())) 94 | 95 | 96 | def selected_options_form(data, selected_imgs, selected_staged_imgs, total_staged_imgs): 97 | with st.form(widget_key("selected_options", data)): 98 | col1, col2 = st.columns([1, 1]) 99 | with col1: 100 | st.form_submit_button( 101 | "Add to Staging", 102 | # key=widget_key("staging", data), 103 | disabled=not selected_imgs, 104 | on_click=add_to_staging, 105 | args=("STAGED_IMGS", total_staged_imgs), 106 | ) 107 | 108 | with col2: 109 | if data == st.session_state["PRIMARY_DATASET"]: 110 | st.form_submit_button( 111 | ":wastebasket:", 112 | on_click=remove_imgs, 113 | args=(data, selected_imgs), 114 | disabled=not selected_imgs or (len(selected_imgs) and len(selected_staged_imgs)), 115 | ) 116 | else: 117 | st.form_submit_button( 118 | f"Add to {st.session_state['PRIMARY_DATASET']}", 119 | on_click=add_imgs, 120 | args=(data, selected_imgs), 121 | disabled=not selected_imgs, 122 | ) 123 | 124 | 125 | def persist_reset_form(): 126 | with st.form(widget_key("persist_reset", "PRIMARY_DATASET")): 127 | col1, col2 = st.columns([1, 1]) 128 | with col1: 129 | st.form_submit_button("Reset", on_click=reset) 130 | 131 | with col2: 132 | st.form_submit_button("Persist", on_click=update_state, args=("PERSISTING", True)) 133 | 134 | 135 | def find_similar_imgs(data, imgs, limit=25): 136 | exp = st.session_state[f"EXPLORER_{data}"] 137 | _, idx = exp.get_similar_imgs(imgs, limit) 138 | paths = exp.table.to_pandas()["path"][idx].to_list() 139 | update_state(f"IMGS_{data}", paths) 140 | # st.experimental_rerun() 141 | 142 | 143 | def run_sql_query(data, query): 144 | if 
query.rstrip().lstrip(): 145 | exp = st.session_state[f"EXPLORER_{data}"] 146 | df = exp.sql(query) 147 | update_state(f"IMGS_{data}", df["path"].to_list()) 148 | st.experimental_rerun() 149 | 150 | 151 | def add_to_staging(key, imgs): 152 | update_state(key, imgs) 153 | # st.experimental_rerun() 154 | 155 | 156 | def remove_imgs(data, imgs): 157 | exp = st.session_state[f"EXPLORER_{data}"] 158 | idxs = exp.table.to_pandas().set_index("path").loc[imgs]["id"].to_list() 159 | exp.remove_imgs(idxs) 160 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 161 | 162 | 163 | def add_imgs(from_data, imgs): 164 | data = st.session_state["PRIMARY_DATASET"] 165 | exp = st.session_state[f"EXPLORER_{data}"] 166 | from_exp = st.session_state[f"EXPLORER_{from_data}"] 167 | idxs = from_exp.table.to_pandas().set_index("path").loc[imgs]["id"].to_list() 168 | exp.add_imgs(from_exp, idxs) 169 | update_state(f"IMGS_{data}", exp.table.to_pandas()["path"].to_list()) 170 | update_state(f"SUCCESS_MSG", f"Added {len(imgs)} to {data}") 171 | 172 | 173 | def reset(): 174 | data = st.session_state["PRIMARY_DATASET"] 175 | exp = st.session_state[f"EXPLORER_{data}"] 176 | exp.reset() 177 | update_state("STAGED_IMGS", None) 178 | 179 | 180 | def persist_changes(): 181 | exp = _get_primary_dataset() 182 | with st.spinner("Creating new dataset..."): 183 | exp.persist() 184 | st.success("Dataset created successfully! 
Auto-reload in 30 seconds...") 185 | update_state("PERSISTING", False) 186 | st.button("Refresh", on_click=update_state, args=("STAGED_IMGS", None)) 187 | 188 | 189 | def layout(): 190 | st.set_page_config(layout="wide", initial_sidebar_state="collapsed") 191 | 192 | if st.session_state.get("PERSISTING"): 193 | persist_changes() 194 | return 195 | 196 | # staging area 197 | selected_staged_imgs = [] 198 | if st.session_state.get(f"STAGED_IMGS"): 199 | staged_imgs = st.session_state[f"STAGED_IMGS"] 200 | total_staged_imgs = len(staged_imgs) 201 | col1, col2 = st.columns([0.8, 0.2], gap="small") 202 | with col1: 203 | selected_staged_imgs = image_select( 204 | f"Staged samples: {total_staged_imgs}", images=list(staged_imgs), use_container_width=False 205 | ) 206 | with col2: 207 | staging_area_form(data="staging_area", selected_imgs=selected_staged_imgs) 208 | 209 | # Dataset tabs 210 | cfgs = _get_config() 211 | tabs = st.tabs([cfg["data"].split(".")[0] for cfg in cfgs]) 212 | for idx, tab in enumerate(tabs): 213 | with tab: 214 | data = cfgs[idx]["data"].split(".")[0] 215 | 216 | col1, col2 = st.columns([0.75, 0.25], gap="small") 217 | reset_to_init_state() 218 | 219 | imgs = st.session_state[f"IMGS_{data}"] 220 | total_imgs = len(imgs) 221 | with col1: 222 | subcol1, subcol2, subcol3 = st.columns([0.2, 0.2, 0.6]) 223 | with subcol1: 224 | num = st.number_input( 225 | "Max Images Displayed", 226 | min_value=0, 227 | max_value=total_imgs, 228 | value=min(250, total_imgs), 229 | key=widget_key("num_imgs_displayed", data), 230 | ) 231 | with subcol2: 232 | start_idx = st.number_input( 233 | "Start Index", min_value=0, max_value=total_imgs, value=0, key=widget_key("start_idx", data) 234 | ) 235 | with subcol3: 236 | select_all = st.checkbox("Select All", value=False, key=widget_key("select_all", data)) 237 | 238 | query_form(data) 239 | if total_imgs: 240 | imgs_displayed = imgs[start_idx : start_idx + num] 241 | selected_imgs = image_select( 242 | f"Total samples: 
{total_imgs}", 243 | images=imgs_displayed, 244 | use_container_width=False, 245 | indices=[i for i in range(num)] if select_all else None, 246 | ) 247 | 248 | with col2: 249 | similarity_form(selected_imgs, selected_staged_imgs, data) 250 | total_staged_imgs = set(st.session_state["STAGED_IMGS"]) 251 | total_staged_imgs.update(selected_imgs) 252 | 253 | display_labels = st.checkbox("Labels", value=False, key=widget_key("labels", data)) 254 | selected_options_form(data, selected_imgs, selected_staged_imgs, total_staged_imgs) 255 | if data == st.session_state["PRIMARY_DATASET"]: 256 | persist_reset_form() 257 | 258 | 259 | def launch(): 260 | cmd = ["streamlit", "run", __file__, "--server.maxMessageSize", "1024"] 261 | try: 262 | subprocess.run(cmd, check=True) 263 | except Exception as e: 264 | print(e) 265 | 266 | 267 | if __name__ == "__main__": 268 | layout() 269 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/pages/1_table.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import streamlit as st 5 | from sklearn.manifold import TSNE 6 | from sklearn.decomposition import PCA 7 | import plotly.express as px 8 | 9 | from yoloexplorer import config 10 | from yoloexplorer.frontend.datasets import _get_primary_dataset 11 | 12 | 13 | @st.cache_resource 14 | def reduce_dim(df, alg): 15 | embeddings = np.array(df["vector"].to_list()) 16 | if alg == "TSNE": 17 | tsne = TSNE(n_components=2, random_state=0) 18 | embeddings = tsne.fit_transform(embeddings) 19 | elif alg == "PCA": 20 | pca = PCA(n_components=2) 21 | embeddings = pca.fit_transform(embeddings) 22 | return embeddings 23 | 24 | 25 | def embeddings(): 26 | exp = _get_primary_dataset() 27 | df = exp.table.to_pandas() 28 | col1, col2 = st.columns([0.5, 0.5]) 29 | with col1: 30 | st.dataframe(df, use_container_width=True) 31 | with col2: 32 | option = st.selectbox("Dim Reducer 
Algorithm", ("TSNE", "PCA", "UMAP (Coming soon)")) 33 | if option == "TSNE": 34 | embeddings = reduce_dim(df, "TSNE") 35 | elif option == "PCA": 36 | embeddings = reduce_dim(df, "TSNE") 37 | elif option == "UMAP (Coming soon)": 38 | st.write("Coming soon") 39 | 40 | fig = px.scatter(x=embeddings[:, 0], y=embeddings[:, 1]) 41 | st.plotly_chart(fig, use_container_width=True) 42 | 43 | 44 | if __name__ == "__main__": 45 | embeddings() 46 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/pages/2_analysis.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import streamlit as st 5 | 6 | from yoloexplorer import config 7 | from yoloexplorer.frontend.datasets import _get_primary_dataset 8 | 9 | INTEGRATION_IMPORT_ERROR = None 10 | 11 | try: 12 | import data_gradients # noqa 13 | 14 | import data_gradients.feature_extractors.object_detection as detection 15 | from data_gradients.datasets.detection import YoloFormatDetectionDataset 16 | except ImportError: 17 | INTEGRATION_IMPORT_ERROR = "data-gradients" 18 | 19 | 20 | @st.cache_data 21 | def _get_config(): 22 | with open(config.TEMP_CONFIG_PATH) as json_file: 23 | data = json.load(json_file) 24 | return data["analysis"] 25 | 26 | 27 | @st.cache_data 28 | def _get_task_from_data(data): 29 | # TODO: support more tasks 30 | return "detection" 31 | 32 | 33 | DETECTION = ( 34 | { 35 | "DetectionBoundingBoxArea": detection.DetectionBoundingBoxArea, 36 | "DetectionBoundingBoxPerImageCount": detection.DetectionBoundingBoxPerImageCount, 37 | "DetectionBoundingBoxSize": detection.DetectionBoundingBoxSize, 38 | "DetectionClassFrequency": detection.DetectionClassFrequency, 39 | "DetectionClassHeatmap": detection.DetectionClassHeatmap, 40 | "DetectionClassesPerImageCount": detection.DetectionClassesPerImageCount, 41 | "DetectionSampleVisualization": detection.DetectionSampleVisualization, 42 | 
"DetectionBoundingBoxIoU": detection.DetectionBoundingBoxIoU, 43 | } 44 | if INTEGRATION_IMPORT_ERROR is None 45 | else {} 46 | ) 47 | 48 | SEGMENTATION = {} if INTEGRATION_IMPORT_ERROR is None else {} 49 | 50 | TASK2MODULES = {"detection": DETECTION, "segmentation": SEGMENTATION} 51 | TASK2LABELS = {"detection": "bboxes", "segmentation": "masks"} 52 | 53 | 54 | @st.cache_resource 55 | def analyse_dataset(): 56 | exp = _get_primary_dataset() 57 | info = exp.dataset_info 58 | deci_ds = YoloFormatDetectionDataset( 59 | root_dir=info["path"], 60 | images_dir=info["image_dir"], 61 | ) 62 | 63 | 64 | def analysis(): 65 | if not _get_config(): 66 | st.error("Enable analysis by passing `analysis=True` when launching the dashboard.") 67 | return 68 | 69 | if INTEGRATION_IMPORT_ERROR: 70 | st.error( 71 | f"The following package(s) are required to run this module: `{INTEGRATION_IMPORT_ERROR}`. Please install them and try again." 72 | ) 73 | return 74 | 75 | task = _get_task_from_data(_get_primary_dataset()) 76 | modules = TASK2MODULES[task] 77 | results = [] 78 | for _, module in modules.items(): 79 | pass 80 | 81 | 82 | if __name__ == "__main__": 83 | analysis() 84 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/redirect.py: -------------------------------------------------------------------------------- 1 | """ 2 | TAKEN FROM - https://gist.github.com/schaumb/037f139035d93cff3ad9f4f7e5f739ce 3 | """ 4 | 5 | import streamlit as st 6 | import io 7 | import contextlib 8 | import sys 9 | import re 10 | 11 | 12 | class _Redirect: 13 | class IOStuff(io.StringIO): 14 | def __init__(self, trigger, max_buffer, buffer_separator, regex, dup=None): 15 | super().__init__() 16 | self._trigger = trigger 17 | self._max_buffer = max_buffer 18 | self._buffer_separator = buffer_separator 19 | self._regex = regex and re.compile(regex) 20 | self._dup = dup 21 | 22 | def write(self, __s: str) -> int: 23 | if self._max_buffer: 24 | 
concatenated_len = super().tell() + len(__s) 25 | if concatenated_len > self._max_buffer: 26 | rest = self.get_filtered_output()[concatenated_len - self._max_buffer :] 27 | if self._buffer_separator is not None: 28 | rest = rest.split(self._buffer_separator, 1)[-1] 29 | super().seek(0) 30 | super().write(rest) 31 | super().truncate(super().tell() + len(__s)) 32 | res = super().write(__s) 33 | if self._dup is not None: 34 | self._dup.write(__s) 35 | self._trigger(self.get_filtered_output()) 36 | return res 37 | 38 | def get_filtered_output(self): 39 | if self._regex is None or self._buffer_separator is None: 40 | return self.getvalue() 41 | 42 | return self._buffer_separator.join( 43 | filter(self._regex.search, self.getvalue().split(self._buffer_separator)) 44 | ) 45 | 46 | def print_at_end(self): 47 | self._trigger(self.get_filtered_output()) 48 | 49 | def __init__( 50 | self, 51 | stdout=None, 52 | stderr=False, 53 | format=None, 54 | to=None, 55 | max_buffer=None, 56 | buffer_separator="\n", 57 | regex=None, 58 | duplicate_out=False, 59 | ): 60 | self.io_args = { 61 | "trigger": self._write, 62 | "max_buffer": max_buffer, 63 | "buffer_separator": buffer_separator, 64 | "regex": regex, 65 | } 66 | self.redirections = [] 67 | self.st = None 68 | self.stderr = stderr is True 69 | self.stdout = stdout is True or (stdout is None and not self.stderr) 70 | self.format = format or "code" 71 | self.to = to 72 | self.fun = None 73 | self.duplicate_out = duplicate_out or None 74 | self.active_nested = None 75 | 76 | if not self.stdout and not self.stderr: 77 | raise ValueError("one of stdout or stderr must be True") 78 | 79 | if self.format not in ["text", "markdown", "latex", "code", "write"]: 80 | raise ValueError( 81 | f"format need oneof the following: {', '.join(['text', 'markdown', 'latex', 'code', 'write'])}" 82 | ) 83 | 84 | if self.to and (not hasattr(self.to, "text") or not hasattr(self.to, "empty")): 85 | raise ValueError(f"'to' is not a streamlit container 
object") 86 | 87 | def __enter__(self): 88 | if self.st is not None: 89 | if self.to is None: 90 | if self.active_nested is None: 91 | self.active_nested = self( 92 | format=self.format, 93 | max_buffer=self.io_args["max_buffer"], 94 | buffer_separator=self.io_args["buffer_separator"], 95 | regex=self.io_args["regex"], 96 | duplicate_out=self.duplicate_out, 97 | ) 98 | return self.active_nested.__enter__() 99 | else: 100 | raise Exception("Already entered") 101 | to = self.to or st 102 | 103 | to.text( 104 | f"Redirected output from " 105 | f"{'stdout and stderr' if self.stdout and self.stderr else 'stdout' if self.stdout else 'stderr'}" 106 | f"{' [' + self.io_args['regex'] + ']' if self.io_args['regex'] else ''}" 107 | f":" 108 | ) 109 | self.st = to.empty() 110 | self.fun = getattr(self.st, self.format) 111 | 112 | io_obj = None 113 | 114 | def redirect(to_duplicate): 115 | nonlocal io_obj 116 | io_obj = _Redirect.IOStuff(dup=self.duplicate_out and to_duplicate, **self.io_args) 117 | redirection = contextlib.redirect_stdout(io_obj) 118 | self.redirections.append((redirection, io_obj)) 119 | redirection.__enter__() 120 | 121 | if self.stderr: 122 | redirect(sys.stderr) 123 | if self.stdout: 124 | redirect(sys.stdout) 125 | 126 | return io_obj 127 | 128 | def __call__(self, to=None, format=None, max_buffer=None, buffer_separator="\n", regex=None, duplicate_out=False): 129 | return _Redirect( 130 | self.stdout, 131 | self.stderr, 132 | format=format, 133 | to=to, 134 | max_buffer=max_buffer, 135 | buffer_separator=buffer_separator, 136 | regex=regex, 137 | duplicate_out=duplicate_out, 138 | ) 139 | 140 | def __exit__(self, *exc): 141 | if self.active_nested is not None: 142 | nested = self.active_nested 143 | if nested.active_nested is None: 144 | self.active_nested = None 145 | return nested.__exit__(*exc) 146 | 147 | res = None 148 | for redirection, io_obj in reversed(self.redirections): 149 | res = redirection.__exit__(*exc) 150 | io_obj.print_at_end() 151 | 
        self.redirections = []
        self.st = None
        self.fun = None
        return res

    def _write(self, data):
        # Render the captured text with the Streamlit formatter chosen in
        # __enter__ (st.code / st.text / ...).
        self.fun(data)


# Ready-made redirectors; use directly as context managers, or call them to
# reconfigure (format/target/filter) per the usage example below.
stdout = _Redirect()
stderr = _Redirect(stderr=True)
stdouterr = _Redirect(stdout=True, stderr=True)

"""
# can be used as
import time
import sys
from random import getrandbits
import streamlit.redirect as rd
st.text('Suboutput:')
so = st.empty()
with rd.stdout, rd.stderr(format='markdown', to=st.sidebar):
    print("hello ")
    time.sleep(1)
    i = 5
    while i > 0:
        print("**M**izu? ", file=sys.stdout if getrandbits(1) else sys.stderr)
        i -= 1
        with rd.stdout(to=so):
            print(f" cica {i}")
        if i:
            time.sleep(1)
# """
--------------------------------------------------------------------------------
/yoloexplorer/frontend/states.py:
--------------------------------------------------------------------------------
import streamlit as st


def widget_key(action, data):
    """Return a stable session-state key for an action performed on a dataset."""
    return f"form_{action}_on_{data}"


def init_states(config_list):
    """Seed per-dataset and global Streamlit session-state entries.

    NOTE(review): whether the global keys (STAGED_IMGS, PRIMARY_DATASET, ...)
    sit inside or outside the per-dataset loop is ambiguous in this dump; the
    assignments are idempotent either way for non-empty config_list — confirm
    against the original file.
    """
    for config in config_list:
        # Dataset name without the file extension, used to namespace keys.
        data = config["data"].split(".")[0]
        st.session_state[f"EXPLORER_{data}"] = None
        st.session_state[f"IMGS_{data}"] = []
        st.session_state[f"SELECTED_IMGS_{data}"] = []
        st.session_state[f"SHOW_LABELS_{data}"] = False
        st.session_state["STAGED_IMGS"] = set()
        # The first configured dataset acts as the primary one.
        st.session_state["PRIMARY_DATASET"] = config_list[0]["data"].split(".")[0]
        st.session_state[f"SUCCESS_MSG"] = ""
        st.session_state["PERSISTING"] = False


def update_state(state, value):
    """Assign `value` to the given session-state key."""
    st.session_state[state] = value
--------------------------------------------------------------------------------
/yoloexplorer/frontend/streamlit_dash/__init__.py:
--------------------------------------------------------------------------------
import base64
import io
import os
from
pathlib import Path
import cv2

import numpy as np
import streamlit as st
import streamlit.components.v1 as components
from PIL import Image

from ultralytics.utils.plotting import Annotator, colors

# Dev toggle: when False the React frontend is served from the local dev
# server on :3001 instead of the prebuilt bundle under frontend/build.
_RELEASE = True

if not _RELEASE:
    _component_func = components.declare_component("image_select", url="http://localhost:3001")
else:
    path = (Path(__file__).parent / "frontend" / "build").resolve()
    _component_func = components.declare_component("image_select", path=path)


@st.cache_data
def _encode_file(img):
    """Read an image file from disk and return it as a base64 JPEG data URI."""
    with open(img, "rb") as img_file:
        encoded = base64.b64encode(img_file.read()).decode()
        return f"data:image/jpeg;base64, {encoded}"


@st.cache_data
def _encode_numpy(img):
    """Encode a numpy-array image as a base64 JPEG data URI via PIL."""
    pil_img = Image.fromarray(img)
    buffer = io.BytesIO()
    pil_img.save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode()
    return f"data:image/jpeg;base64, {encoded}"


def image_select(
    title: str,
    images: list,
    captions: list = None,
    indices: list = None,
    *,
    use_container_width: bool = True,
    return_value: str = "original",
    key: str = None,
    bboxes=None,
    labels=None,
    classes=None,
):
    """Shows several images and returns the image selected by the user.

    Args:
        title (str): The label shown above the images.
        images (list): The images to show. Allowed image formats are paths to local
            files, URLs, PIL images, and numpy arrays.
        captions (list of str): The captions to show below the images. Defaults to
            None, in which case no captions are shown.
        indices (list of int, optional): The indices of the images that are selected by default.
            Defaults to None.
        use_container_width (bool, optional): Whether to stretch the images to the
            width of the surrounding container. Defaults to True.
64 | return_value ("original" or "index", optional): Whether to return the 65 | original object passed into `images` or the index of the selected image. 66 | Defaults to "original". 67 | key (str, optional): The key of the component. Defaults to None. 68 | bboxes (list of list of float, optional): The bounding boxes to show on the 69 | images. Defaults to None. 70 | labels (list of str, optional): The labels to show on the bounding boxes. 71 | Defaults to None. 72 | classes (list of str, optional): The classes to show on the bounding boxes. 73 | Returns: 74 | (any): The image selected by the user (same object and type as passed to 75 | `images`). 76 | """ 77 | 78 | # Do some checks to verify the input. 79 | if len(images) < 1: 80 | raise ValueError("At least one image must be passed but `images` is empty.") 81 | if captions is not None and len(images) != len(captions): 82 | raise ValueError( 83 | "The number of images and captions must be equal but `captions` has " 84 | f"{len(captions)} elements and `images` has {len(images)} elements." 85 | ) 86 | if indices is None: 87 | indices = [] 88 | if isinstance(indices, int): 89 | indices = [indices] 90 | if not isinstance(indices, list): 91 | raise ValueError(f"`indices` must be a list of integers but it is {type(indices)}.") 92 | for i, index in enumerate(indices): 93 | if index >= len(images): 94 | raise ValueError( 95 | f"Image index at {i} must be smaller than the number of images ({len(images)}) " f"but it is {index}." 96 | ) 97 | 98 | # Encode local images/numpy arrays/PIL images to base64. 
    encoded_images = []
    for idx, img in enumerate(images):
        if bboxes:
            # Fall back to class names when no explicit labels were passed
            # (the rebinding persists for the remaining iterations).
            if labels is None:
                if classes is None:
                    raise ValueError("Labels or classes must be passed if bounding boxes are passed.")
                labels = classes
            # NOTE(review): this branch assumes `img` is a file path readable
            # by cv2.imread — numpy/PIL inputs combined with bboxes would fail
            # here. Confirm callers only pass paths when bboxes is set.
            img = cv2.imread(img)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            ann = Annotator(img)
            # Draw each box with a per-class color.
            for box, label, cls in zip(bboxes[idx], labels[idx], classes[idx]):
                ann.box_label(box, label, color=colors(cls, True))
            img = ann.result()

        if isinstance(img, (np.ndarray, Image.Image)):  # numpy array or PIL image
            img = _encode_numpy(np.asarray(img))
        elif os.path.exists(img):  # local file
            img = _encode_file(img)
        encoded_images.append(img)

    # Pass everything to the frontend.
    component_values = _component_func(
        label=title,
        images=encoded_images,
        captions=captions,
        indices=indices,
        use_container_width=use_container_width,
        key=key,
        default=indices,
    )

    # The frontend component returns the index of the selected image but we want to
    # return the actual image.
132 | if return_value == "original": 133 | return [images[component_value] for component_value in component_values] 134 | elif return_value == "index": 135 | return component_values 136 | else: 137 | raise ValueError("`return_value` must be either 'original' or 'index' " f"but is '{return_value}'.") 138 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "endOfLine": "lf", 3 | "semi": false, 4 | "trailingComma": "es5" 5 | } 6 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/asset-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": { 3 | "main.js": "./static/js/main.c396fd5a.chunk.js", 4 | "main.js.map": "./static/js/main.c396fd5a.chunk.js.map", 5 | "runtime-main.js": "./static/js/runtime-main.58369df8.js", 6 | "runtime-main.js.map": "./static/js/runtime-main.58369df8.js.map", 7 | "static/js/2.ea259f3e.chunk.js": "./static/js/2.ea259f3e.chunk.js", 8 | "static/js/2.ea259f3e.chunk.js.map": "./static/js/2.ea259f3e.chunk.js.map", 9 | "index.html": "./index.html", 10 | "precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js": "./precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js", 11 | "service-worker.js": "./service-worker.js", 12 | "static/js/2.ea259f3e.chunk.js.LICENSE.txt": "./static/js/2.ea259f3e.chunk.js.LICENSE.txt" 13 | }, 14 | "entrypoints": [ 15 | "static/js/runtime-main.58369df8.js", 16 | "static/js/2.ea259f3e.chunk.js", 17 | "static/js/main.c396fd5a.chunk.js" 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/index.html: -------------------------------------------------------------------------------- 1 | streamlit-image-select 2 | 
-------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js: -------------------------------------------------------------------------------- 1 | self.__precacheManifest = (self.__precacheManifest || []).concat([ 2 | { 3 | "revision": "87c83509714a5c6e22c524d2ea080b8d", 4 | "url": "./index.html" 5 | }, 6 | { 7 | "revision": "77469b56b388ef2477dc", 8 | "url": "./static/js/2.ea259f3e.chunk.js" 9 | }, 10 | { 11 | "revision": "b5321db7731dbb9a09b1fc4c60b61213", 12 | "url": "./static/js/2.ea259f3e.chunk.js.LICENSE.txt" 13 | }, 14 | { 15 | "revision": "8972a333194e8ffe8afc", 16 | "url": "./static/js/main.c396fd5a.chunk.js" 17 | }, 18 | { 19 | "revision": "18300b1ffba716d884c2", 20 | "url": "./static/js/runtime-main.58369df8.js" 21 | } 22 | ]); 23 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/service-worker.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Welcome to your Workbox-powered service worker! 3 | * 4 | * You'll need to register this file in your web app and you should 5 | * disable HTTP caching for this file too. 6 | * See https://goo.gl/nhQhGp 7 | * 8 | * The rest of the code is auto-generated. Please don't update this file 9 | * directly; instead, make changes to your Workbox build configuration 10 | * and re-run your build process. 
11 | * See https://goo.gl/2aRDsh 12 | */ 13 | 14 | importScripts("https://storage.googleapis.com/workbox-cdn/releases/4.3.1/workbox-sw.js"); 15 | 16 | importScripts( 17 | "./precache-manifest.cfc10f28dbda458a05dba1d053ca3f16.js" 18 | ); 19 | 20 | self.addEventListener('message', (event) => { 21 | if (event.data && event.data.type === 'SKIP_WAITING') { 22 | self.skipWaiting(); 23 | } 24 | }); 25 | 26 | workbox.core.clientsClaim(); 27 | 28 | /** 29 | * The workboxSW.precacheAndRoute() method efficiently caches and responds to 30 | * requests for URLs in the manifest. 31 | * See https://goo.gl/S9QRab 32 | */ 33 | self.__precacheManifest = [].concat(self.__precacheManifest || []); 34 | workbox.precaching.precacheAndRoute(self.__precacheManifest, {}); 35 | 36 | workbox.routing.registerNavigationRoute(workbox.precaching.getCacheKeyForURL("./index.html"), { 37 | 38 | blacklist: [/^\/_/,/\/[^/?]+\.[^/]+$/], 39 | }); 40 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/static/js/2.ea259f3e.chunk.js.LICENSE.txt: -------------------------------------------------------------------------------- 1 | /* 2 | object-assign 3 | (c) Sindre Sorhus 4 | @license MIT 5 | */ 6 | 7 | /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */ 8 | 9 | /** 10 | * @license 11 | * Copyright 2018-2021 Streamlit Inc. 12 | * 13 | * Licensed under the Apache License, Version 2.0 (the "License"); 14 | * you may not use this file except in compliance with the License. 15 | * You may obtain a copy of the License at 16 | * 17 | * http://www.apache.org/licenses/LICENSE-2.0 18 | * 19 | * Unless required by applicable law or agreed to in writing, software 20 | * distributed under the License is distributed on an "AS IS" BASIS, 21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
22 | * See the License for the specific language governing permissions and 23 | * limitations under the License. 24 | */ 25 | 26 | /** @license React v16.13.1 27 | * react-is.production.min.js 28 | * 29 | * Copyright (c) Facebook, Inc. and its affiliates. 30 | * 31 | * This source code is licensed under the MIT license found in the 32 | * LICENSE file in the root directory of this source tree. 33 | */ 34 | 35 | /** @license React v16.14.0 36 | * react.production.min.js 37 | * 38 | * Copyright (c) Facebook, Inc. and its affiliates. 39 | * 40 | * This source code is licensed under the MIT license found in the 41 | * LICENSE file in the root directory of this source tree. 42 | */ 43 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/static/js/main.c396fd5a.chunk.js: -------------------------------------------------------------------------------- 1 | (this["webpackJsonpstreamlit-dash"]=this["webpackJsonpstreamlit-dash"]||[]).push([[0],[,,,function(e,t,s){e.exports=s(4)},function(e,t,s){"use strict";s.r(t);var a=s(0),d=document.body.appendChild(document.createElement("label")),c=d.appendChild(document.createTextNode("")),n=document.body.appendChild(document.createElement("div"));n.classList.add("container");var i=[];a.a.events.addEventListener(a.a.RENDER_EVENT,(function(e){var t=e.detail;t.theme&&(d.style.font=t.theme.font,d.style.color=t.theme.textColor,"dark"===t.theme.base?document.body.querySelectorAll(".box, .caption").forEach((function(e){e.classList.add("dark")})):document.body.querySelectorAll(".box, .caption").forEach((function(e){e.classList.remove("dark")}))),c.textContent=t.args.label;var s=t.args.images,o=t.args.captions;0===n.childNodes.length&&(s.forEach((function(e,s){var d=n.appendChild(document.createElement("div"));d.classList.add("item"),!0===t.args.use_container_width&&d.classList.add("stretch");var 
c=d.appendChild(document.createElement("div"));c.classList.add("image-box");var l=c.appendChild(document.createElement("img"));if(l.classList.add("image"),l.src=e,o){var r=d.appendChild(document.createElement("div"));r.classList.add("caption"),r.textContent=o[s]}void 0!==t.args.indices&&t.args.indices.includes(s)&&(c.classList.add("selected"),l.classList.add("selected"),i.push(s)),l.onclick=function(){c.classList.contains("selected")?(i.splice(i.indexOf(s),1),c.classList.remove("selected"),l.classList.remove("selected")):(i.push(s),c.classList.add("selected"),l.classList.add("selected")),i.sort(),a.a.setComponentValue(i)}})),a.a.setComponentValue(i)),a.a.setFrameHeight()})),a.a.setComponentReady(),a.a.setFrameHeight()}],[[3,1,2]]]); 2 | //# sourceMappingURL=main.c396fd5a.chunk.js.map 3 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/static/js/main.c396fd5a.chunk.js.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"sources":["index.tsx"],"names":["labelDiv","document","body","appendChild","createElement","label","createTextNode","container","classList","add","selected_component_values","Streamlit","events","addEventListener","RENDER_EVENT","event","data","detail","theme","style","font","color","textColor","base","querySelectorAll","forEach","el","remove","textContent","args","images","captions","childNodes","length","image","i","item","box","img","src","caption","undefined","includes","push","onclick","contains","splice","indexOf","sort","setComponentValue","setFrameHeight","setComponentReady"],"mappings":"uJAAA,kBAEMA,EAAWC,SAASC,KAAKC,YAAYF,SAASG,cAAc,UAC5DC,EAAQL,EAASG,YAAYF,SAASK,eAAe,KACrDC,EAAYN,SAASC,KAAKC,YAAYF,SAASG,cAAc,QACnEG,EAAUC,UAAUC,IAAI,aACxB,IAAMC,EAAsC,GAuF5CC,IAAUC,OAAOC,iBAAiBF,IAAUG,cAhF5C,SAAkBC,GAEhB,IAAMC,EAAQD,EAAkCE,OAE5CD,EAAKE,QACPlB,EAASmB,MAAMC,KAAOJ,EAAKE,MAAME,KACjCpB,EAASmB,MAAME,MAAQL,EAAKE,MAAMI,UACV,SAApBN,EAAKE,MAAMK,KACbtB,SAASC,KAAKsB,iBAAiB,kBAAkBC,SAAQ,SAACC,GACxDA,EAAGlB,UAAUC,IAAI,WAGnBR,SAASC,KAAKsB,iBAAiB,kBAAkBC,SAAQ,SAACC,GACxDA,EAAGlB,UAAUmB,OAAO,YAO1BtB,EAAMuB,YAAcZ,EAAKa,KAAL,MACpB,IAAIC,EAASd,EAAKa,KAAL,OACTE,EAAWf,EAAKa,KAAL,SAGqB,IAAhCtB,EAAUyB,WAAWC,SACvBH,EAAOL,SAAQ,SAACS,EAAeC,GAC7B,IAAIC,EAAO7B,EAAUJ,YAAYF,SAASG,cAAc,QACxDgC,EAAK5B,UAAUC,IAAI,SACsB,IAArCO,EAAKa,KAAL,qBACFO,EAAK5B,UAAUC,IAAI,WAGrB,IAAI4B,EAAMD,EAAKjC,YAAYF,SAASG,cAAc,QAClDiC,EAAI7B,UAAUC,IAAI,aAElB,IAAI6B,EAAMD,EAAIlC,YAAYF,SAASG,cAAc,QAIjD,GAHAkC,EAAI9B,UAAUC,IAAI,SAClB6B,EAAIC,IAAML,EAENH,EAAU,CACZ,IAAIS,EAAUJ,EAAKjC,YAAYF,SAASG,cAAc,QACtDoC,EAAQhC,UAAUC,IAAI,WACtB+B,EAAQZ,YAAcG,EAASI,QAIJM,IAAzBzB,EAAKa,KAAL,SAAsCb,EAAKa,KAAL,QAAqBa,SAASP,KACtEE,EAAI7B,UAAUC,IAAI,YAClB6B,EAAI9B,UAAUC,IAAI,YAClBC,EAA0BiC,KAAKR,IAGjCG,EAAIM,QAAU,WAERP,EAAI7B,UAAUqC,SAAS,aACzBnC,EAA0BoC,OAAOpC,EAA0BqC,QAAQZ,GAAI,GACvEE,EAAI7B,UAAUmB,OAAO,YACrBW,EAAI9B,UAAUmB,OAAO,cAErBjB,EAA0BiC,KAAKR,GAC/BE,EAAI7B,UAAUC,IAAI,YAClB6B,EAAI9B,UAAUC,IAAI,aAEpBC,EAA0BsC,OAC1BrC
,IAAUsC,kBAAkBvC,OAIhCC,IAAUsC,kBAAkBvC,IAO9BC,IAAUuC,oBAQZvC,IAAUwC,oBAIVxC,IAAUuC,mB","file":"static/js/main.c396fd5a.chunk.js","sourcesContent":["import { Streamlit, RenderData } from \"streamlit-component-lib\"\n\nconst labelDiv = document.body.appendChild(document.createElement(\"label\"))\nconst label = labelDiv.appendChild(document.createTextNode(\"\"))\nconst container = document.body.appendChild(document.createElement(\"div\"))\ncontainer.classList.add(\"container\")\nconst selected_component_values: number[] = []\n\n/**\n * The component's render function. This will be called immediately after\n * the component is initially loaded, and then again every time the\n * component gets new data from Python.\n */\nfunction onRender(event: Event): void {\n // Get the RenderData from the event\n const data = (event as CustomEvent).detail\n\n if (data.theme) {\n labelDiv.style.font = data.theme.font\n labelDiv.style.color = data.theme.textColor\n if (data.theme.base === \"dark\") {\n document.body.querySelectorAll(\".box, .caption\").forEach((el) => {\n el.classList.add(\"dark\")\n })\n } else {\n document.body.querySelectorAll(\".box, .caption\").forEach((el) => {\n el.classList.remove(\"dark\")\n })\n }\n\n // TODO: Gray out the component if it's disabled.\n }\n\n label.textContent = data.args[\"label\"]\n let images = data.args[\"images\"]\n let captions = data.args[\"captions\"]\n // console.log(captions)\n\n if (container.childNodes.length === 0) {\n images.forEach((image: string, i: number) => {\n let item = container.appendChild(document.createElement(\"div\"))\n item.classList.add(\"item\")\n if (data.args[\"use_container_width\"] === true) {\n item.classList.add(\"stretch\")\n }\n\n let box = item.appendChild(document.createElement(\"div\"))\n box.classList.add(\"image-box\")\n\n let img = box.appendChild(document.createElement(\"img\"))\n img.classList.add(\"image\")\n img.src = image\n\n if (captions) {\n let caption = 
item.appendChild(document.createElement(\"div\"))\n caption.classList.add(\"caption\")\n caption.textContent = captions[i]\n }\n\n // check if i is in the index array\n if (data.args[\"indices\"] !== undefined && data.args[\"indices\"].includes(i)) {\n box.classList.add(\"selected\")\n img.classList.add(\"selected\")\n selected_component_values.push(i)\n }\n\n img.onclick = function () {\n // check if the image is already selected, then un-select it and remove it from the array\n if (box.classList.contains(\"selected\")) {\n selected_component_values.splice(selected_component_values.indexOf(i), 1)\n box.classList.remove(\"selected\")\n img.classList.remove(\"selected\")\n } else {\n selected_component_values.push(i)\n box.classList.add(\"selected\")\n img.classList.add(\"selected\")\n }\n selected_component_values.sort()\n Streamlit.setComponentValue(selected_component_values)\n }\n })\n // return selected_component_values\n Streamlit.setComponentValue(selected_component_values)\n }\n\n // We tell Streamlit to update our frameHeight after each render event, in\n // case it has changed. (This isn't strictly necessary for the example\n // because our height stays fixed, but this is a low-cost function, so\n // there's no harm in doing it redundantly.)\n Streamlit.setFrameHeight()\n}\n\n// Attach our `onRender` handler to Streamlit's render event.\nStreamlit.events.addEventListener(Streamlit.RENDER_EVENT, onRender)\n\n// Tell Streamlit we're ready to start receiving data. We won't get our\n// first RENDER_EVENT until we call this function.\nStreamlit.setComponentReady()\n\n// Finally, tell Streamlit to update our initial height. 
We omit the\n// `height` parameter here to have it default to our scrollHeight.\nStreamlit.setFrameHeight()\n"],"sourceRoot":""} 2 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/build/static/js/runtime-main.58369df8.js: -------------------------------------------------------------------------------- 1 | !function(e){function t(t){for(var n,l,a=t[0],i=t[1],f=t[2],c=0,s=[];c0.2%", "not dead", "not op_mini all"], 23 | "development": [ 24 | "last 1 chrome version", 25 | "last 1 firefox version", 26 | "last 1 safari version" 27 | ] 28 | }, 29 | "homepage": "." 30 | } 31 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | streamlit-image-select 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/public/styles.css: -------------------------------------------------------------------------------- 1 | *, 2 | ::after, 3 | ::before { 4 | box-sizing: border-box; 5 | } 6 | 7 | body { 8 | font-family: "Source Sans Pro", sans-serif; 9 | font-weight: 400; 10 | line-height: 1.6; 11 | text-size-adjust: 100%; 12 | margin: 0; 13 | } 14 | 15 | label { 16 | font-size: 14px; 17 | color: rgb(49, 51, 63); 18 | margin-bottom: 0.5rem; 19 | height: auto; 20 | min-height: 1.5rem; 21 | vertical-align: middle; 22 | display: flex; 23 | flex-direction: row; 24 | -webkit-box-align: center; 25 | align-items: center; 26 | } 27 | 28 | .container { 29 | width: 100%; 30 | display: flex; 31 | flex-direction: row; 32 | flex-wrap: wrap; 33 | gap: 0.5rem; 34 | } 35 | .item { 36 | width: 10rem; 37 | } 38 | .item.stretch { 39 | flex: 1; 40 | } 41 | 42 | .image-box { 43 | border: 1px solid rgba(49, 51, 63, 
0.2); 44 | border-radius: 0.25rem; 45 | padding: calc(0.25rem + 1px); 46 | height: 10rem; 47 | min-width: 10rem; 48 | } 49 | 50 | .image-box.dark { 51 | border-color: rgba(250, 250, 250, 0.2); 52 | background-color: rgb(19, 23, 32); 53 | } 54 | 55 | .image { 56 | width: 100%; 57 | height: 100%; 58 | object-fit: cover; 59 | opacity: 0.8; 60 | } 61 | 62 | .image-box:hover { 63 | border-color: var(--primary-color); 64 | cursor: pointer; 65 | } 66 | 67 | .image:hover { 68 | opacity: 1; 69 | /* filter: brightness(1.1); */ 70 | } 71 | 72 | 73 | .image-box.selected { 74 | border-color: var(--primary-color); 75 | border-width: 2px; 76 | padding: 0.25rem; 77 | /* box-shadow: rgb(255 75 75 / 50%) 0px 0px 0px 0.2rem; */ 78 | } 79 | 80 | .image.selected { 81 | opacity: 1; 82 | /* filter: brightness(1.1); */ 83 | } 84 | 85 | .caption { 86 | margin-top: 0.25rem; 87 | font-weight: 400; 88 | font-size: 14px; 89 | color: rgba(49, 51, 63, 0.6); 90 | } 91 | 92 | .caption.dark { 93 | color:rgba(250, 250, 250, 0.6); 94 | } 95 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/src/index.tsx: -------------------------------------------------------------------------------- 1 | import { Streamlit, RenderData } from "streamlit-component-lib" 2 | 3 | const labelDiv = document.body.appendChild(document.createElement("label")) 4 | const label = labelDiv.appendChild(document.createTextNode("")) 5 | const container = document.body.appendChild(document.createElement("div")) 6 | container.classList.add("container") 7 | const selected_component_values: number[] = [] 8 | 9 | /** 10 | * The component's render function. This will be called immediately after 11 | * the component is initially loaded, and then again every time the 12 | * component gets new data from Python. 
13 | */ 14 | function onRender(event: Event): void { 15 | // Get the RenderData from the event 16 | const data = (event as CustomEvent).detail 17 | 18 | if (data.theme) { 19 | labelDiv.style.font = data.theme.font 20 | labelDiv.style.color = data.theme.textColor 21 | if (data.theme.base === "dark") { 22 | document.body.querySelectorAll(".box, .caption").forEach((el) => { 23 | el.classList.add("dark") 24 | }) 25 | } else { 26 | document.body.querySelectorAll(".box, .caption").forEach((el) => { 27 | el.classList.remove("dark") 28 | }) 29 | } 30 | 31 | // TODO: Gray out the component if it's disabled. 32 | } 33 | 34 | label.textContent = data.args["label"] 35 | let images = data.args["images"] 36 | let captions = data.args["captions"] 37 | // console.log(captions) 38 | 39 | if (container.childNodes.length === 0) { 40 | images.forEach((image: string, i: number) => { 41 | let item = container.appendChild(document.createElement("div")) 42 | item.classList.add("item") 43 | if (data.args["use_container_width"] === true) { 44 | item.classList.add("stretch") 45 | } 46 | 47 | let box = item.appendChild(document.createElement("div")) 48 | box.classList.add("image-box") 49 | 50 | let img = box.appendChild(document.createElement("img")) 51 | img.classList.add("image") 52 | img.src = image 53 | 54 | if (captions) { 55 | let caption = item.appendChild(document.createElement("div")) 56 | caption.classList.add("caption") 57 | caption.textContent = captions[i] 58 | } 59 | 60 | // check if i is in the index array 61 | if (data.args["indices"] !== undefined && data.args["indices"].includes(i)) { 62 | box.classList.add("selected") 63 | img.classList.add("selected") 64 | selected_component_values.push(i) 65 | } 66 | 67 | img.onclick = function () { 68 | // check if the image is already selected, then un-select it and remove it from the array 69 | if (box.classList.contains("selected")) { 70 | selected_component_values.splice(selected_component_values.indexOf(i), 1) 71 | 
box.classList.remove("selected") 72 | img.classList.remove("selected") 73 | } else { 74 | selected_component_values.push(i) 75 | box.classList.add("selected") 76 | img.classList.add("selected") 77 | } 78 | selected_component_values.sort() 79 | Streamlit.setComponentValue(selected_component_values) 80 | } 81 | }) 82 | // return selected_component_values 83 | Streamlit.setComponentValue(selected_component_values) 84 | } 85 | 86 | // We tell Streamlit to update our frameHeight after each render event, in 87 | // case it has changed. (This isn't strictly necessary for the example 88 | // because our height stays fixed, but this is a low-cost function, so 89 | // there's no harm in doing it redundantly.) 90 | Streamlit.setFrameHeight() 91 | } 92 | 93 | // Attach our `onRender` handler to Streamlit's render event. 94 | Streamlit.events.addEventListener(Streamlit.RENDER_EVENT, onRender) 95 | 96 | // Tell Streamlit we're ready to start receiving data. We won't get our 97 | // first RENDER_EVENT until we call this function. 98 | Streamlit.setComponentReady() 99 | 100 | // Finally, tell Streamlit to update our initial height. We omit the 101 | // `height` parameter here to have it default to our scrollHeight. 
102 | Streamlit.setFrameHeight() 103 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/src/react-app-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/streamlit_dash/frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "esModuleInterop": true, 8 | "allowSyntheticDefaultImports": true, 9 | "strict": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "noEmit": true, 16 | "jsx": "react" 17 | }, 18 | "include": ["src"] 19 | } 20 | -------------------------------------------------------------------------------- /yoloexplorer/frontend/styles/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lancedb/yoloexplorer/3834fb0b26b0db5506975c5f97fc75d4f598ecc4/yoloexplorer/frontend/styles/__init__.py -------------------------------------------------------------------------------- /yoloexplorer/yolo_predictor.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | from ultralytics.yolo.utils import ops, LOGGER 4 | from ultralytics.yolo.utils.torch_utils import smart_inference_mode 5 | from ultralytics.yolo.v8.detect.predict import DetectionPredictor 6 | 7 | 8 | class YOLOEmbeddingsPredictor(DetectionPredictor): 9 | def postprocess(self, preds, img, orig_imgs): 10 | embedding = preds[1] 11 | embedding = F.adaptive_avg_pool2d(embedding, 2).flatten(1) 12 | return embedding 13 | 14 
    @smart_inference_mode()
    def embed(self, source=None, model=None, verbose=True):
        """Run the model over `source` and return pooled embeddings.

        Lazily sets up the model and (re)configures the source on every call,
        warms the model up once, then processes the dataset.

        Note: this returns after the FIRST batch by design — see the trailing
        comment; callers drive this per image/batch rather than streaming.

        Args:
            source: Input source; falls back to self.args.source when None.
            model: Model to load if none has been set up yet.
            verbose: When True, log the path of each processed image.

        Returns:
            The embedding tensor produced by `postprocess` for the first batch.
        """
        # Setup model
        if not self.model:
            self.setup_model(model)
        # Setup source every time predict is called
        self.setup_source(source if source is not None else self.args.source)

        # Warmup model (only once per predictor lifetime).
        if not self.done_warmup:
            self.model.warmup(
                imgsz=(
                    1 if self.model.pt or self.model.triton else self.dataset.bs,
                    3,
                    *self.imgsz,
                )
            )
            self.done_warmup = True

        # Reset counters; profilers time preprocess / inference / postprocess.
        self.seen, self.windows, self.batch, profilers = (
            0,
            [],
            None,
            (ops.Profile(), ops.Profile(), ops.Profile()),
        )
        for batch in self.dataset:
            path, im0s, _, _ = batch
            if verbose:
                LOGGER.info(path[0])
            # Preprocess
            with profilers[0]:
                im = self.preprocess(im0s)

            # Inference (embed_from=-1 asks the model for last-layer features).
            with profilers[1]:
                preds = self.model(im, augment=self.args.augment, embed_from=-1)

            with profilers[2]:
                embeddings = self.postprocess(preds, im, im0s)

            return embeddings
            # yielding seems pointless as this is designed specifically to be used in for loops,
            # batching with embed_func would make things complex
--------------------------------------------------------------------------------