├── notebooks ├── __init__.py ├── memery ├── memery.ipynb ├── 07_cli.ipynb ├── 05_ranker.ipynb ├── 03_encoder.ipynb ├── 04_indexer.ipynb ├── 09_streamlit_app.ipynb ├── 02_crafter.ipynb ├── 01_loader.ipynb ├── 08_jupyter_gui.ipynb ├── 00_core.ipynb ├── _visualize.ipynb └── _working_pipeline.ipynb ├── windows-run.bat ├── docs └── testimage.png ├── graphs ├── embed_d.jpg ├── embed_n.jpg ├── mde_d.gif ├── mde_n.gif ├── normalized-d.jpg ├── normalized.jpg ├── plotted_ims.jpg └── normalized-lg.jpg ├── memery ├── __init__.py ├── ranker.py ├── indexer.py ├── encoder.py ├── cli.py ├── loader.py ├── crafter.py ├── gui.py ├── streamlit_app.py └── core.py ├── images ├── E2GoeMyWEAAkcLz.jpeg ├── memes │ ├── stonks-meme.jpg │ ├── Envato-Elements.png │ ├── Shrek_screenshot.jpg │ ├── Wholesome-Meme-1.jpg │ ├── Wholesome-Meme-3.jpg │ ├── Wholesome-Meme-4.jpg │ ├── Wholesome-Meme-5.jpg │ ├── Wholesome-Meme-6.jpg │ ├── Wholesome-Meme-7.jpg │ ├── Wholesome-Meme-8.jpg │ ├── Wholesome-Meme-9.jpg │ ├── Wholesome-Meme.jpg │ ├── corrupted-file.jpeg │ ├── Wholesome-Meme-10.jpg │ ├── Wholesome-Meme-12.jpg │ ├── Wholesome-Meme-13.jpg │ ├── Wholesome-Meme-14.jpg │ ├── Wholesome-Meme-15.jpg │ ├── Wholesome-Meme-16.jpg │ ├── Wholesome-Meme-17.jpg │ ├── Wholesome-Meme-18.jpg │ ├── Wholesome-Meme-21.jpg │ ├── Wholesome-Meme-22.jpg │ ├── Wholesome-Meme-23.jpg │ ├── Wholesome-Meme-25.jpg │ ├── Wholesome-Meme-27.jpg │ ├── Wholesome-Meme-28.jpg │ ├── Wholesome-Meme-29.jpg │ ├── Wholesome-Meme-31.jpg │ ├── Wholesome-Meme-33.jpg │ ├── Wholesome-Meme-34.jpg │ ├── Wholesome-Meme-35.jpg │ ├── Wholesome-Meme-36.jpg │ ├── Wholesome-Meme-39.jpg │ ├── Wholesome-Meme-40.jpg │ ├── Wholesome-Meme-40.png │ ├── Wholesome-Meme-41.jpg │ ├── Wholesome-Meme-42.jpg │ ├── Wholesome-Meme-44.png │ ├── Wholesome-Meme-45.jpg │ ├── Wholesome-Meme-57.jpg │ ├── Wholesome-Meme-59.jpg │ ├── Wholesome-Meme-60.jpg │ ├── Wholesome-Meme-61.png │ ├── Wholesome-Meme-63.jpg │ ├── Wholesome-Meme-64.jpg │ ├── Wholesome-Meme-65.jpg │ ├── Wholesome-Meme-67.png │ ├── Wholesome-Meme-68.jpg │ ├── Wholesome-Meme-69.jpg │ ├── Wholesome-Meme-70.jpg │ ├── Wholesome-Meme-71.jpg │ ├── Wholesome-Meme-72.jpg │ ├── Wholesome-Meme-73.png │ ├── Wholesome-Meme-74.jpg │ ├── Wholesome-Meme-76.jpg │ ├── Wholesome-Meme-77.jpg │ ├── Wholesome-Meme-78.jpg │ ├── Wholesome-Meme-80.jpg │ ├── Wholesome-Meme-81.jpg │ ├── Wholesome-Meme-82.jpg │ ├── Wholesome-Meme-84.jpg │ ├── Wholesome-Meme-85.jpg │ ├── Wholesome-Meme-86.jpg │ ├── Wholesome-Meme-88.jpg │ ├── Wholesome-Meme-89.jpg │ ├── Wholesome-Meme-97.jpg │ ├── Wholesome-Meme-98.jpg │ ├── Wholesome-Meme-99.jpg │ ├── halloween-Pumpkin-min.jpg │ ├── mexican-food-concept-EXFWKZG.jpg │ ├── embarassed-dog-on-bed-SA2BDZW.jpg │ ├── love-from-the-past-PPBEUVU-min.jpg │ ├── cute-baby-touching-his-moms-face.jpeg │ ├── cute-dog-with-cupcake-P9E2YL5-min.jpg │ ├── portrait-of-happy-birthday-boy-B8VU4LZ.jpg │ ├── braydon-anderson-wOHH-NUTvVc-unsplash-min.jpg │ ├── Father-and-son-having-fun-at-the-breakfast-table.jpg │ ├── i-love-you-note-in-the-valentine-day-settings-X87BZ44.jpg │ └── happy-young-couple-eat-breakfast-in-bed-in-morning-RH4KQ72.jpg ├── jupyter-screenshot.png └── streamlit-screenshot.png ├── install ├── requirements.txt ├── windows-install.py └── cuda_install.py ├── .streamlit └── config.toml ├── pyproject.toml ├── LICENSE ├── .github └── workflows │ └── python-app.yml ├── windows-uninstall.bat ├── CONTRIBUTING.md ├── .gitignore ├── windows-install.bat └── README.md /notebooks/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/memery: -------------------------------------------------------------------------------- 1 | ../memery/ -------------------------------------------------------------------------------- /windows-run.bat: -------------------------------------------------------------------------------- 1 | memery serve -------------------------------------------------------------------------------- /docs/testimage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/docs/testimage.png -------------------------------------------------------------------------------- /graphs/embed_d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/graphs/embed_d.jpg -------------------------------------------------------------------------------- /graphs/embed_n.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/graphs/embed_n.jpg -------------------------------------------------------------------------------- /graphs/mde_d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/graphs/mde_d.gif -------------------------------------------------------------------------------- /graphs/mde_n.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/graphs/mde_n.gif -------------------------------------------------------------------------------- /memery/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__("pkgutil").extend_path(__path__, __name__) 2 | -------------------------------------------------------------------------------- /graphs/normalized-d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/graphs/normalized-d.jpg -------------------------------------------------------------------------------- /graphs/normalized.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/graphs/normalized.jpg -------------------------------------------------------------------------------- /graphs/plotted_ims.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/graphs/plotted_ims.jpg -------------------------------------------------------------------------------- /graphs/normalized-lg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/graphs/normalized-lg.jpg -------------------------------------------------------------------------------- /images/E2GoeMyWEAAkcLz.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/E2GoeMyWEAAkcLz.jpeg -------------------------------------------------------------------------------- /images/memes/stonks-meme.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/stonks-meme.jpg -------------------------------------------------------------------------------- /images/jupyter-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/jupyter-screenshot.png -------------------------------------------------------------------------------- /install/requirements.txt: -------------------------------------------------------------------------------- 1 | packaging>=20.0 2 | poetry>=1.6.1 3 | protobuf==3.20.* 4 | setuptools>=68.2.2 5 | -------------------------------------------------------------------------------- /images/memes/Envato-Elements.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Envato-Elements.png -------------------------------------------------------------------------------- /images/memes/Shrek_screenshot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Shrek_screenshot.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-1.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-3.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-4.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-5.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-6.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-7.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-8.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-9.jpg 
-------------------------------------------------------------------------------- /images/memes/Wholesome-Meme.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme.jpg -------------------------------------------------------------------------------- /images/memes/corrupted-file.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/corrupted-file.jpeg -------------------------------------------------------------------------------- /images/streamlit-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/streamlit-screenshot.png -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-10.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-12.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-13.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-14.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-15.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-16.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-17.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-18.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-21.jpg -------------------------------------------------------------------------------- 
/images/memes/Wholesome-Meme-22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-22.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-23.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-25.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-27.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-27.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-28.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-28.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-29.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-29.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-31.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-31.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-33.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-33.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-34.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-34.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-35.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-35.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-36.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-36.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-39.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-39.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-40.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-40.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-40.png -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-41.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-41.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-42.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-42.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-44.png -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-45.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-45.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-57.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-57.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-59.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-59.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-60.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-60.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-61.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-61.png -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-63.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-63.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-64.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-64.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-65.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-65.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-67.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-67.png -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-68.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-68.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-69.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-69.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-70.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-70.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-71.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-71.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-72.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-72.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-73.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-73.png -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-74.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-74.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-76.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-76.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-77.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-77.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-78.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-78.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-80.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-80.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-81.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-81.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-82.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-82.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-84.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-84.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-85.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-85.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-86.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-86.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-88.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-88.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-89.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-89.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-97.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-97.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-98.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-98.jpg -------------------------------------------------------------------------------- /images/memes/Wholesome-Meme-99.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Wholesome-Meme-99.jpg -------------------------------------------------------------------------------- /images/memes/halloween-Pumpkin-min.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/halloween-Pumpkin-min.jpg -------------------------------------------------------------------------------- /images/memes/mexican-food-concept-EXFWKZG.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/mexican-food-concept-EXFWKZG.jpg -------------------------------------------------------------------------------- /images/memes/embarassed-dog-on-bed-SA2BDZW.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/embarassed-dog-on-bed-SA2BDZW.jpg -------------------------------------------------------------------------------- /images/memes/love-from-the-past-PPBEUVU-min.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/love-from-the-past-PPBEUVU-min.jpg -------------------------------------------------------------------------------- /images/memes/cute-baby-touching-his-moms-face.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/cute-baby-touching-his-moms-face.jpeg -------------------------------------------------------------------------------- /images/memes/cute-dog-with-cupcake-P9E2YL5-min.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/cute-dog-with-cupcake-P9E2YL5-min.jpg -------------------------------------------------------------------------------- /images/memes/portrait-of-happy-birthday-boy-B8VU4LZ.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/portrait-of-happy-birthday-boy-B8VU4LZ.jpg -------------------------------------------------------------------------------- /images/memes/braydon-anderson-wOHH-NUTvVc-unsplash-min.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/braydon-anderson-wOHH-NUTvVc-unsplash-min.jpg -------------------------------------------------------------------------------- /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | primaryColor="#55ff00" 3 | backgroundColor="#252724" 4 | secondaryBackgroundColor="#616460" 5 | textColor="#80cb59" 6 | font="monospace" 7 | -------------------------------------------------------------------------------- /images/memes/Father-and-son-having-fun-at-the-breakfast-table.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/Father-and-son-having-fun-at-the-breakfast-table.jpg -------------------------------------------------------------------------------- /images/memes/i-love-you-note-in-the-valentine-day-settings-X87BZ44.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/i-love-you-note-in-the-valentine-day-settings-X87BZ44.jpg -------------------------------------------------------------------------------- /images/memes/happy-young-couple-eat-breakfast-in-bed-in-morning-RH4KQ72.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/alexfazio/memery/main/images/memes/happy-young-couple-eat-breakfast-in-bed-in-morning-RH4KQ72.jpg

--------------------------------------------------------------------------------
/memery/ranker.py:
--------------------------------------------------------------------------------

1 | from annoy import AnnoyIndex
2 | 
3 | def ranker(query_vec, treemap: AnnoyIndex) -> list[int]:
4 |     nn_indexes = treemap.get_nns_by_vector(query_vec[0], treemap.get_n_items())
5 |     return(nn_indexes)
6 | 
7 | def nns_to_files(db, indexes) -> list[str]:
8 |     # return([[v['fpath'] for k,v in db.items() if v['index'] == ind][0] for ind in indexes])
9 |     return([db[ind]['fpath'] for ind in indexes])
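A minimal sketch of how these two helpers compose, assuming an index and database previously built by `memery build` under images/ (the 512 dimensions and 'angular' metric match the indexer and loader elsewhere in this repo; reusing a stored embedding as the query vector is purely illustrative):

    import torch
    from annoy import AnnoyIndex
    from memery.ranker import ranker, nns_to_files

    treemap = AnnoyIndex(512, 'angular')
    treemap.load('images/memery.ann')                         # written by indexer.save_archives
    db = torch.load('images/memery.pt', map_location='cpu')   # {index: {'hash', 'fpath', 'embed'}}

    query_vec = [db[0]['embed'].tolist()]     # stand-in for a CLIP query embedding
    neighbors = ranker(query_vec, treemap)    # every indexed item, nearest first
    print(nns_to_files(db, neighbors)[:10])   # top ten file paths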
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------

1 | [tool.poetry]
2 | name = "memery"
3 | version = "0.1.0"
4 | description = ""
5 | authors = ["deepfates ", "wkrettek "]
6 | 
7 | [tool.poetry.dependencies]
8 | python = "^3.9"
9 | torch = "^2.2.0"
10 | annoy = "^1.17.0"
11 | torchvision = "^0.17.0"
12 | tqdm = "^4.64.0"
13 | Pillow = "^9.1.0"
14 | typer = "^0.4.1"
15 | streamlit = "1.3.1"
16 | clip = {git = "https://github.com/openai/CLIP", rev = "main"}
17 | ftfy = "^6.1.1"
18 | regex = "^2022.4.24"
19 | altair = "^4.0.0"
20 | numpy = "^1.24.0"
21 | protobuf = "^3.20.0"
22 | 
23 | [tool.poetry.scripts]
24 | memery = "memery.cli:main"
25 | 
26 | [tool.poetry.dev-dependencies]
27 | ipywidgets = "^7.7.0"
28 | ipython = "^8.3.0"
29 | 
30 | [build-system]
31 | requires = ["poetry-core>=1.0.0"]
32 | build-backend = "poetry.core.masonry.api"

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

1 | MIT License
2 | 
3 | Copyright (c) 2021 max brewer
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/memery/indexer.py:
--------------------------------------------------------------------------------

1 | from annoy import AnnoyIndex
2 | import torch
3 | 
4 | def join_all(db, new_files, new_embeddings) -> dict:
5 |     start = len(db)
6 |     for i, file in enumerate(new_files):
7 |         path, hash = file
8 |         index = i + start
9 |         db[index] = {
10 |             'hash': hash,
11 |             'fpath': path,
12 |             'embed': new_embeddings[i],
13 |         }
14 |     return(db)
15 | 
16 | def build_treemap(db) -> AnnoyIndex:
17 |     treemap = AnnoyIndex(512, 'angular')
18 |     for k, v in db.items():
19 |         treemap.add_item(k, v['embed'])
20 | 
21 |     # Build the treemap with 5 trees (more trees improve recall at the cost of build time)
22 |     treemap.build(5)
23 | 
24 |     return(treemap)
25 | 
26 | 
27 | def save_archives(root, treemap, db) -> tuple[str, str]:
28 |     dbpath = root/'memery.pt'
29 |     if dbpath.exists():
30 |         # dbpath.rename(root/'memery-bak.pt')
31 |         dbpath.unlink()
32 |     torch.save(db, dbpath)
33 | 
34 |     treepath = root/'memery.ann'
35 |     if treepath.exists():
36 |         # treepath.rename(root/'memery-bak.ann')
37 |         treepath.unlink()
38 |     treemap.save(str(treepath))
39 | 
40 |     return(str(dbpath), str(treepath))
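A sketch of the indexing round trip these three functions support; here `new_files` is the list of (path, hash) tuples produced by the loader, and `new_embeddings` the matching unit-normalized CLIP vectors from the encoder:

    from pathlib import Path
    from memery.indexer import join_all, build_treemap, save_archives

    db = join_all({}, new_files, new_embeddings)   # start from {} or a previously loaded archive
    treemap = build_treemap(db)
    dbpath, treepath = save_archives(Path('images'), treemap, db)
    print(f'Saved {dbpath} and {treepath}')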
--------------------------------------------------------------------------------
/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------

1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 | 
4 | name: Python application
5 | 
6 | on:
7 |   push:
8 |     branches: [ main ]
9 |   pull_request:
10 |     branches: [ main ]
11 | 
12 | permissions:
13 |   contents: read
14 | 
15 | jobs:
16 |   build:
17 | 
18 |     runs-on: ubuntu-latest
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v3
22 |     - name: Set up Python 3.10
23 |       uses: actions/setup-python@v3
24 |       with:
25 |         python-version: "3.10"
26 |     - name: Install dependencies
27 |       run: |
28 |         python -m pip install --upgrade pip
29 |         pip install flake8 pytest poetry
30 |         poetry install
31 |     - name: Lint with flake8
32 |       run: |
33 |         # stop the build if there are Python syntax errors or undefined names
34 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
35 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
36 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

--------------------------------------------------------------------------------
/windows-uninstall.bat:
--------------------------------------------------------------------------------

1 | @echo off
2 | setlocal enabledelayedexpansion
3 | 
4 | :: Display warning message
5 | echo WARNING this uninstalls *** ALL *** python libraries
6 | echo WARNING this deletes the poetry.lock file
7 | echo Are you sure you want to continue? y/N
8 | set /p user_input=
9 | 
10 | :: Check user input
11 | if /i "%user_input%"=="y" (
12 |     echo Uninstalling Python libraries...
13 |     pip freeze > installed_packages.txt
14 | 
15 |     :: Check if installed_packages.txt is empty
16 |     for %%A in (installed_packages.txt) do (
17 |         if %%~zA==0 (
18 |             echo all pip packages uninstalled
19 |             goto poetry_check
20 |         )
21 |     )
22 | 
23 |     pip uninstall -r installed_packages.txt -y
24 |     goto poetry_check
25 | ) else (
26 |     echo Exiting...
27 |     goto end
28 | )
29 | 
30 | :poetry_check
31 | :: Check if poetry is installed
32 | poetry -V >nul 2>&1
33 | if %ERRORLEVEL% equ 0 (
34 |     echo Poetry is installed, removing all environments...
35 |     for /f "tokens=1" %%i in ('poetry env list') do poetry env remove %%i
36 | ) else (
37 |     echo Poetry is not installed, skipping poetry environment removal.
38 | )
39 | 
40 | :cleanup
41 | :: Clean up
42 | if exist installed_packages.txt del installed_packages.txt
43 | 
44 | :: Delete poetry.lock if it exists
45 | if exist poetry.lock del poetry.lock
46 | 
47 | :end
48 | :: End of script
49 | endlocal
50 | 

--------------------------------------------------------------------------------
/memery/encoder.py:
--------------------------------------------------------------------------------

1 | import torch
2 | import clip
3 | from clip.model import CLIP
4 | from tqdm import tqdm
5 | from torch.utils.data import DataLoader
6 | from torch import Tensor, device
7 | from torchvision.transforms import Compose
8 | 
9 | def load_model(device: device) -> CLIP:
10 |     model, _ = clip.load("ViT-B/32", device, jit=False)
11 |     model = model.float()
12 |     return(model)
13 | 
14 | def image_encoder(img_loader: DataLoader, device: device, model: CLIP):
15 |     image_embeddings = torch.tensor(()).to(device)
16 |     with torch.no_grad():
17 |         for images, labels in tqdm(img_loader):
18 |             batch_features = model.encode_image(images.to(device))
19 |             image_embeddings = torch.cat((image_embeddings, batch_features)).to(device)
20 | 
21 |     image_embeddings = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)
22 |     return(image_embeddings)
23 | 
24 | def text_encoder(text: str, device: device, model: CLIP):
25 |     with torch.no_grad():
26 |         text = clip.tokenize(text).to(device)
27 |         text_features = model.encode_text(text)
28 |         text_features = text_features / text_features.norm(dim=-1, keepdim=True)
29 |     return(text_features)
30 | 
31 | def image_query_encoder(image: Tensor, device: device, model: CLIP):
32 |     with torch.no_grad():
33 |         image_embed = model.encode_image(image.unsqueeze(0).to(device))
34 |         image_embed = image_embed / image_embed.norm(dim=-1, keepdim=True)
35 |     return(image_embed)
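A sketch of encoding a text query with the functions above, falling back to CPU when no GPU is available (the ViT-B/32 weights are downloaded on the first clip.load call):

    import torch
    from memery.encoder import load_model, text_encoder

    dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = load_model(dev)
    query = text_encoder('a funny dog meme', dev, model)
    print(query.shape)   # torch.Size([1, 512]), unit-normalized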
--------------------------------------------------------------------------------
/install/windows-install.py:
--------------------------------------------------------------------------------

1 | import subprocess
2 | import platform
3 | import webbrowser
4 | from packaging import version
5 | 
6 | from cuda_install import cuda_check
7 | 
8 | def get_python_version():
9 |     return platform.python_version()
10 | 
11 | def open_python_download_page():
12 |     webbrowser.open("https://www.python.org/downloads/")
13 | 
14 | def is_poetry_installed():
15 |     try:
16 |         subprocess.run(["poetry", "--version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
17 |         return True
18 |     except (subprocess.CalledProcessError, FileNotFoundError):
19 |         return False
20 | 
21 | def install_poetry():
22 |     try:
23 |         subprocess.run(["pip", "install", "poetry"], check=True)
24 |         print("Poetry installed successfully.")
25 |     except (subprocess.CalledProcessError, FileNotFoundError) as e:
26 |         print(f"Failed to install Poetry: {e}")
27 | 
28 | if __name__ == "__main__":
29 |     current_version = get_python_version()
30 |     print(f"Found Python version: {current_version}. Project tested with Python 3.10.6.")
31 | 
32 |     if version.parse(current_version) < version.parse("3.9.0"):
33 |         print("\033[91mProject requires Python 3.9 or greater. Please install Python 3.9 or greater.\033[0m")
34 |         open_python_download_page()
35 | 
36 |     if is_poetry_installed():
37 |         print("Poetry is already installed.")
38 |     else:
39 |         print("Poetry is not installed. Installing...")
40 |         install_poetry()
41 | 
42 |     # check if cuda is installed
43 |     # cuda_check("11.3.0")
44 | 

--------------------------------------------------------------------------------
/memery/cli.py:
--------------------------------------------------------------------------------

1 | import typer
2 | from memery.core import Memery
3 | import memery
4 | import streamlit.cli
5 | from typing import Optional
6 | # Sometimes you just want to be able to pipe information through the terminal. This is that command.
7 | 
8 | app = typer.Typer()
9 | 
10 | def main():
11 |     app()
12 | 
13 | @app.command()
14 | def recall(
15 |     root: str = typer.Argument('.', help="Image folder to search"),
16 |     text: str = typer.Option(None, "-t", "--text", help="Text query"),
17 |     image: str = typer.Option(None, "-i", "--image", help="Filepath to image query"),
18 |     number: int = typer.Option(10, "-n", "--number", help="Number of results to return")
19 | ) -> None:
20 |     """Search recursively over a folder from the command line"""
21 |     memery = Memery()
22 |     ranked = memery.query_flow(root, query=text, image_query=image)
23 |     print(ranked[:number])
24 | 
25 | @app.command()
26 | def serve(root: Optional[str] = typer.Argument(None)):
27 |     """Runs the streamlit GUI in your browser"""
28 |     app_path = memery.__file__.replace('__init__.py', 'streamlit_app.py')
29 |     if root is None:
30 |         streamlit.cli.main(['run', app_path, './images'])
31 |     else:
32 |         streamlit.cli.main(['run', app_path, f'{root}'])
33 | 
34 | @app.command()
35 | def build(
36 |     root: str = typer.Argument('.'),
37 |     workers: int = typer.Option(default=0)
38 | ):
39 |     '''
40 |     Indexes the directory and all subdirectories
41 |     '''
42 |     memery = Memery()
43 |     memery.index_flow(root, num_workers=workers)
44 |     return None
45 | 
46 | @app.command()
47 | def purge(root: str = typer.Argument('.')):
48 |     """
49 |     Cleans out all files saved by memery
50 |     """
51 |     memery = Memery()
52 |     memery.clean(root)
53 |     print("Purged files!")
54 | 
55 | if __name__ == "__main__":
56 |     main()
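The commands above can be smoke-tested in-process with Typer's test runner, without installing the console script; note that recall loads the CLIP model, so the first invocation is slow (the paths here are illustrative):

    from typer.testing import CliRunner
    from memery.cli import app

    runner = CliRunner()
    result = runner.invoke(app, ['recall', './images', '--text', 'dad joke', '--number', '5'])
    print(result.stdout)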
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------

1 | # How to contribute
2 | 
3 | ## Did you find a bug?
4 | 
5 | * Ensure the bug was not already reported by searching on GitHub under Issues.
6 | * If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring.
7 | * Be sure to add the complete error messages.
8 | 
9 | #### Did you write a patch that fixes a bug?
10 | 
11 | * Open a new GitHub pull request with the patch.
12 | * Ensure that your PR includes a test that fails without your patch and passes with it.
13 | * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable.
14 | 
15 | ## PR submission guidelines
16 | 
17 | * Keep each PR focused. However convenient it may be, do not combine several unrelated fixes in one PR. Create as many branches as needed to keep each PR focused.
18 | * Do not mix style changes/fixes with "functional" changes. Such PRs are very difficult to review and will most likely be rejected.
19 | * Do not add/remove vertical whitespace. Preserve the original style of the file you edit as much as you can.
20 | * Do not turn an already submitted PR into your development playground. If, after you submitted a PR, you discover that more work is needed, close the PR, do the required work, and then submit a new PR. Otherwise each of your commits requires attention from the maintainers of the project.
21 | * If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. In the exceptional case where you realize it will take many commits to complete the requested changes, it's probably best to close the PR, do the work, and then submit it again. Use common sense when choosing one way over the other.

--------------------------------------------------------------------------------
/notebooks/memery.ipynb:
--------------------------------------------------------------------------------

1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "from memery.core import Memery"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": null,
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "memery = Memery()\n",
19 |     "ranked = memery.query_flow('../images', 'dad joke')\n",
20 |     "\n",
21 |     "print(ranked[:5])"
22 |    ]
23 |   },
24 |   {
25 |    "cell_type": "code",
26 |    "execution_count": null,
27 |    "metadata": {},
28 |    "outputs": [],
29 |    "source": [
30 |     "memery = Memery()\n",
31 |     "root = '../images/'\n",
32 |     "db = memery.get_db(root + 'memery.pt')\n",
33 |     "index = memery.get_index(root + 'memery.ann')\n",
34 |     "model = memery.get_model()"
35 |    ]
36 |   },
37 |   {
38 |    "cell_type": "code",
39 |    "execution_count": null,
40 |    "metadata": {},
41 |    "outputs": [],
42 |    "source": [
43 |     "memery.index_flow(root)\n"
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "code",
48 |    "execution_count": null,
49 |    "metadata": {},
50 |    "outputs": [],
51 |    "source": [
52 |     "memery.reset_state()\n",
53 |     "memery.model = None"
54 |    ]
55 |   },
56 |   {
57 |    "cell_type": "code",
58 |    "execution_count": null,
59 |    "metadata": {},
60 |    "outputs": [],
61 |    "source": [
62 |     "memery.query_flow(root, 'Wow its already working')"
63 |    ]
64 |   }
65 |  ],
66 |  "metadata": {
67 |   "interpreter": {
68 |    "hash": "deeee0b52e76b5e3a563dfd39c9570f6111f9f254cd04b55dab6af9643751b0b"
69 |   },
70 |   "kernelspec": {
71 |    "display_name": "Python 3.9.12 ('memery-OXFjyqC6-py3.9')",
72 |    "language": "python",
73 |    "name": "python3"
74 |   },
75 |   "language_info": {
76 |    "codemirror_mode": {
77 |     "name": "ipython",
78 |     "version": 3
79 |    },
80 |    "file_extension": ".py",
81 |    "mimetype": "text/x-python",
82 |    "name": "python",
83 |    "nbconvert_exporter": "python",
84 |    "pygments_lexer": "ipython3",
85 |    "version": "3.9.6"
86 |   },
87 |   "orig_nbformat": 4
88 |  },
89 |  "nbformat": 4,
90 |  "nbformat_minor": 2
91 | }
92 | 
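The notebook above drives text search through the high-level API; an image query goes through the same query_flow, passing a filepath as image_query, mirroring what the recall command does internally (the meme path here is illustrative):

    from memery.core import Memery

    memery = Memery()
    ranked = memery.query_flow('images/', image_query='images/memes/Wholesome-Meme-1.jpg')
    print(ranked[:5])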
--------------------------------------------------------------------------------
/memery/loader.py:
--------------------------------------------------------------------------------

1 | # Builtins
2 | from pathlib import Path
3 | from typing import Any
4 | 
5 | # External
6 | from PIL import Image
7 | import torch
8 | from torch import device
9 | from annoy import AnnoyIndex
10 | import logging
11 | 
12 | # We take the filename and last modified time to check for modified images
13 | def hash_path(filepath: Path) -> str:
14 |     return f'{filepath.stem}_{str(filepath.stat().st_mtime).split(".")[0]}'
15 | 
16 | def get_image_files(path: Path) -> list[tuple[Path, str]]:
17 |     img_extensions = {'.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'}
18 |     return [(f, hash_path(f)) for f in path.rglob('*') if f.suffix in img_extensions]
19 | 
20 | def get_valid_images(path: Path) -> list[tuple[Path, str]]:
21 |     filepaths = get_image_files(path)
22 |     return [f for f in filepaths if verify_image(f[0])]
23 | 
24 | def verify_image(f: Path) -> bool:
25 |     try:
26 |         img = Image.open(f)
27 |         img.verify()
28 |         return(True)
29 |     except Exception as e:
30 |         logging.exception('Skipping bad file: %s\ndue to %s', f, e)
31 |         return(False)
32 | 
33 | def archive_loader(filepaths: list[tuple[Path, str]], db: Any) -> tuple[dict, list[tuple[str, str]]]:
34 | 
35 |     current_hashes = [hash for path, hash in filepaths]
36 |     archive_db = {i:db[item[0]] for i, item in enumerate(db.items()) if item[1]['hash'] in current_hashes}
37 |     archive_hashes = [v['hash'] for v in archive_db.values()]
38 |     new_files = [(str(path), hash) for path, hash in filepaths if hash not in archive_hashes and verify_image(path)]
39 | 
40 |     return(archive_db, new_files)
41 | 
42 | def db_loader(dbpath: str, device: device) -> Any:
43 |     '''
44 |     Loads a .pt file
45 |     '''
46 |     if Path(dbpath).exists():
47 |         db = torch.load(dbpath, device)
48 |     else:
49 |         db = {}
50 |     return(db)
51 | 
52 | def treemap_loader(treepath: str) -> AnnoyIndex:
53 |     '''
54 |     Loads a .ann file
55 |     '''
56 |     treemap = AnnoyIndex(512, 'angular')
57 |     treepath = Path(treepath)
58 |     if treepath.exists():
59 |         treemap.load(str(treepath))
60 |     else:
61 |         treemap = None
62 |     return(treemap)
63 | 
64 | if __name__ == '__main__':
65 |     print('TESTING')

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

1 | *.bak
2 | .gitattributes
3 | .last_checked
4 | .gitconfig
5 | *.bak
6 | *.log
7 | *~
8 | ~*
9 | _tmp*
10 | tmp*
11 | tags
12 | 
13 | # memery files
14 | *.ann
15 | *.pt
16 | 
17 | # Byte-compiled / optimized / DLL files
18 | __pycache__/
19 | *.py[cod]
20 | *$py.class
21 | 
22 | # C extensions
23 | *.so
24 | 
25 | # Distribution / packaging
26 | .Python
27 | env/
28 | build/
29 | develop-eggs/
30 | dist/
31 | downloads/
32 | eggs/
33 | .eggs/
34 | lib/
35 | lib64/
36 | parts/
37 | sdist/
38 | var/
39 | wheels/
40 | *.egg-info/
41 | .installed.cfg
42 | *.egg
43 | share/
44 | bin/
45 | etc/
46 | 
47 | # PyInstaller
48 | # Usually these files are written by a python script from a template
49 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | .hypothesis/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | docs/_build/ 85 | 86 | # PyBuilder 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # pyenv 93 | .python-version 94 | 95 | # celery beat schedule file 96 | celerybeat-schedule 97 | 98 | # SageMath parsed files 99 | *.sage.py 100 | 101 | # dotenv 102 | .env 103 | 104 | # virtualenv 105 | .venv 106 | venv/ 107 | ENV/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | 122 | .vscode 123 | *.swp 124 | 125 | # osx generated files 126 | .DS_Store 127 | .DS_Store? 128 | .Trashes 129 | ehthumbs.db 130 | Thumbs.db 131 | .idea 132 | 133 | # pytest 134 | .pytest_cache 135 | 136 | # tools/trust-doc-nbs 137 | docs_src/.last_checked 138 | 139 | # symlinks to fastai 140 | docs_src/fastai 141 | tools/fastai 142 | 143 | # link checker 144 | checklink/cookies.txt 145 | 146 | # .gitconfig is now autogenerated 147 | .gitconfig 148 | poetry.lock 149 | -------------------------------------------------------------------------------- /memery/crafter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor, device 3 | from torchvision.datasets import VisionDataset 4 | from PIL import Image, ImageFile 5 | from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize 6 | from torch.utils.data import DataLoader 7 | 8 | 9 | def make_dataset(new_files: list[str]) -> tuple[list[str], list[str]]: 10 | '''Returns a list of samples of a form (path_to_sample, class) and in 11 | this case the class is just the filename''' 12 | samples = [] 13 | slugs = [] 14 | for i, f in enumerate(new_files): 15 | path, slug = f 16 | samples.append((str(path), i)) 17 | slugs.append((slug, i)) 18 | return(samples, slugs) 19 | 20 | def pil_loader(path: str) -> Image.Image: 21 | ImageFile.LOAD_TRUNCATED_IMAGES = True # Allow truncated images 22 | try: 23 | # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) 24 | with open(path, 'rb') as f: 25 | img = Image.open(f) 26 | return img.convert('RGB') 27 | except Exception as e: 28 | print(f"Skipping image {path}: {e}") 29 | return None 30 | 31 | class DatasetImagePaths(VisionDataset): 32 | 33 | def __init__(self, new_files, transforms = None): 34 | super(DatasetImagePaths, self).__init__(new_files, transforms=transforms) 35 | samples, slugs = make_dataset(new_files) 36 | self.samples = samples 37 | self.slugs = slugs 38 | self.loader = pil_loader 39 | self.root = 'file dataset' 40 | def __len__(self): 41 | return(len(self.samples)) 42 | 43 | def __getitem__(self, index): 44 | path, target = self.samples[index] 45 | try: 46 | sample = self.loader(path) 47 | if sample is not None: 48 | if self.transforms is not None: 49 | sample = self.transforms(sample) 50 | return sample, target 51 | except Exception as e: 52 | print(f"Skipping file {path} due to 
error: {e}") 53 | return None 54 | 55 | def clip_transform(n_px: int) -> Compose: 56 | return Compose([ 57 | Resize(n_px, interpolation=Image.BICUBIC), 58 | CenterCrop(n_px), 59 | ToTensor(), 60 | Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), 61 | ]) 62 | 63 | def crafter(new_files: list[str], device: device, batch_size: int=128, num_workers: int=4): 64 | with torch.no_grad(): 65 | imagefiles=DatasetImagePaths(new_files, clip_transform(224)) 66 | img_loader=DataLoader(imagefiles, batch_size=batch_size, shuffle=False, num_workers=num_workers) 67 | return(img_loader) 68 | 69 | def preproc(img: Tensor) -> Compose: 70 | transformed = clip_transform(224)(img) 71 | return(transformed) -------------------------------------------------------------------------------- /install/cuda_install.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import subprocess 3 | import torch 4 | 5 | 6 | def cuda_check(required_cuda_version: str): 7 | if torch.cuda.is_available(): 8 | print(f"Detected CUDA version: {torch.version.cuda}, torch: {torch.__version__}") 9 | else: 10 | # Only call check_and_install_cuda if CUDA is not available 11 | cuda_installed = check_and_install_cuda(required_cuda_version) 12 | if cuda_installed and torch.cuda.is_available(): 13 | print(f"Detected CUDA version: {torch.version.cuda}, torch: {torch.__version__}") 14 | 15 | def check_for_nvidia_gpu(): 16 | try: 17 | result = subprocess.run(["nvidia-smi", "-L"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) 18 | return "GPU" in result.stdout 19 | except subprocess.CalledProcessError: 20 | return False 21 | 22 | def download_and_install_cuda(required_cuda_version: str): 23 | system = platform.system() 24 | if system == "Linux": 25 | subprocess.run(["wget", f"https://developer.nvidia.com/cuda-{required_cuda_version}-download-archive"], check=True) 26 | elif system == "Windows": 27 | subprocess.run(["start", f"https://developer.nvidia.com/cuda-{required_cuda_version}-download-archive"], shell=True, check=True) 28 | elif system == "Darwin": 29 | print("Sorry, CUDA is not supported on macOS.") 30 | return False 31 | else: 32 | print("Unsupported OS.") 33 | return False 34 | print("Please follow the instructions on the opened webpage to install CUDA. After CUDA has been installed you will need to run the following commands") 35 | print("pip install torch==1.11.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html") 36 | print("pip install -e .") 37 | return True 38 | 39 | def check_and_install_cuda(required_cuda_version: str): 40 | if not check_for_nvidia_gpu(): 41 | print("No CUDA-compatible GPU detected. You must use CPU mode.") 42 | return False 43 | 44 | try: 45 | import torch 46 | installed_cuda_version = torch.version.cuda 47 | if installed_cuda_version == required_cuda_version: 48 | return True 49 | except ImportError: 50 | print("PyTorch is not installed. Unable to check CUDA version.") 51 | return False 52 | 53 | # Print in red 54 | print(f"\033[91mDetected CUDA {installed_cuda_version}, requires {required_cuda_version} for gpu.\033[0m") 55 | 56 | # Collect user input 57 | user_input = input("\033[94mUsing cpu mode by default. Install CUDA version for gpu mode? 
[y/N]: \033[0m\n\n")
58 | 
59 |     if user_input.lower() == 'y':
60 |         return download_and_install_cuda(required_cuda_version)
61 |     else:
62 |         print("Proceeding with CPU mode.")
63 |         return False
64 | 
--------------------------------------------------------------------------------
/windows-install.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | SETLOCAL
3 | 
4 | REM Check if Python is installed and in PATH
5 | where python >nul 2>nul
6 | if %errorlevel% neq 0 (
7 |     echo Python is not installed or not in PATH.
8 |     goto InstallPython
9 | )
10 | 
11 | REM Check Python version if installed
12 | for /f "tokens=*" %%i in ('python --version 2^>^&1') do set PYTHON_VERSION=%%i
13 | 
14 | REM Check if the version string actually contains "Python"
15 | echo %PYTHON_VERSION% | find "Python" > nul
16 | if %errorlevel% neq 0 (
17 |     echo Python is not installed or the Python version could not be detected.
18 |     goto InstallPython
19 | )
20 | 
21 | set PYTHON_VERSION=%PYTHON_VERSION:~7%
22 | for /f "tokens=1,2,3 delims=." %%a in ("%PYTHON_VERSION%") do (
23 |     set Major=%%a
24 |     set Minor=%%b
25 |     set Patch=%%c
26 | )
27 | echo Detected Python version: %Major%.%Minor%.%Patch%
28 | 
29 | REM Perform numerical comparison to check if version is adequate
30 | if %Major% geq 3 (
31 |     if %Major% gtr 3 (
32 |         echo Python version is adequate.
33 |         goto End
34 |     ) else (
35 |         if %Minor% geq 9 (
36 |             echo Python version is adequate.
37 |             goto End
38 |         )
39 |     )
40 | )
41 | 
42 | echo Python version is not adequate.
43 | goto InstallPython
44 | 
45 | :InstallPython
46 | echo Installing Python 3.10.6...
47 | REM Note: Internet access is required for this step; make sure you are connected.
48 | REM Check if the Python installer already exists
49 | if not exist "%CD%\python-3.10.6-amd64.exe" (
50 |     REM Download Python 3.10.6 using curl.
51 |     curl -O https://www.python.org/ftp/python/3.10.6/python-3.10.6-amd64.exe
52 | )
53 | start /wait python-3.10.6-amd64.exe InstallAllUsers=1 PrependPath=1
54 | if %errorlevel% neq 0 (
55 |     echo Failed to install Python.
56 |     exit /b 1
57 | )
58 | echo Python has been installed; run the install script again.
59 | echo This will refresh the environment variables, so ensure all other command windows are closed.
60 | pause
61 | 
62 | 
63 | :End
64 | ENDLOCAL
65 | 
66 | echo Upgrading pip
67 | python -m pip install --upgrade pip
68 | if %errorlevel% neq 0 (
69 |     echo Failed to upgrade pip.
70 |     pause
71 | )
72 | 
73 | REM Install Python dependencies from requirements.txt
74 | pip install -r ./install/requirements.txt
75 | if %errorlevel% neq 0 (
76 |     echo Failed to install Python dependencies from requirements.txt.
77 |     pause
78 | )
79 | 
80 | REM Check if Poetry is installed and in PATH
81 | where poetry >nul 2>nul
82 | if %errorlevel% neq 0 (
83 |     echo Poetry is not installed or not in PATH.
84 |     pause
85 | )
86 | 
87 | REM Run 'poetry install' to install dependencies via Poetry
88 | poetry install
89 | if %errorlevel% neq 0 (
90 |     echo Failed to install Python dependencies via Poetry.
91 |     pause
92 | )
93 | 
94 | REM Install the local, editable build
95 | echo Installing local editable build
96 | pip install -e .
97 | 
98 | REM Run the Python script
99 | python ./install/windows-install.py
100 | 
101 | REM Check the exit code of the Python script
102 | if %errorlevel% neq 0 (
103 |     echo Failed to execute Python script.
104 |     pause
105 | )
106 | echo Successfully installed.
107 | pause -------------------------------------------------------------------------------- /memery/gui.py: -------------------------------------------------------------------------------- 1 | # import ipywidgets as widgets 2 | 3 | # from .core import query_flow 4 | # from pathlib import Path 5 | # from IPython.display import clear_output 6 | 7 | 8 | 9 | # def get_image(file_loc): 10 | # filepath = Path(file_loc) 11 | # file = open(filepath, 'rb') 12 | # image = widgets.Image(value=file.read(),width=200) 13 | 14 | # return(image) 15 | 16 | # def get_grid(filepaths, n=4): 17 | # imgs = [get_image(f) for f in filepaths[:n] if Path(f).exists()] 18 | # grid = widgets.GridBox(imgs, layout=widgets.Layout(grid_template_columns="repeat(auto-fit, 200px)")) 19 | # return(grid) 20 | 21 | # from PIL import Image 22 | # from io import BytesIO 23 | 24 | # def update_tabs(path, query, n_images, searches, tabs, logbox, im_display_zone, image_query=None): 25 | # stem = Path(path.value).stem 26 | # slug = f"{stem}:{str(query.value)}" 27 | # if slug not in searches.keys(): 28 | # with logbox: 29 | # print(slug) 30 | # if image_query: 31 | # im_queries = [name for name, data in image_query.items()] 32 | 33 | # img = [Image.open(BytesIO(file_info['content'])).convert('RGB') for name, file_info in image_query.items()] 34 | # ranked = query_flow(path.value, query.value, image_query=img[-1]) 35 | # slug = slug + f'/{im_queries}' 36 | 37 | # if len(im_queries) > 0: 38 | # with im_display_zone: 39 | # clear_output() 40 | # display(img[-1]) 41 | # else: 42 | # ranked = query_flow(path.value, query.value) 43 | # searches[f'{slug}'] = ranked 44 | 45 | # tabs.children = [get_grid(v, n=n_images.value) for v in searches.values()] 46 | # for i, k in enumerate(searches.keys()): 47 | # tabs.set_title(i, k) 48 | # tabs.selected_index = len(searches)-1 49 | 50 | 51 | # # return(True) 52 | 53 | # class appPage(): 54 | 55 | # def __init__(self): 56 | # self.inputs_layout = widgets.Layout(max_width='80%') 57 | 58 | # self.path = widgets.Text(placeholder='path/to/image/folder', value='images/', layout=self.inputs_layout) 59 | # self.query = widgets.Text(placeholder='a funny dog meme', value='a funny dog meme', layout=self.inputs_layout) 60 | 61 | # self.image_query = widgets.FileUpload() 62 | # self.im_display_zone = widgets.Output(max_height='5rem') 63 | 64 | # self.n_images = widgets.IntSlider(description='#', value=4, layout=self.inputs_layout) 65 | # self.go = widgets.Button(description="Search", layout=self.inputs_layout) 66 | # self.logbox = widgets.Output(layout=widgets.Layout(max_width='80%', height="3rem", overflow="none")) 67 | # self.all_inputs_layout = widgets.Layout(max_width='80vw', min_height='40vh', flex_flow='row wrap', align_content='flex-start') 68 | 69 | # self.inputs = widgets.Box([self.path, self.query, self.image_query, self.n_images, self.go, self.im_display_zone, self.logbox], layout=self.all_inputs_layout) 70 | # self.tabs = widgets.Tab() 71 | # self.page = widgets.AppLayout(left_sidebar=self.inputs, center=self.tabs) 72 | 73 | # self.searches = {} 74 | # self.go.on_click(self.page_update) 75 | 76 | # display(self.page) 77 | 78 | # def page_update(self, b): 79 | 80 | # update_tabs(self.path, self.query, self.n_images, self.searches, self.tabs, self.logbox, self.im_display_zone, self.image_query.value) 81 | 82 | 83 | -------------------------------------------------------------------------------- /notebooks/07_cli.ipynb: -------------------------------------------------------------------------------- 1 | 
{ 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp cli" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# CLI" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "#export\n", 36 | "import typer\n", 37 | "import memery.core\n", 38 | "import streamlit.cli" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "#export\n", 48 | "app = typer.Typer()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Sometimes you just want to be able to pipe information through the terminal. This is that command" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "#export\n", 65 | "@app.command()\n", 66 | "def recall(path: str, query: str, n: int = 10):\n", 67 | " \"\"\"Search recursively over a folder from the command line\"\"\"\n", 68 | " ranked = memery.core.query_flow(path, query=query)\n", 69 | " print(ranked[:n])\n", 70 | "# return(ranked)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "recall('./images', 'a funny dog meme')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "More often, though, you probably want to sift through image visually. The `memery serve` command will open a browser app on your local device, using Streamlit library." 
87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "#export\n", 96 | "@app.command()\n", 97 | "def serve():\n", 98 | " \"\"\"Runs the streamlit GUI in your browser\"\"\"\n", 99 | " path = memery.__file__.replace('__init__.py','streamlit_app.py')\n", 100 | " streamlit.cli.main(['run',path])" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# serve()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "#export \n", 119 | "def __main__():\n", 120 | " app()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 3", 141 | "language": "python", 142 | "name": "python3" 143 | } 144 | }, 145 | "nbformat": 4, 146 | "nbformat_minor": 4 147 | } 148 | -------------------------------------------------------------------------------- /notebooks/05_ranker.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp ranker" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Ranker\n", 27 | "\n", 28 | "Takes a query and an index and finds the nearest neighbors or most similar scores. Ideally this is just a simple Annoy `get_nns_by_vector`, or in the simple case a similarity score across all the vectors." 
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import torch\n", 38 | "\n", 39 | "\n", 40 | "from pathlib import Path\n", 41 | "\n", 42 | "from memery.loader import treemap_loader, db_loader\n", 43 | "from memery.encoder import text_encoder" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "treemap = treemap_loader(Path('./images/memery.ann'))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "if treemap:\n", 62 | " treemap.get_n_items()" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "#export\n", 72 | "def ranker(query_vec, treemap):\n", 73 | " nn_indexes = treemap.get_nns_by_vector(query_vec[0], treemap.get_n_items())\n", 74 | " return(nn_indexes)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "#export\n", 84 | "def nns_to_files(db, indexes):\n", 85 | "# return([[v['fpath'] for k,v in db.items() if v['index'] == ind][0] for ind in indexes])\n", 86 | " return([db[ind]['fpath'] for ind in indexes])" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 96 | "db = db_loader(Path('images/memery.pt'), device)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "query = 'dog'" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "query_vec = text_encoder(query, device)\n", 115 | "indexes = ranker(query_vec, treemap)\n", 116 | "ranked_files = nns_to_files(db, indexes)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "ranked_files[:5]" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [] 141 | } 142 | ], 143 | "metadata": { 144 | "kernelspec": { 145 | "display_name": "Python 3", 146 | "language": "python", 147 | "name": "python3" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 4 152 | } 153 | -------------------------------------------------------------------------------- /notebooks/03_encoder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp encoder" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Encoder\n", 27 | "\n", 28 | "\n", 29 | "This is just a wrapper around CLIP functions. 
Cool thing here is we can use the one model for both image and text!\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "#export\n", 39 | "import torch\n", 40 | "import clip\n", 41 | "from tqdm import tqdm\n", 42 | "\n", 43 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 44 | "model, _ = clip.load(\"ViT-B/32\", device, jit=False) \n", 45 | "model = model.float()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "#export\n", 55 | "def image_encoder(img_loader, device):\n", 56 | " image_embeddings = torch.tensor(()).to(device)\n", 57 | " with torch.no_grad():\n", 58 | " for images, labels in tqdm(img_loader):\n", 59 | " batch_features = model.encode_image(images.to(device))\n", 60 | " image_embeddings = torch.cat((image_embeddings, batch_features)).to(device)\n", 61 | " \n", 62 | " image_embeddings = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)\n", 63 | " return(image_embeddings)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "new_files = [('images/memes/Wholesome-Meme-8.jpg', 'Wholesome-Meme-8'), ('images/memes/Wholesome-Meme-1.jpg', 'Wholesome-Meme-1')]" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "from memery.crafter import crafter" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "img_loader = crafter(new_files, device)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "for images, labels in img_loader:\n", 100 | " print(images)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "image_embeddings = image_encoder(img_loader, device)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "image_embeddings.shape" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "The text encoder returns a 512d vector just like the image encoder" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "#export\n", 135 | "def text_encoder(text, device):\n", 136 | " with torch.no_grad():\n", 137 | " text = clip.tokenize(text).to(device)\n", 138 | " text_features = model.encode_text(text)\n", 139 | " text_features = text_features / text_features.norm(dim=-1, keepdim=True)\n", 140 | " return(text_features)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "text_embedding = text_encoder('a funny dog meme', device)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "text_embedding.shape" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "#export\n", 168 | "def 
image_query_encoder(image, device):\n", 169 | " with torch.no_grad():\n", 170 | " image_embed = model.encode_image(image.unsqueeze(0).to(device))\n", 171 | " image_embed = image_embed / image_embed.norm(dim=-1, keepdim=True)\n", 172 | " return(image_embed)" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 4 185 | } 186 | -------------------------------------------------------------------------------- /memery/streamlit_app.py: -------------------------------------------------------------------------------- 1 | # Builtins 2 | from pathlib import Path 3 | from PIL import Image 4 | from io import StringIO 5 | import sys 6 | import argparse 7 | from threading import current_thread 8 | from contextlib import contextmanager 9 | 10 | # Local 11 | from memery.core import Memery 12 | 13 | # Dependencies 14 | import streamlit as st 15 | from streamlit.report_thread import REPORT_CONTEXT_ATTR_NAME 16 | 17 | 18 | # Parses the args from the command line 19 | def parse_args(args: list[str]): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('root', help='starting directory to search') 22 | return parser.parse_args(args) 23 | 24 | # Initalize session state 25 | args = parse_args(sys.argv[1:]) 26 | if 'memery' not in st.session_state: 27 | st.session_state['memery'] = Memery() 28 | memery: Memery = st.session_state['memery'] 29 | 30 | # Configs 31 | st.set_page_config(page_title='Memery', layout="centered") 32 | 33 | # Draw the sidebar 34 | st.sidebar.title("Memery") 35 | 36 | settings = st.sidebar.expander(label="Settings", expanded=False) 37 | with settings: 38 | do_clear_cache = st.button(label="Clear Cache") 39 | num_workers = st.slider(label="Number of workers", max_value=8) 40 | 41 | dir_l, dir_r = st.sidebar.columns([3,1]) 42 | with dir_l: 43 | path = st.text_input(label='Directory', value=args.root) 44 | with dir_r: 45 | st.title("") 46 | do_index = st.button(label="Index", key='do_index') 47 | 48 | search_l, search_r = st.sidebar.columns([3,1]) 49 | with search_l: 50 | text_query = st.text_input(label='Text query', value='') 51 | negative_text_query = st.text_input(label='Negative Text query', value='') 52 | with search_r: 53 | st.title("") 54 | search_button = st.button(label="Search", key="search_button") 55 | 56 | 57 | image_query = st.sidebar.file_uploader(label='Image query') 58 | image_query_display = st.sidebar.container() 59 | if image_query: # Display the image query if there is one 60 | img = Image.open(image_query).convert('RGB') 61 | with image_query_display: 62 | st.image(img) 63 | logbox = st.sidebar.empty() 64 | skipped_files_box = st.sidebar.expander(label='Skipped files', expanded=False) 65 | 66 | # Draw the main page 67 | sizes = {'small': 115, 'medium':230, 'large':332, 'xlarge':600} 68 | l, m, r = st.columns([4,1,1]) 69 | with l: 70 | num_images = st.slider(label='Number of images',value=12) 71 | 72 | with m: 73 | size_choice = st.selectbox(label='Image width', options=[k for k in sizes.keys()], index=1) 74 | with r: 75 | captions_on = st.checkbox(label="Caption filenames", value=False) 76 | image_display_zone = st.container() 77 | 78 | # Index the directory 79 | def index(logbox, path, num_workers): 80 | if Path(path).exists(): 81 | with logbox: 82 | with st_stdout('info'): 83 | memery.index_flow(path, num_workers) 84 | else: 85 | with logbox: 86 | with st_stdout('warning'): 87 | print(f'{path} does 
not exist!') 88 | 89 | # Clears out the database and treemap files 90 | def clear_cache(root, logbox): 91 | memery.clean(root) 92 | with logbox: 93 | with st_stdout('info'): 94 | print("Cleaned database and index files") 95 | 96 | # Runs a search 97 | def search(root, text_query, negative_text_query, image_query, image_display_zone, skipped_files_box, num_images, captions_on, sizes, size_choice): 98 | if not Path(path).exists(): 99 | with logbox: 100 | with st_stdout('warning'): 101 | print(f'{path} does not exist!') 102 | return 103 | with logbox: 104 | with st_stdout('info'): 105 | ranked = memery.query_flow(root, text_query, negative_text_query, image_query) # Modified line 106 | ims_to_display = {} 107 | size = sizes[size_choice] 108 | for o in ranked[:num_images]: 109 | name = o.replace(path, '') 110 | try: 111 | ims_to_display[name] = Image.open(o).convert('RGB') 112 | except Exception as e: 113 | with skipped_files_box: 114 | st.warning(f'Skipping bad file: {name}\ndue to {type(e)}') 115 | pass 116 | with image_display_zone: 117 | if captions_on: 118 | st.image([o for o in ims_to_display.values()], width=size, channels='RGB', caption=[o for o in ims_to_display.keys()]) 119 | else: 120 | st.image([o for o in ims_to_display.values()], width=sizes[size_choice], channels='RGB') 121 | 122 | 123 | @contextmanager 124 | def st_redirect(src, dst): 125 | placeholder = st.empty() 126 | output_func = getattr(placeholder, dst) 127 | 128 | with StringIO() as buffer: 129 | old_write = src.write 130 | 131 | def new_write(b): 132 | if getattr(current_thread(), REPORT_CONTEXT_ATTR_NAME, None): 133 | buffer.write(b + '') 134 | output_func(buffer.getvalue() + '') 135 | else: 136 | old_write(b) 137 | 138 | try: 139 | src.write = new_write 140 | yield 141 | finally: 142 | src.write = old_write 143 | 144 | 145 | @contextmanager 146 | def st_stdout(dst): 147 | with st_redirect(sys.stdout, dst): 148 | yield 149 | 150 | 151 | @contextmanager 152 | def st_stderr(dst): 153 | with st_redirect(sys.stderr, dst): 154 | yield 155 | 156 | # Decide which actions to take 157 | if do_clear_cache: 158 | clear_cache(path, logbox) 159 | elif do_index: 160 | index(logbox, path, num_workers) 161 | elif search_button or text_query or image_query: 162 | search(path, text_query, negative_text_query, image_query, image_display_zone, skipped_files_box, num_images, captions_on, sizes, size_choice) # Modified line 163 | 164 | -------------------------------------------------------------------------------- /notebooks/04_indexer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp indexer" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Indexer\n", 27 | "\n", 28 | "Given a dataset of tensors, returns a dictionary archive and a treemap structure (and saves them to disk)" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Joiner\n", 36 | "\n", 37 | "This executor `needs` both Encoder and Loader to send it the new and old vectors, respectively. 
So it needs to be preceded by the **join_all** component to make sure we're not missing new data before handing it over to the indexer -- or indexing old data that no longer exists!" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "#export\n", 47 | "def join_all(db, new_files, new_embeddings):\n", 48 | " start = len(db)\n", 49 | " for i, file in enumerate(new_files):\n", 50 | " path, slug = file\n", 51 | " index = i + start\n", 52 | " db[index] = {\n", 53 | " 'slug': slug,\n", 54 | " 'fpath': path,\n", 55 | " 'embed': new_embeddings[i],\n", 56 | " }\n", 57 | " return(db)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "import torch\n", 67 | "from pathlib import Path\n", 68 | "from memery.loader import get_image_files, db_loader, archive_loader\n", 69 | "from memery.crafter import crafter\n", 70 | "from memery.encoder import image_encoder\n", 71 | "\n", 72 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "root = Path('images/')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "filepaths = get_image_files(root)\n", 91 | "archive_db = {}\n", 92 | "\n", 93 | "\n", 94 | "archive_db, new_files = archive_loader(filepaths, root, device)\n", 95 | "print(f\"Loaded {len(archive_db)} encodings\")\n", 96 | "print(f\"Encoding {len(new_files)} new images\")\n", 97 | "\n", 98 | "crafted_files = crafter(new_files, device)\n", 99 | "new_embeddings = image_encoder(crafted_files, device)\n", 100 | "\n", 101 | "db = join_all(archive_db, new_files, new_embeddings)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# db = db_loader(root/'memery.pt',device)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "[o[0] for o in db.items()][:5]" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "len(db)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "Building treemap takes a long time. I don't think `annoy` uses the GPU at all?" 
136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "#export\n", 145 | "from annoy import AnnoyIndex" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "#export\n", 155 | "def build_treemap(db):\n", 156 | " treemap = AnnoyIndex(512, 'angular')\n", 157 | " for k, v in db.items():\n", 158 | " treemap.add_item(k, v['embed'])\n", 159 | "\n", 160 | " # Build the treemap, with 5 trees rn\n", 161 | " treemap.build(5)\n", 162 | "\n", 163 | " return(treemap)\n", 164 | " " 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "t = build_treemap(db)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "t.get_n_items(), t.get_n_trees()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "#export\n", 192 | "import torch" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "#export\n", 202 | "def save_archives(root, treemap, db):\n", 203 | " dbpath = root/'memery.pt'\n", 204 | " if dbpath.exists():\n", 205 | "# dbpath.rename(root/'memery-bak.pt')\n", 206 | " dbpath.unlink()\n", 207 | " torch.save(db, dbpath)\n", 208 | " \n", 209 | " treepath = root/'memery.ann'\n", 210 | " if treepath.exists():\n", 211 | "# treepath.rename(root/'memery-bak.ann')\n", 212 | " treepath.unlink()\n", 213 | " treemap.save(str(treepath))\n", 214 | " \n", 215 | " return(str(dbpath), str(treepath))" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "save_archives(root, t, db)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [] 233 | } 234 | ], 235 | "metadata": { 236 | "kernelspec": { 237 | "display_name": "Python 3", 238 | "language": "python", 239 | "name": "python3" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 4 244 | } 245 | -------------------------------------------------------------------------------- /memery/core.py: -------------------------------------------------------------------------------- 1 | # Builtins 2 | import time 3 | from pathlib import Path 4 | import logging 5 | 6 | # Dependencies 7 | import torch 8 | from torch import Tensor, device 9 | from torchvision.transforms import Compose 10 | from PIL import Image 11 | 12 | 13 | # Local imports 14 | from memery import loader, crafter, encoder, indexer, ranker 15 | 16 | class Memery(): 17 | def __init__(self, root: str = '.'): 18 | self.index_file = 'memery.ann' 19 | self.db_file = 'memery.pt' 20 | self.root = root 21 | self.index = None 22 | self.db = None 23 | self.model = None 24 | self.device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu") 25 | print(f"Using {self.device} for computation.") 26 | 27 | def index_flow(self, root: str, num_workers=0) -> tuple[str, str]: 28 | '''Indexes images in path, returns the location of save files''' 29 | 30 | start_time = time.time() 31 | if self.root != root: 32 | self.root = 
root 33 | self.reset_state() 34 | 35 | path = Path(root) 36 | if not path.is_dir(): 37 | logging.error("Invalid path: %s", root) 38 | return 39 | device = self.device 40 | 41 | # Check if we should re-index the files 42 | print("Checking files...") 43 | dbpath = path/self.db_file 44 | db = self.get_db(str(dbpath)) 45 | treepath = path/self.index_file 46 | treemap = self.get_index(str(treepath)) 47 | filepaths = loader.get_valid_images(path) 48 | 49 | db_set = set([o['hash'] for o in db.values()]) 50 | fp_set = set([o for _, o in filepaths]) 51 | 52 | if treemap == None or db_set != fp_set: 53 | archive_db = {} 54 | 55 | archive_db, new_files = loader.archive_loader(filepaths, db) 56 | print(f"Loaded {len(archive_db)} encodings") 57 | print(f"Encoding {len(new_files)} new images") 58 | 59 | # Crafting and encoding 60 | crafted_files = crafter.crafter(new_files, device, num_workers=num_workers) 61 | model = self.get_model() 62 | new_embeddings = encoder.image_encoder(crafted_files, device, model) 63 | 64 | # Reindexing 65 | db = indexer.join_all(archive_db, new_files, new_embeddings) 66 | print("Building treemap") 67 | treemap = indexer.build_treemap(db) 68 | 69 | print(f"Saving {len(db)} encodings") 70 | save_paths = indexer.save_archives(path, treemap, db) 71 | 72 | else: 73 | save_paths = (str(dbpath), str(treepath)) 74 | self.reset_state() 75 | print(f"Done in {time.time() - start_time} seconds") 76 | 77 | return(save_paths) 78 | 79 | def query_flow(self, root: str, query: str=None, negative_query: str=None, image_query: str=None, reindex: bool=False) -> list[str]: 80 | ''' 81 | Indexes a folder and returns file paths ranked by query. 82 | 83 | Parameters: 84 | path (str): Folder to search 85 | query (str): Positive search query text 86 | negative_query (str): Negative search query text 87 | image_query (Tensor): Search query image(s) 88 | reindex (bool): Reindex the folder if True 89 | Returns: 90 | list of file paths ranked by query 91 | ''' 92 | start_time = time.time() 93 | 94 | if self.root != root: 95 | self.root = root 96 | self.reset_state() 97 | path = Path(root) 98 | if not path.is_dir(): 99 | logging.error("Invalid path: %s", root) 100 | return 101 | device = self.device 102 | 103 | dbpath = path/self.db_file 104 | treepath = path/self.index_file 105 | treemap = self.get_index(treepath) 106 | db = self.get_db(dbpath) 107 | 108 | # Rebuild the tree if it doesn't exist 109 | if reindex==True or len(db) == 0 or treemap == None: 110 | print('Indexing') 111 | dbpath, treepath = self.index_flow(path) 112 | self.reset_state() 113 | treemap = self.get_index(treepath) 114 | db = self.get_db(dbpath) 115 | 116 | model = self.get_model() 117 | # Convert queries to vector 118 | print('Converting query') 119 | if image_query: 120 | image_query = Image.open(image_query).convert('RGB') 121 | img = crafter.preproc(image_query) 122 | if query and image_query: 123 | text_vec = encoder.text_encoder(query, device, model) 124 | image_vec = encoder.image_query_encoder(img, device, model) 125 | query_vec = text_vec + image_vec 126 | elif query: 127 | query_vec = encoder.text_encoder(query, device, model) 128 | if negative_query: 129 | negative_query_vec = encoder.text_encoder(negative_query, self.device, model) 130 | query_vec = query_vec - negative_query_vec # Subtract negative query vector from positive query vector 131 | elif image_query: 132 | query_vec = encoder.image_query_encoder(img, device, model) 133 | else: 134 | print('No query!') 135 | return "" 136 | 137 | # Rank db by query 138 | 
print(f"Searching {len(db)} images") 139 | indexes = ranker.ranker(query_vec, treemap) 140 | ranked_files = ranker.nns_to_files(db, indexes) 141 | print(f"Done in {time.time() - start_time} seconds") 142 | 143 | return(ranked_files) 144 | 145 | def clean(self, root: str) -> None: 146 | ''' 147 | Removes all files produced by Memery 148 | ''' 149 | path = Path(root) 150 | if not path.is_dir(): 151 | logging.error("Invalid path: %s", root) 152 | db_path = path/Path(self.db_file) 153 | treemap_path = path/Path(self.index_file) 154 | db_path.unlink(missing_ok=True), treemap_path.unlink(missing_ok=True) 155 | 156 | def get_model(self): 157 | ''' 158 | Gets a new clip model if not initialized 159 | ''' 160 | if self.model == None: 161 | self.model = encoder.load_model(self.device) 162 | return self.model 163 | 164 | def get_index(self, treepath: str): 165 | ''' 166 | Gets a new index if not initialized 167 | 168 | Parameters: 169 | path (str): Path to index 170 | ''' 171 | if self.index == None: 172 | self.index = loader.treemap_loader(treepath) 173 | return self.index 174 | 175 | def get_db(self, dbpath: str): 176 | ''' 177 | Gets a new db if not initialized 178 | 179 | Parameters: 180 | path (str): Path to db 181 | ''' 182 | if self.db == None: 183 | self.db = loader.db_loader(dbpath, self.device) 184 | return self.db 185 | 186 | def reset_state(self) -> None: 187 | ''' 188 | Resets the index and db 189 | ''' 190 | self.index = None 191 | self.db = None 192 | -------------------------------------------------------------------------------- /notebooks/09_streamlit_app.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp streamlit_app" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Streamlit app" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Streamlit is a more convenient way to activate a quick user-facing GUI than Voila was, especially because of Voila having conflicting dependencies with nbdev.\n", 34 | "\n", 35 | "However, Streamlit wants a `.py` file instead of a notebook for development. This is kind of annoying, because to get the hot-reload effect from Streamlit we have to develop outside the notebook, but to maintain documentation (and compile with everything else) we have to keep the main source of truth right here. Perhaps a solution will present itself later; meanwhile, I have been using a scratch file `streamlit-app.py` for development and then copied it back here." 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "This is a workaround for the query_flow printing to stdout. Maybe it should be handled natively in Streamlit? 
" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "#export \n", 52 | "import streamlit as st\n", 53 | "from memery import core\n", 54 | "\n", 55 | "from pathlib import Path\n", 56 | "from PIL import Image\n", 57 | "\n", 58 | "from streamlit.report_thread import REPORT_CONTEXT_ATTR_NAME\n", 59 | "from threading import current_thread\n", 60 | "from contextlib import contextmanager\n", 61 | "from io import StringIO\n", 62 | "import sys" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "#export \n", 72 | "@contextmanager\n", 73 | "def st_redirect(src, dst):\n", 74 | " placeholder = st.empty()\n", 75 | " output_func = getattr(placeholder, dst)\n", 76 | "\n", 77 | " with StringIO() as buffer:\n", 78 | " old_write = src.write\n", 79 | "\n", 80 | " def new_write(b):\n", 81 | " if getattr(current_thread(), REPORT_CONTEXT_ATTR_NAME, None):\n", 82 | " buffer.write(b + '')\n", 83 | " output_func(buffer.getvalue() + '')\n", 84 | " else:\n", 85 | " old_write(b)\n", 86 | "\n", 87 | " try:\n", 88 | " src.write = new_write\n", 89 | " yield\n", 90 | " finally:\n", 91 | " src.write = old_write\n", 92 | "\n", 93 | "\n", 94 | "@contextmanager\n", 95 | "def st_stdout(dst):\n", 96 | " with st_redirect(sys.stdout, dst):\n", 97 | " yield\n", 98 | "\n", 99 | "\n", 100 | "@contextmanager\n", 101 | "def st_stderr(dst):\n", 102 | " with st_redirect(sys.stderr, dst):\n", 103 | " yield" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Trying to make good use of streamlit's caching service here; if the search query and folder are the same as a previous search, it will serve the cached version. Might present some breakage points though, yet to see." 
111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "#export\n", 120 | "@st.cache\n", 121 | "def send_image_query(path, text_query, image_query):\n", 122 | " ranked = core.query_flow(path, text_query, image_query=img)\n", 123 | " return(ranked)\n", 124 | "\n", 125 | "@st.cache\n", 126 | "def send_text_query(path, text_query):\n", 127 | " ranked = core.query_flow(path, text_query)\n", 128 | " return(ranked)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "This is the sidebar content" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "#export\n", 145 | "st.sidebar.title(\"Memery\")\n", 146 | "\n", 147 | "path = st.sidebar.text_input(label='Directory', value='./images')\n", 148 | "text_query = st.sidebar.text_input(label='Text query', value='')\n", 149 | "image_query = st.sidebar.file_uploader(label='Image query')\n", 150 | "im_display_zone = st.sidebar.beta_container()\n", 151 | "logbox = st.sidebar.beta_container()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "The image grid parameters" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "#export\n", 168 | "sizes = {'small': 115, 'medium':230, 'large':332, 'xlarge':600}\n", 169 | "\n", 170 | "l, m, r = st.beta_columns([4,1,1])\n", 171 | "with l:\n", 172 | " num_images = st.slider(label='Number of images',value=12)\n", 173 | "with m:\n", 174 | " size_choice = st.selectbox(label='Image width', options=[k for k in sizes.keys()], index=1)\n", 175 | "with r:\n", 176 | " captions_on = st.checkbox(label=\"Caption filenames\", value=False)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "And the main event loop, triggered every time the query parameters change.\n", 184 | "\n", 185 | "This doesn't really work in Jupyter at all. Hope it does once it's compiled." 
186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "#export\n", 195 | "if text_query or image_query:\n", 196 | " with logbox:\n", 197 | " with st_stdout('info'):\n", 198 | " if image_query is not None:\n", 199 | " img = Image.open(image_query).convert('RGB')\n", 200 | " with im_display_zone:\n", 201 | " st.image(img)\n", 202 | " ranked = send_image_query(path, text_query, image_query)\n", 203 | " else:\n", 204 | " ranked = send_text_query(path, text_query)\n", 205 | " ims = [Image.open(o).convert('RGB') for o in ranked[:num_images]]\n", 206 | " names = [o.replace(path, '') for o in ranked[:num_images]]\n", 207 | "\n", 208 | " if captions_on:\n", 209 | " images = st.image(ims, width=sizes[size_choice], channels='RGB', caption=names)\n", 210 | " else:\n", 211 | " images = st.image(ims, width=sizes[size_choice], channels='RGB')" 212 | ] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "Python 3", 218 | "language": "python", 219 | "name": "python3" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 4 224 | } 225 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Memery 2 | > Use human language to search your image folders! 3 | 4 | ## What is memery? 5 | 6 | ![meme about having too many memes](images/E2GoeMyWEAAkcLz.jpeg) 7 | 8 | The problem: you have a huge folder of images. Memes, screenshots, datasets, product photos, inspo albums, anything. You know that somewhere in that folder is the exact image you want, but you can't remember the filename or what day you saved it. There's nothing you can do but scroll through the folder, skimming hundreds of thumbnails, hoping you don't accidentally miss it, and that you'll recognize it when you do see it. 9 | 10 | Humans do this amazingly well. But even with computers, local image search is still a manual effort - you're still sorting through folders of images, like an archivist of old. 11 | 12 | **Now there's Memery**. 13 | 14 | The `memery` package provides natural language search over local images. You can use it to search for things like "a line drawing of a woman facing to the left" and get _reasonably good results!_ 15 | 16 | You can do this over thousands of images (it's not optimized for performance yet, but search times scale well under O(n)). 17 | 18 | You can view the images in a browser GUI, or pipe them through command line tools. 19 | 20 | You can use `memery` or its modules in Jupyter notebooks, including GUI functions! 21 | 22 | Under the hood, `memery` makes use of **CLIP**, the [Contrastive Language-Image Pretraining transformer](https://github.com/openai/CLIP), released by OpenAI in 2021. CLIP trains a vision transformer and a language transformer to find the same latent space for images and their captions. This makes it perfect for the purpose of natural language image search. CLIP is a giant achievement, and `memery` stands on its shoulders. 
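
To make the shared-latent-space idea concrete, here is a minimal sketch that scores one of this repo's example memes against two arbitrary captions, using the CLIP library directly. This is an illustration of the underlying mechanism only, not memery's own code path (memery wraps these calls in its loader, crafter, and encoder modules):

```
import clip
import torch
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Embed one image and two candidate captions into the shared latent space
image = preprocess(Image.open("images/memes/Wholesome-Meme-68.jpg")).unsqueeze(0).to(device)
texts = clip.tokenize(["a funny dog meme", "a sad cat meme"]).to(device)

with torch.no_grad():
    image_vec = model.encode_image(image)
    text_vecs = model.encode_text(texts)

# Normalize so the dot product is cosine similarity; the caption with the
# higher score is the closer description of the image
image_vec = image_vec / image_vec.norm(dim=-1, keepdim=True)
text_vecs = text_vecs / text_vecs.norm(dim=-1, keepdim=True)
print(image_vec @ text_vecs.T)
```

Memery runs this in the other direction — one text query scored against thousands of stored image vectors — and caches the image embeddings in an Annoy index so each search only has to encode the query.
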
23 | 
24 | Outline:
25 | - Usage
26 | - Install locally
27 | - Use GUI
28 | - Use CLI
29 | - Use the library
30 | - Development
31 | - Contributing
32 | - Who works on this project
33 | 
34 | ## Quickstart (Windows)
35 | - Run the `windows-install.bat` file
36 | - Run the `windows-run.bat` file
37 | 
38 | ## Installation
39 | 
40 | With Python 3.9 or greater:
41 | 
42 | From GitHub (recommended):
43 | ```
44 | pip install git+https://github.com/deepfates/memery.git
45 | ```
46 | or
47 | ```
48 | git clone https://github.com/deepfates/memery.git
49 | cd memery
50 | poetry install
51 | ```
52 | From PyPI:
53 | ```
54 | pip install memery
55 | pip install git+https://github.com/openai/CLIP.git
56 | ```
57 | 
58 | Currently memery defaults to GPU installation. This will
59 | probably be switched in a future version.
60 | 
61 | For now, if you want to run CPU-only, run the following command after installing memery:
62 | 
63 | `pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html`
64 | 
65 | Someday memery will be packaged in an easier-to-use format, but since this is a Python project it is hard to predict when that day will be.
66 | 
67 | If you want to help develop memery, you'll need to clone the repo. See below.
68 | 
69 | ## Usage
70 | 
71 | What's your use case?
72 | 
73 | **I have images and want to search them with a GUI app**
74 | 
75 | ↳ Use the Browser GUI
76 | 
77 | **I have a program/workflow and want to use image search as one part of it**
78 | 
79 | ↳ Use as a Python module
80 | 
81 | ↳ Use from command line or shell scripts
82 | 
83 | **I want to improve on and/or contribute to memery development**
84 | 
85 | ↳ Start by cloning the repo
86 | 
87 | ### Use GUI
88 | 
89 | Currently memery has a rough browser-based GUI. To launch it, run the following in a command line:
90 | 
91 | ```memery serve```
92 | 
93 | or set up a desktop shortcut that points to the above command.
94 | 
95 | Optionally, you can pass a directory to open on startup, like so:
96 | 
97 | ```memery serve /home/user/Pictures/memes```
98 | 
99 | Relative directories will also work:
100 | 
101 | ```
102 | cd ~/Pictures
103 | memery serve memes
104 | ```
105 | 
106 | The default directory passed will be `./images`, which is memery's example meme directory.
107 | 
108 | Memery will open in a browser window. The interface is pretty straightforward, but it has some quirks.
109 | 
110 | ![screenshot of memery GUI displaying wholesome memes](images/streamlit-screenshot.png)
111 | 
112 | The sidebar on the left controls the location and query for the search. The "Directory" box requires a full directory path; unfortunately, Streamlit does not yet have a folder-picker component. The path is relative to your current working directory when you run `memery serve`.
113 | 
114 | The search will run once you enter a text or image query. If you enter both text and image queries, memery will search for the combination.
115 | 
116 | Beneath these widgets is the output area for temporary messages displayed with each search. Mostly this can be ignored.
117 | 
118 | The right-hand panel displays the images and associated options. Major errors will appear here as giant stack traces; sometimes, changing variables in the other widgets will fix these errors live. If you get a large error here it's helpful to take a screenshot and share it with us in GitHub Issues.
119 | 
120 | ### Use CLI
121 | 
122 | The memery command line matches the core functionality of the library.
123 | 
124 | Use the `recall` command to search for images, passing the path and optionally passing the -n flag to control how many images are returned (default 10). Use the -t flag to pass a text query, the -i flag to pass an image query, or both:
125 | 
126 | ```
127 | memery recall PATH/TO/IMAGE/FOLDER -t 'text_query' -i 'PATH/TO/IMAGE.jpg' -n 20
128 | ```
129 | 
130 | You can encode and index all the images with the `build` command, optionally specifying the number of workers to build the dataset with (default 0):
131 | 
132 | ```
133 | memery build PATH/TO/IMAGE/FOLDER --workers 4
134 | ```
135 | 
136 | Clear out the encodings and index using the `purge` command:
137 | 
138 | ```
139 | memery purge PATH/TO/IMAGE/FOLDER
140 | ```
141 | 
142 | ### Use as a library
143 | 
144 | The core functionality of memery is wrapped in the `Memery` class:
145 | 
146 | ```
147 | from memery.core import Memery
148 | memery = Memery()
149 | ```
150 | 
151 | The method currently called `query_flow` accepts a folder name and a query and returns a ranked list of image files. You can query with text, a filepath to an image, or both.
152 | 
153 | 
154 | ```
155 | ranked = memery.query_flow('./images', 'dad joke')
156 | 
157 | print(ranked[:5])
158 | ```
159 | ```
160 | Converting query
161 | Searching 82 images
162 | Done in 4.014755964279175 seconds
163 | ['images/memes/Wholesome-Meme-68.jpg', 'images/memes/Wholesome-Meme-74.jpg', 'images/memes/Wholesome-Meme-88.jpg', 'images/memes/Wholesome-Meme-78.jpg', 'images/memes/Wholesome-Meme-23.jpg']
164 | ```
165 | 
166 | Here's the first result from that list:
167 | 
168 | ![](images/memes/Wholesome-Meme-68.jpg)
169 | 
170 | 
171 | So that's how to use memery. Let's look at how you can help make it better.
172 | 
173 | ## Development
174 | 
175 | ### Pull the repo
176 | 
177 | Clone this repository from GitHub:
178 | 
179 | `git clone https://github.com/deepfates/memery.git`
180 | 
181 | 
182 | ### Install dependencies and memery
183 | Enter the `memery` folder and install requirements:
184 | 
185 | ```
186 | cd memery
187 | poetry install
188 | ```
189 | 
190 | And finally install your local, editable copy of memery with
191 | 
192 | `pip install -e .`
193 | 
194 | ## Contributing
195 | 
196 | Memery is open source and you can contribute. See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on how you can help.
197 | 
198 | ### Who works on this project
199 | 
200 | Memery was first written by Max Anton Brewer aka @deepfates in the summer of 2021. Some commits are listed from @robotface-io, but that was just me using the wrong account when I first started.
201 | 
202 | Many UI and back-end improvements were added by @wkrettek in 2022! 🙌🎉🌟
203 | 
204 | I wrote this to solve my own needs and learn notebook-based development. I hope it helps other people too. If you can help me make it better, please do. I welcome any contribution, guidance or criticism.
205 | 
206 | **The ideal way to get support is to open an issue on GitHub**. However, the *fastest* way to get a response from me is probably to [direct message me on Twitter](https://twitter.com/deepfates).
207 | 208 | -------------------------------------------------------------------------------- /notebooks/02_crafter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp crafter" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Crafter\n", 27 | "\n", 28 | "Takes a list of image filenames and transforms them to batches of the correct dimensions for CLIP. \n", 29 | "\n", 30 | "This executor subclasses PyTorch's VisionDataset (for its file-loading expertise) and DataLoaders. The `DatasetImagePaths` takes a list of image paths and a transfom, returns the transformed tensors when called. DataLoader does batching internally so we pass it along to the encoder in that format.\n", 31 | "\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "#export\n", 41 | "import torch\n", 42 | "from torchvision.datasets import VisionDataset\n", 43 | "from PIL import Image\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "#export\n", 53 | "def make_dataset(new_files):\n", 54 | " '''Returns a list of samples of a form (path_to_sample, class) and in \n", 55 | " this case the class is just the filename'''\n", 56 | " samples = []\n", 57 | " slugs = []\n", 58 | " for i, f in enumerate(new_files):\n", 59 | " path, slug = f\n", 60 | " samples.append((str(path), i))\n", 61 | " slugs.append((slug, i))\n", 62 | " return(samples, slugs)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "#export\n", 72 | "def pil_loader(path: str) -> Image.Image:\n", 73 | " # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)\n", 74 | " with open(path, 'rb') as f:\n", 75 | " img = Image.open(f)\n", 76 | " return img.convert('RGB')" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "#export\n", 86 | "class DatasetImagePaths(VisionDataset):\n", 87 | " def __init__(self, new_files, transforms = None):\n", 88 | " super(DatasetImagePaths, self).__init__(new_files, transforms=transforms)\n", 89 | " samples, slugs = make_dataset(new_files)\n", 90 | " self.samples = samples\n", 91 | " self.slugs = slugs\n", 92 | " self.loader = pil_loader\n", 93 | " self.root = 'file dataset'\n", 94 | " def __len__(self):\n", 95 | " return(len(self.samples))\n", 96 | " \n", 97 | " def __getitem__(self, index):\n", 98 | " path, target = self.samples[index]\n", 99 | " sample = self.loader(path)\n", 100 | " if sample is not None:\n", 101 | " if self.transforms is not None:\n", 102 | " sample = self.transforms(sample)\n", 103 | " return sample, target" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "new_files = [('images/memes/Wholesome-Meme-8.jpg', 'Wholesome-Meme-8'), ('images/memes/Wholesome-Meme-1.jpg', 'Wholesome-Meme-1')]#, 
('images/corrupted-file.jpeg', 'corrupted-file.jpeg')]" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "crafted = DatasetImagePaths(new_files)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "crafted[0][0]" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "Okay, that seems to work decently. Test with transforms, which I will just find in CLIP source code and copy over, to prevent having to import CLIP in this executor." 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "#export\n", 147 | "from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "#export\n", 157 | "def clip_transform(n_px):\n", 158 | " return Compose([\n", 159 | " Resize(n_px, interpolation=Image.BICUBIC),\n", 160 | " CenterCrop(n_px),\n", 161 | " ToTensor(),\n", 162 | " Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),\n", 163 | " ])" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "Put that all together, and wrap in a DataLoader for batching. In future, need to figure out how to pick batch size and number of workers programmatically bsed on device capabilities." 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "#export\n", 180 | "def crafter(new_files, device, batch_size=128, num_workers=4): \n", 181 | " with torch.no_grad():\n", 182 | " imagefiles=DatasetImagePaths(new_files, clip_transform(224))\n", 183 | " img_loader=torch.utils.data.DataLoader(imagefiles, batch_size=batch_size, shuffle=False, num_workers=num_workers)\n", 184 | " return(img_loader)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 194 | "device" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "crafted_files = crafter(new_files, device)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "crafted_files.batch_size, crafted_files.num_workers" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "file = new_files[1][0]" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "#export\n", 231 | "def preproc(img):\n", 232 | " transformed = clip_transform(224)(img)\n", 233 | " return(transformed)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "im = preproc([Image.open(file)][0])" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 
249 | "outputs": [], 250 | "source": [ 251 | "# %matplotlib inline" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "# show_image(im)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python 3", 274 | "language": "python", 275 | "name": "python3" 276 | } 277 | }, 278 | "nbformat": 4, 279 | "nbformat_minor": 4 280 | } 281 | -------------------------------------------------------------------------------- /notebooks/01_loader.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp loader" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Loader\n", 27 | "> Functions for finding and loading image files and saved embeddings\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## File manipulation" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "#export\n", 44 | "from pathlib import Path\n", 45 | "from PIL import Image\n", 46 | "from tqdm import tqdm" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "**NB: A lot of this implementation is too specific, especially the slugified filenames being used for dictionary IDs. 
Should be replaced with a better database implementation.**" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "#export\n", 63 | "def slugify(filepath):\n", 64 | " return f'{filepath.stem}_{str(filepath.stat().st_mtime).split(\".\")[0]}'\n", 65 | "\n", 66 | "def get_image_files(path):\n", 67 | " img_extensions = {'.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'}\n", 68 | " return [(f, slugify(f)) for f in tqdm(path.rglob('*')) if f.suffix in img_extensions]\n", 69 | "\n", 70 | "def get_valid_images(path):\n", 71 | " filepaths = get_image_files(path)\n", 72 | " return [f for f in filepaths if verify_image(f[0])]\n", 73 | "\n", 74 | "# This returns a boolean and should be called is_valid_image or something like that\n", 75 | "def verify_image(f):\n", 76 | " try:\n", 77 | " img = Image.open(f)\n", 78 | " img.verify() \n", 79 | " return(True)\n", 80 | " except Exception as e:\n", 81 | " print(f'Skipping bad file: {f}\\ndue to {type(e)}')\n", 82 | " return(False)\n", 83 | " " 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Demonstrating the usage here, not a great test though:" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "root = Path('./images')\n", 100 | "\n", 101 | "\n", 102 | "filepaths = get_image_files(root)\n", 103 | "\n", 104 | "len(filepaths)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "filepaths[:3]" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Loaders\n", 121 | "\n", 122 | "So we have a list of paths and slugified filenames from the folder. We want to see if there's an archive, so that we don't have to recalculate tensors for images we've seen before. Then we want to pass that directly to the indexer, but send the new images through the crafter and encoder first.\n", 123 | "\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "#export\n", 133 | "import torch\n", 134 | "import torchvision" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "We want to use the GPU, if possible, for all the PyTorch functions. But if we can't get access to it we need to fall back to CPU. Either way we call it `device` and pass it to each function in the executors that use torch." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "#export\n", 151 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 152 | "device" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "The `archive_loader` is only called in `indexFlow`. 
It takes the list of image files and the folder they're in (and the torch device), opens an archive if there is one, and splits the files into already-archived entries and new files that still need encoding." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "#export\n", 169 | "def archive_loader(filepaths, root, device):\n", 170 | " dbpath = root/'memery.pt'\n", 171 | "# dbpath_backup = root/'memery.pt'\n", 172 | " db = db_loader(dbpath, device)\n", 173 | " \n", 174 | " current_slugs = [slug for path, slug in filepaths] \n", 175 | " archive_db = {i:db[item[0]] for i, item in enumerate(db.items()) if item[1]['slug'] in current_slugs} \n", 176 | " archive_slugs = [v['slug'] for v in archive_db.values()]\n", 177 | " new_files = [(str(path), slug) for path, slug in filepaths if slug not in archive_slugs and verify_image(path)]\n", 178 | " \n", 179 | " return(archive_db, new_files)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "The `db_loader` takes a location and returns either the archive dictionary or an empty dictionary. Decomposed to its own function so it can be called separately from `archive_loader` or `queryFlow`. " 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "#export\n", 196 | "def db_loader(dbpath, device):\n", 197 | "\n", 198 | " # check for savefile or backup and extract\n", 199 | " if Path(dbpath).exists():\n", 200 | " db = torch.load(dbpath, device)\n", 201 | "# elif dbpath_backup.exists():\n", 202 | "# db = torch.load(dbpath_backup)\n", 203 | " else:\n", 204 | " db = {}\n", 205 | " return(db)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "The library `annoy`, [Approximate Nearest Neighbors Oh Yeah!](https://github.com/spotify/annoy), allows us to search through vector space for approximate matches instead of exact best-similarity matches. We sacrifice a little accuracy for a lot of speed: lookup time grows far more slowly than the number of images, so searching tens of thousands of images takes only a little longer than searching tens."
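To make that tradeoff concrete, here is a minimal sketch of the Annoy calls involved, using the same 512-dimension angular configuration that `treemap_loader` uses below. The vectors and tree count here are arbitrary illustration values, not memery's.

```python
# Minimal Annoy round trip -- illustration only; the data is random.
import random
from annoy import AnnoyIndex

index = AnnoyIndex(512, 'angular')   # same dimensionality/metric as memery
for i in range(10_000):
    index.add_item(i, [random.gauss(0, 1) for _ in range(512)])
index.build(20)                      # more trees: better recall, slower build

query = [random.gauss(0, 1) for _ in range(512)]
neighbors = index.get_nns_by_vector(query, 10)  # approximate top-10, very fast
```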
213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "#export\n", 222 | "from annoy import AnnoyIndex" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "#export\n", 232 | "def treemap_loader(treepath):\n", 233 | " treemap = AnnoyIndex(512, 'angular')\n", 234 | "\n", 235 | " if treepath.exists():\n", 236 | " treemap.load(str(treepath))\n", 237 | " else:\n", 238 | " treemap = None\n", 239 | " return(treemap)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "treepath = Path('images/memery.ann')" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "treemap = AnnoyIndex(512, 'angular')" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "if treepath.exists():\n", 267 | " treemap.load(str(treepath))\n", 268 | "else:\n", 269 | " treemap = None" 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": {}, 275 | "source": [ 276 | "Here we just test on the local image folder" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "archive_db, new_files = archive_loader(get_image_files(root), root, device)" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "len(archive_db), len(new_files), treemap.get_n_items()" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": null, 300 | "metadata": {}, 301 | "outputs": [], 302 | "source": [ 303 | "dbpath = root/'memery.pt'\n", 304 | "# dbpath_backup = root/'memery.pt'\n", 305 | "db = db_loader(dbpath, device)\n", 306 | "\n", 307 | "current_slugs = [slug for path, slug in filepaths] " 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "archive_db = {i:db[item[0]] for i, item in enumerate(db.items()) if item[1]['slug'] in current_slugs} " 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "len(archive_db)" 326 | ] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "Python 3", 332 | "language": "python", 333 | "name": "python3" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 4 338 | } 339 | -------------------------------------------------------------------------------- /notebooks/08_jupyter_gui.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp gui" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# GUI" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | 
"metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "#export\n", 36 | "import ipywidgets as widgets\n", 37 | "\n", 38 | "from memery.core import query_flow\n", 39 | "from pathlib import Path\n", 40 | "from IPython.display import clear_output\n", 41 | "\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## App design\n", 49 | "\n", 50 | "So what zones do we need for a proper image search app? Two examples come to mind: https://same.energy and https://images.google.com. One is minimalist and brutalist while the other is maximalist in features and refined in design.\n", 51 | "\n", 52 | "Same.energy proves that all you need for image search is a text box, a button, and images. (At least, that's how it started off, and sometimes how it is today. They're A/B testing heavily right now, and we'll see what it evolves into.) If you click on an image result, you are now searching for that image. If you add text, it asks if you want to search for the image with text or just the image. This can lead in any hill-climbing direction the user wants, I suppose. \n", 53 | "\n", 54 | "Google Images has up to six toolbars overhanging the images, and a complicated lightbox selection window that shows the individual image with a subset of similar images below it. Nested and stacked, providing lots of specific search and filtering capabilities. Not as likely to induce a wikiwalk. They've introduced \"collections\" now, which are presumably meant to replace the \"download to random image folder\" functionality of current browsers.\n", 55 | "\n", 56 | "There's also Pinterest, of course, though their engineering is geared more toward gaming Google results than finding the right image by search. Thye have a great browse mode though, and save features. Best of all, they have a goodreads-style user tagging function that allows for a whole different way of sorting images than availableon the other sites.\n", 57 | "\n", 58 | "The functions available from these sites include:\n", 59 | "\n", 60 | "- Text query\n", 61 | "- Image query\n", 62 | "- Text and image query (totally doable with CLIP vectors)\n", 63 | "- Browse visually similar images\n", 64 | "- Save images (to cloud mostly)\n", 65 | "- Filter images by:\n", 66 | " - Size\n", 67 | " - Color\n", 68 | " - Type\n", 69 | " - Time\n", 70 | " - Usage rights\n", 71 | "- Visit homepage for image\n", 72 | "- Tagging images\n", 73 | "- Searching by tags additively\n", 74 | "- Filtering out by tags\n", 75 | "\n", 76 | "Tags and filter categories can both be simulated with CLIP vectors of text tokens like \"green\" or \"noisy\" or \"illustration\" or \"menswear\". Size of image can be inferred directly from filesize or recorded from bitmap data in the `crafter`. Images as search queries and visually similar image browser are the same function but in different user interaction modes. And image links can be to local files, rather than homepages. Saving images not as relevant in this context, though easily sending them somewhere else is. \n", 77 | "\n", 78 | "Thus there are really three projects here:\n", 79 | "- Basic app functionality with search and grid\n", 80 | "- Visually simillar image browsing and search\n", 81 | "- Tagging and filtering, auto and manual\n", 82 | "\n" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "## Basic app functionality\n", 90 | "\n", 91 | "We want a unified search bar (variable inputs and a button) and an image grid. 
And each search should remain accessible after it's run, so we can navigate between and compare. It would be nice to use browser-native navigation but for now, with the plan to run a notebook in Voila and serve locally, better to use `ipywidgets` Tabs mode. Eventually it would also be good to replace or upgrade `ipyplot` for better navigation, but first we should sketch out the new-tab functionality.\n", 92 | "\n", 93 | "We need a tabs output, an event loop, and a dictionary of searches run, with each search returning a list of filenames to be printed in a sub-output within the tab. All wrapped in a VBox with the inputs.\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "filepaths = ['images/memes/Wholesome-Meme-8.jpg', 'images/memes/Wholesome-Meme-1.jpg']" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "#export\n", 112 | "def get_image(file_loc):\n", 113 | " filepath = Path(file_loc)\n", 114 | " with open(filepath, 'rb') as file:\n", 115 | " image = widgets.Image(value=file.read(), width=200)\n", 116 | " \n", 117 | " return(image)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "display(get_image(filepaths[0]))" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "imgs = [get_image(f) for f in filepaths]" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "#export\n", 145 | "def get_grid(filepaths, n=4):\n", 146 | " imgs = [get_image(f) for f in filepaths[:n] if Path(f).exists()]\n", 147 | " grid = widgets.GridBox(imgs, layout=widgets.Layout(grid_template_columns=\"repeat(auto-fit, 200px)\"))\n", 148 | " return(grid)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "get_grid(filepaths)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "#export\n", 167 | "from PIL import Image\n", 168 | "from io import BytesIO" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "#export\n", 178 | "def update_tabs(path, query, n_images, searches, tabs, logbox, im_display_zone, image_query=None):\n", 179 | " stem = Path(path.value).stem\n", 180 | " slug = f\"{stem}:{str(query.value)}\"\n", 181 | " if slug not in searches.keys():\n", 182 | " with logbox:\n", 183 | " print(slug)\n", 184 | " if image_query:\n", 185 | " im_queries = [name for name, data in image_query.items()]\n", 186 | " \n", 187 | " img = [Image.open(BytesIO(file_info['content'])).convert('RGB') for name, file_info in image_query.items()]\n", 188 | " ranked = query_flow(path.value, query.value, image_query=img[-1])\n", 189 | " slug = slug + f'/{im_queries}'\n", 190 | " \n", 191 | " if len(im_queries) > 0:\n", 192 | " with im_display_zone:\n", 193 | " clear_output()\n", 194 | " display(img[-1])\n", 195 | " else:\n", 196 | " ranked = query_flow(path.value, query.value)\n", 197 | " searches[f'{slug}'] = ranked\n", 198 | " \n", 199 | " tabs.children = [get_grid(v, n=n_images.value) 
for v in searches.values()]\n", 200 | " for i, k in enumerate(searches.keys()):\n", 201 | " tabs.set_title(i, k)\n", 202 | " tabs.selected_index = len(searches)-1\n", 203 | "\n", 204 | " \n", 205 | "# return(True)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "#export\n", 215 | "class appPage():\n", 216 | " \n", 217 | " def __init__(self):\n", 218 | " self.inputs_layout = widgets.Layout(max_width='80%')\n", 219 | "\n", 220 | " self.path = widgets.Text(placeholder='path/to/image/folder', value='images/', layout=self.inputs_layout)\n", 221 | " self.query = widgets.Text(placeholder='a funny dog meme', value='a funny dog meme', layout=self.inputs_layout)\n", 222 | " \n", 223 | " self.image_query = widgets.FileUpload()\n", 224 | " self.im_display_zone = widgets.Output(max_height='5rem')\n", 225 | "\n", 226 | " self.n_images = widgets.IntSlider(description='#', value=4, layout=self.inputs_layout)\n", 227 | " self.go = widgets.Button(description=\"Search\", layout=self.inputs_layout)\n", 228 | " self.logbox = widgets.Output(layout=widgets.Layout(max_width='80%', height=\"3rem\", overflow=\"none\"))\n", 229 | " self.all_inputs_layout = widgets.Layout(max_width='80vw', min_height='40vh', flex_flow='row wrap', align_content='flex-start')\n", 230 | "\n", 231 | " self.inputs = widgets.Box([self.path, self.query, self.image_query, self.n_images, self.go, self.im_display_zone, self.logbox], layout=self.all_inputs_layout)\n", 232 | " self.tabs = widgets.Tab()\n", 233 | " self.page = widgets.AppLayout(left_sidebar=self.inputs, center=self.tabs)\n", 234 | "\n", 235 | " self.searches = {}\n", 236 | " self.go.on_click(self.page_update)\n", 237 | " \n", 238 | " display(self.page)\n", 239 | "\n", 240 | " def page_update(self, b):\n", 241 | " \n", 242 | " update_tabs(self.path, self.query, self.n_images, self.searches, self.tabs, self.logbox, self.im_display_zone, self.image_query.value)\n", 243 | "\n", 244 | " \n" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "app = appPage()" 254 | ] 255 | } 256 | ], 257 | "metadata": { 258 | "kernelspec": { 259 | "display_name": "Python 3", 260 | "language": "python", 261 | "name": "python3" 262 | } 263 | }, 264 | "nbformat": 4, 265 | "nbformat_minor": 4 266 | } 267 | -------------------------------------------------------------------------------- /notebooks/00_core.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# default_exp core" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# Core\n", 27 | "\n", 28 | "> Index, query and save embeddings of images by folder" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Rationale\n", 36 | "\n", 37 | "**Memery takes a folder of images, and a search query, and returns a list of ranked images.**\n", 38 | "\n", 39 | "The images and query are both projected into a high-dimensional semantic space, courtesy of OpenAI's 
[CLIP](https://github.com/openai/CLIP) model ([blog post](https://openai.com/blog/clip/)). These embeddings are indexed and treemapped using the [Annoy](https://github.com/spotify/annoy) library, which provides nearest-neighbor results for the search query. These results are then transmitted to the user interface (currently as a list of file locations).\n", 40 | "\n", 41 | "We provide various interfaces for the end user, which all call upon the functions `query_flow` and `index_flow` below.\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## Modular flow system" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Memery uses the Neural Search design pattern as described by Han Xiao in e.g. [General Neural Elastic Search and Go Way Beyond](https://hanxiao.io/2019/07/29/Generic-Neural-Elastic-Search-From-bert-as-service-and-Go-Way-Beyond)&c.\n", 56 | "\n", 57 | "This is a system designed to be scalable and distributed if necessary. Even for a single-machine scenario, I like the functional style of it: grab data, transform it and pass it downstream, all the way from the folder to the output widget.\n", 58 | "\n", 59 | "There are two main types of operator in this pattern: **flows** and **executors**.\n", 60 | "\n", 61 | "**Flows** are specific patterns of data manipulation and storage. **Executors** are the operators that transform the data within the flow. \n", 62 | "\n", 63 | "There are two core flows to any search system: indexing, and querying. The plan here is to make executors that can be composed into flows and then compose the flows into a UI that supports querying and, to some extent, indexing as well.\n", 64 | "\n", 65 | "The core executors for this use case are:\n", 66 | " - Loader\n", 67 | " - Crafter\n", 68 | " - Encoder\n", 69 | " - Indexer\n", 70 | " - Ranker\n", 71 | " - Gateway\n", 72 | " \n", 73 | "\n", 74 | "**NB: The executors are currently implemented as functions. A future upgrade will change the names to verbs to match, or change their implementation to classes if they're going to act as nouns.**\n", 75 | "\n", 76 | "These executors are being implemented ad hoc in the flow functions, but should probably be given single entry points and have their specific logic happen within their own files. Deeper abstractions with less coupling."
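One way to read that last point: each executor gets a single callable entry point, and a flow is just their composition. A hypothetical sketch of the refactor (the names and signatures here are invented, not memery's current code):

```python
# Hypothetical executor protocol -- a sketch of the refactor described above.
from typing import Callable

Executor = Callable[[dict], dict]  # each executor transforms a shared state dict

def compose_flow(*executors: Executor) -> Executor:
    '''Chain executors into a flow; each receives and returns the state dict.'''
    def flow(state: dict) -> dict:
        for execute in executors:
            state = execute(state)
        return state
    return flow

# e.g. index_flow = compose_flow(load, craft, encode, index)
#      query_flow = compose_flow(load, encode_query, rank)
```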
77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "## Flows" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "#export\n", 93 | "import time\n", 94 | "import torch\n", 95 | "\n", 96 | "from pathlib import Path\n", 97 | "from memery.loader import get_image_files, get_valid_images, archive_loader, db_loader, treemap_loader \n", 98 | "from memery.crafter import crafter, preproc\n", 99 | "from memery.encoder import image_encoder, text_encoder, image_query_encoder\n", 100 | "from memery.indexer import join_all, build_treemap, save_archives\n", 101 | "from memery.ranker import ranker, nns_to_files" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "#### Indexing" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "#export\n", 118 | "def index_flow(path):\n", 119 | " '''Indexes images in path, returns the location of save files'''\n", 120 | " root = Path(path)\n", 121 | " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 122 | " \n", 123 | " # Loading\n", 124 | " filepaths = get_image_files(root)\n", 125 | " archive_db = {}\n", 126 | " \n", 127 | " archive_db, new_files = archive_loader(filepaths, root, device)\n", 128 | " print(f\"Loaded {len(archive_db)} encodings\")\n", 129 | " print(f\"Encoding {len(new_files)} new images\")\n", 130 | "\n", 131 | " # Crafting and encoding\n", 132 | " crafted_files = crafter(new_files, device)\n", 133 | " new_embeddings = image_encoder(crafted_files, device)\n", 134 | " \n", 135 | " # Reindexing\n", 136 | " db = join_all(archive_db, new_files, new_embeddings)\n", 137 | " print(\"Building treemap\")\n", 138 | " t = build_treemap(db)\n", 139 | " \n", 140 | " print(f\"Saving {len(db)} encodings\")\n", 141 | " save_paths = save_archives(root, t, db)\n", 142 | "\n", 143 | " return(save_paths)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "show_doc(index_flow)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "We can index the local `images` folder to test" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "\n", 169 | "# delete the current savefile for testing purposes\n", 170 | "Path('images/memery.pt').unlink()\n", 171 | "Path('images/memery.ann').unlink()\n", 172 | "\n", 173 | "# run the index flow. 
returns the path\n", 174 | "save_paths = index_flow('./images')" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "assert save_paths # Returns True if the path exists\n", 184 | "save_paths" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "#### Querying" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "#export\n", 201 | "def query_flow(path, query=None, image_query=None):\n", 202 | " '''\n", 203 | " Searches an indexed folder and returns file paths ranked by query.\n", 204 | " \n", 205 | " Parameters:\n", 206 | " path (str): Folder to search\n", 207 | " query (str): Search query text\n", 208 | " image_query (Tensor): Search query image(s)\n", 209 | "\n", 210 | " Returns:\n", 211 | " list of file paths ranked by query\n", 212 | " '''\n", 213 | " start_time = time.time()\n", 214 | " root = Path(path)\n", 215 | " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 216 | " \n", 217 | " # Check if we should re-index the files\n", 218 | " print(\"Checking files\")\n", 219 | " dbpath = root/'memery.pt'\n", 220 | " db = db_loader(dbpath, device)\n", 221 | " treepath = root/'memery.ann'\n", 222 | " treemap = treemap_loader(treepath)\n", 223 | " filepaths = get_valid_images(root)\n", 224 | "\n", 225 | " # # Rebuild the tree if it doesn't exist\n", 226 | " # if treemap is None or len(db) != len(filepaths):\n", 227 | " # print('Indexing')\n", 228 | " # dbpath, treepath = index_flow(root)\n", 229 | " # treemap = treemap_loader(Path(treepath))\n", 230 | " # db = db_loader(dbpath, device)\n", 231 | " \n", 232 | " # Convert queries to vector\n", 233 | " print('Converting query')\n", 234 | " if image_query:\n", 235 | " img = preproc(image_query)\n", 236 | " if query and image_query:\n", 237 | " text_vec = text_encoder(query, device)\n", 238 | " image_vec = image_query_encoder(img, device)\n", 239 | " query_vec = text_vec + image_vec\n", 240 | " elif query:\n", 241 | " query_vec = text_encoder(query, device)\n", 242 | " elif image_query:\n", 243 | " query_vec = image_query_encoder(img, device)\n", 244 | " else:\n", 245 | " print('No query!')\n", " return([])\n", 246 | "\n", 247 | " # Rank db by query \n", 248 | " print(f\"Searching {len(db)} images\")\n", 249 | " indexes = ranker(query_vec, treemap)\n", 250 | " ranked_files = nns_to_files(db, indexes)\n", 251 | " \n", 252 | " print(f\"Done in {time.time() - start_time} seconds\")\n", 253 | " \n", 254 | " return(ranked_files)\n", 255 | "\n", 256 | " " 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "show_doc(query_flow)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "ranked = query_flow('./images', 'dog')\n", 275 | "\n", 276 | "print(ranked[0])\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "assert ranked[0] == \"images/memes/Wholesome-Meme-8.jpg\"" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "![](images/memes/Wholesome-Meme-8.jpg)\n", 293 | "\n", 294 | "*Then what?! 
What are the limitations of this system? What are its options? What configuration can I do if I'm a power user? Why did you organize things this way instead of a different way?*\n", 295 | "\n", 296 | "*This, and probably each of the following notebooks, would benefit from a small recording session where I try to explain it to an imaginary audience. So that I can get the narrative of how it works, and then arrange the code around that.*\n" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [] 303 | } 304 | ], 305 | "metadata": { 306 | "kernelspec": { 307 | "display_name": "Python 3", 308 | "language": "python", 309 | "name": "python3" 310 | }, 311 | "language_info": { 312 | "name": "python", 313 | "version": "3.7.7" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 4 318 | } 319 | -------------------------------------------------------------------------------- /notebooks/_visualize.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#default_exp visualizer" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#hide\n", 19 | "from nbdev.showdoc import *\n", 20 | "%matplotlib inline" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "# Visualize" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "---\n", 35 | "## Dimensionality Reduction\n", 36 | "\n", 37 | "One use-case for `memery` is to explore large image datasets, for cleaning and curation purposes. Sifting images by hand takes a long time, and it's nearly impossible to keep all the images in your mind at once.\n", 38 | "\n", 39 | "Even with semantic search capabilities, it's hard to get an overview of all the images. CLIP sees things in many more dimensions than humans do, so no matter how many searches you run you can't be sure if you're missing some outliers you don't even know to search for.\n", 40 | "\n", 41 | "The ideal overview would be a map of all the images along all the dimensions, but we don't know how to visualize or parse 512-dimensional spaces for human brains. So we have to do dimensionality reduction: find an embedding in a space with ≤ 3 dimensions that best emulates the 512-dim embeddings we have, and map that instead.\n", 42 | "\n", 43 | "A recent advance in dimensionality reduction is Minimum Distortion Embedding, an abstraction over many existing methods like PCA, t-SNE, or k-means clustering. We can use the `pymde` library to embed them and `matplotlib` to draw the images as their own markers on the graph. 
We'll also need `torch` to process the tensors, and `memery` functions to process the database" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "import pymde\n", 53 | "import torch\n", 54 | "from pathlib import Path\n", 55 | "from memery.loader import db_loader\n", 56 | "\n", 57 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Let's get a database of embeddings from the local folder" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "db = db_loader('images/memery.pt', device)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "db[0].keys()" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "embeds = torch.stack([v['embed'] for v in db.values()], 0)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "There are two methods to invoke with `pymde`: `preserve_neighbors` and `preserve_distances`. They create different textures in the final product. Let's see what each looks like on our sample dataset." 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "mde_n = pymde.preserve_neighbors(embeds, verbose=False, device='cuda')\n", 108 | "mde_d = pymde.preserve_distances(embeds, verbose=False, device='cuda')" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "embed_n = mde_n.embed(verbose=False, snapshot_every=1)\n", 118 | "embed_d = mde_d.embed(verbose=False, snapshot_every=1)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "pymde.plot(embed_n)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "pymde.plot(embed_d)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "mde_n.play(savepath='./graphs/mde_n.gif')" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "mde_d.play(savepath='./graphs/mde_d.gif')" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "assert embed_n.shape" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "---\n", 171 | "Now I want to plot images as markers, instead of little dots. Haven't figured out yet how to merge this with `pymde.plot` functions, so I'm doing it right in matplotlib. \n", 172 | "\n", 173 | "If we just plot the images at their coordinates, they will overlap (especially on the `preserve_neighbors` plot) so eventually maybe I can normalize the x and y axes and plot things on a grid? 
At least approximately." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "import matplotlib.pyplot as plt\n", 183 | "from matplotlib.offsetbox import OffsetImage, AnnotationBbox\n", 184 | "from tqdm import tqdm" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "def plot_images_from_tensors(coords, image_paths, dpi=600, savefile = 'default.jpg', zoom=0.03):\n", 194 | " fig, ax = plt.subplots()\n", 195 | " fig.dpi = dpi\n", 196 | " fig.set_size_inches(8,8)\n", 197 | " \n", 198 | " ax.xaxis.set_visible(False) \n", 199 | " ax.yaxis.set_visible(False)\n", 200 | " \n", 201 | " cc = coords.cpu()\n", 202 | " x_max, y_max = cc.argmax(0)\n", 203 | " x_min, y_min = cc.argmin(0)\n", 204 | " \n", 205 | " low = min(cc[x_min][0], cc[y_min][1])\n", 206 | " high = max(cc[x_max][0], cc[y_max][1])\n", 207 | " sq_lim = max(abs(low), abs(high))\n", 208 | " \n", 209 | " plt.xlim(low, high)\n", 210 | " plt.ylim(low, high)\n", 211 | " \n", 212 | "# plt.xlim(-sq_lim, sq_lim)\n", 213 | "# plt.ylim(-sq_lim, sq_lim)\n", 214 | "\n", 215 | " for i, coord in tqdm(enumerate(coords)):\n", 216 | " try:\n", 217 | " x, y = coord\n", 218 | "\n", 219 | " path = str(image_paths[i])\n", 220 | " with open(path, 'rb') as image_file:\n", 221 | " image = plt.imread(image_file)\n", 222 | "\n", 223 | " im = OffsetImage(image, zoom=zoom, resample=False)\n", 224 | " im.image.axes = ax\n", 225 | " ab = AnnotationBbox(im, (x,y), frameon=False, pad=0.0,)\n", 226 | " ax.add_artist(ab)\n", 227 | " except SyntaxError:\n", 228 | " pass  # skip images the reader can't parse\n", 229 | " print(\"Drawing images as markers...\")\n", 230 | " plt.savefig(savefile)\n", 231 | " print(f'Saved image to {savefile}')\n" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "filenames = [v['fpath'] for v in db.values()]" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "savefile = 'graphs/embed_n.jpg'\n", 250 | "\n", 251 | "plot_images_from_tensors(embed_n, filenames, savefile=savefile)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "savefile = 'graphs/embed_d.jpg'\n", 261 | "\n", 262 | "plot_images_from_tensors(embed_d, filenames, savefile=savefile)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "I suppose it makes sense that the `preserve_neighbors` function clumps things together and the `preserve_distances` spreads them out. It's nice to see the actual distances and texture of the data, for sure. But I'd also like to be able to see them bigger, with only relative data about where they are to each other. Let's see if we can implement a normalization function and plot them again.\n", 270 | "\n", 271 | "Currently the embedding tensor is basically a list of pairs of floats. Can I convert each coordinate to its integer rank, from 0 up to the number of images? I don't know how to do this in matrix math, so I'll try it more simply first."
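Anticipating the "do it all in torch" idea mentioned just below: the same rank transform can be written as a double `argsort` over each axis. A small sketch, assuming `embed_n` is the (N, 2) embedding tensor from above; behavior should match the Python version up to tie-breaking.

```python
# Rank-normalize 2-D coordinates entirely in torch -- a sketch, not memery code.
import torch

def normalize_embeds_torch(coords: torch.Tensor) -> torch.Tensor:
    # argsort of argsort turns each column of values into integer ranks 0..N-1
    return coords.argsort(dim=0).argsort(dim=0)

# norms = normalize_embeds_torch(embed_n.cpu())
```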
272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "len(embed_n)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "embed_list = [(float(x),float(y)) for x,y in embed_n]\n", 290 | "embed_dict = {k: v for k, v in zip(filenames, embed_list)}\n", 291 | "len(embed_dict)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "def normalize_embeds(embed_dict):\n", 301 | " sort_x = {k: v[0] for k, v in sorted(embed_dict.items(), key=lambda item: item[1][0])}\n", 302 | " norm_x = {item[0]: i for i, item in enumerate(sort_x.items())}\n", 303 | " \n", 304 | " sort_y = {k: v[1] for k, v in sorted(embed_dict.items(), key=lambda item: item[1][1])}\n", 305 | " norm_y = {item[0]: i for i, item in enumerate(sort_y.items())}\n", 306 | "\n", 307 | " normalized_dict = {k: (norm_x[k], norm_y[k]) for k in embed_dict.keys()}\n", 308 | " return(normalized_dict)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "norm_dict = normalize_embeds(embed_dict)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "len(norm_dict)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "I probably could do that all in torch but right now I'm just going to pipe it back into tensors and put it through my plotting function:" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "norms = torch.stack([torch.tensor([x, y]) for x, y in norm_dict.values()])\n" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "plot_images_from_tensors(norms, filenames, savefile='graphs/normalized.jpg')" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "It worked!! The clusters still exist but their distances are relaxed so they can be displayed better on the graph. It's removing some information, for sure, but it's unclear whether that is information a human needs.\n", 359 | "\n", 360 | "I wonder if it works on the `preserve_distances` method..." 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "embed_list = [(float(x),float(y)) for x,y in embed_d]\n", 377 | "embed_dict = {k: v for k, v in zip(filenames, embed_list)}\n", 378 | "norm_dict = normalize_embeds(embed_dict)\n", 379 | "norms = torch.stack([torch.tensor([x, y]) for x, y in norm_dict.values()])" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "plot_images_from_tensors(norms, filenames, savefile='graphs/normalized-d.jpg')" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "This looks okay. 
It reduces overall distances but keeps relative distances? Still not sure what the actionable difference between these two methods is. \n", 396 | "\n", 397 | "Well, it works okay for now. The next question is, how to incorporate it into a working GUI?\n", 398 | "\n", 399 | "I wonder how matplotlib does natively, for a much larger dataset. Let's see:\n", 400 | "\n", 401 | "# Large dataset" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "def normalize_tensors(embdgs, names):\n", 411 | " embed_list = [(float(x),float(y)) for x,y in embdgs]\n", 412 | " embed_dict = {k: v for k, v in zip(names, embed_list)}\n", 413 | " norm_dict = normalize_embeds(embed_dict)\n", 414 | " norms = torch.stack([torch.tensor([x, y]) for x, y in norm_dict.values()])\n", 415 | " return(norms)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [ 424 | "db = db_loader('/home/mage/Pictures/memes/memery.pt', device)" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "filenames = [v['fpath'] for v in db.values()]" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": null, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "clips = torch.stack([v['embed'] for v in db.values()])" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": null, 448 | "metadata": {}, 449 | "outputs": [], 450 | "source": [ 451 | "filenames[:5]" 452 | ] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "execution_count": null, 457 | "metadata": {}, 458 | "outputs": [], 459 | "source": [ 460 | "mde_lg = pymde.preserve_neighbors(clips, verbose=False, device='cuda')" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "embed_lg = mde_lg.embed(verbose=False, snapshot_every=1)" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [ 478 | "norms_lg = normalize_tensors(embed_lg,filenames)\n", 479 | "len(norms_lg)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "metadata": {}, 486 | "outputs": [], 487 | "source": [ 488 | "plot_images_from_tensors(norms_lg, filenames, savefile='graphs/normalized-lg.jpg')" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "---\n", 496 | "\n", 497 | "### Be careful here\n", 498 | "\n", 499 | "It is possible to use embeddings as target coordinates to delete sections of the data:" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": null, 505 | "metadata": {}, 506 | "outputs": [], 507 | "source": [ 508 | "to_delete = []\n", 509 | "for coord, img in zip(embed_lg, filenames):  # embed_lg assumed here; substitute whichever embedding you're cleaning\n", 510 | " x, y = coord\n", 511 | " if x < -2 or y < -1:\n", 512 | " to_delete.append(img)" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "len(to_delete)" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "metadata": {}, 528 | "outputs": [], 529 | "source": [ 530 | "for img in to_delete:\n", 531 | " imgpath = Path(img)\n", 532 | " imgpath.unlink()" 533 | ] 534 | },
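Since `unlink` is irreversible, a more cautious variant (my suggestion, not part of the original notebook) is to move the flagged files into a quarantine folder and review them before deleting anything:

```python
# Safer alternative to unlinking -- move flagged images aside for manual review.
# The quarantine folder name is an arbitrary choice.
import shutil
from pathlib import Path

quarantine = Path('./quarantine')
quarantine.mkdir(exist_ok=True)

for img in to_delete:
    imgpath = Path(img)
    if imgpath.exists():
        shutil.move(str(imgpath), str(quarantine / imgpath.name))
```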
535 | { 536 | "cell_type": "markdown", 537 | "metadata": {}, 538 | "source": [ 539 | "It worked! A better distribution and fewer of the wrong things" 540 | ] 541 | } 542 | ], 543 | "metadata": { 544 | "kernelspec": { 545 | "display_name": "Python 3", 546 | "language": "python", 547 | "name": "python3" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 4 552 | } 553 | -------------------------------------------------------------------------------- /notebooks/_working_pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Modular flow system\n", 8 | "\n", 9 | "I have decided to adapt the design system from Jina into this repo, at least for prototyping purposes. Their distributed systems approach seems quite good but is too much complexity for me to add right away. Instead I'm going to replicate the essential design pattern, that of Flows and Executors.\n", 10 | "\n", 11 | "**Flows** are specific patterns of data manipulation and storage. **Executors** are the operators that transform the data within the flow. \n", 12 | "\n", 13 | "There are two core flows to any search system: indexing, and querying. The plan here is to make executors that can be composed into flows and then compose the flows into a UI that supports querying and, to some extent, indexing as well.\n", 14 | "\n", 15 | "The core executors for this use case are:\n", 16 | " - Loader\n", 17 | " - Crafter\n", 18 | " - Encoder\n", 19 | " - Indexer\n", 20 | " - Ranker\n", 21 | " - Gateway\n", 22 | " \n", 23 | "In this file I try to build these so that the Jupyter notebook itself can be run as a Flow for indexing and then querying. From there it should be easy to abstract the functions and classes and messaging or whatever is necessary for microservices etc." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "execution": { 31 | "iopub.execute_input": "2021-05-17T23:26:12.215480Z", 32 | "iopub.status.busy": "2021-05-17T23:26:12.215153Z", 33 | "iopub.status.idle": "2021-05-17T23:26:12.218180Z", 34 | "shell.execute_reply": "2021-05-17T23:26:12.217661Z", 35 | "shell.execute_reply.started": "2021-05-17T23:26:12.215435Z" 36 | } 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "# move these to main function eventually but for now we're going in notebook order\n", 41 | "args = {\n", 42 | " \"path\": \"/home/mage/Pictures/memes/\",\n", 43 | " \"query\": \"scary cat\",\n", 44 | "}" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Loader\n", 52 | "\n", 53 | "The loader takes a directory or list of image files and checks them against a database or checkpoint. If there is a saved checkpoint and the files haven't changed, it loads the checkpoint and sends the data directly to Ranker. If not, it sends them to Crafter. Ideally it could send new images to Crafter and load the dictionary of old images at the same time, without re-encoding old images.\n", 54 | "\n", 55 | "The process of indexing could actually happen in the background while querying happens on the old index! This means putting the logic in the Flow rather than the Loader, I suppose.\n", 56 | "\n", 57 | "Maybe save a dictionary `{filename_timestamp : vector}` to the database as a simple version control mechanism. Then, if any filenames exist but with a different timestamp, we load those under their own key. 
And we can throw out any filename_timestamp that doesn't exist, before indexing. " 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "execution": { 65 | "iopub.execute_input": "2021-05-17T23:26:12.219219Z", 66 | "iopub.status.busy": "2021-05-17T23:26:12.218988Z", 67 | "iopub.status.idle": "2021-05-17T23:26:12.221615Z", 68 | "shell.execute_reply": "2021-05-17T23:26:12.221131Z", 69 | "shell.execute_reply.started": "2021-05-17T23:26:12.219201Z" 70 | } 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "from pathlib import Path\n", 75 | "\n", 76 | "root = Path(args['path'])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "execution": { 84 | "iopub.execute_input": "2021-05-17T23:26:12.222765Z", 85 | "iopub.status.busy": "2021-05-17T23:26:12.222612Z", 86 | "iopub.status.idle": "2021-05-17T23:26:12.225218Z", 87 | "shell.execute_reply": "2021-05-17T23:26:12.224478Z", 88 | "shell.execute_reply.started": "2021-05-17T23:26:12.222747Z" 89 | } 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "def slugify(filepath):\n", 94 | " return f'{filepath.stem}_{str(filepath.stat().st_mtime).split(\".\")[0]}'" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "execution": { 102 | "iopub.execute_input": "2021-05-17T23:26:12.226412Z", 103 | "iopub.status.busy": "2021-05-17T23:26:12.226182Z", 104 | "iopub.status.idle": "2021-05-17T23:26:12.229182Z", 105 | "shell.execute_reply": "2021-05-17T23:26:12.228619Z", 106 | "shell.execute_reply.started": "2021-05-17T23:26:12.226386Z" 107 | } 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# filenames = path.iterdir()\n", 112 | "def get_image_files(path):\n", 113 | " return [(f, slugify(f)) for f in path.rglob('*') if f.suffix in ['.jpg', '.png', '.jpeg']]" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "execution": { 121 | "iopub.execute_input": "2021-05-17T23:26:12.230161Z", 122 | "iopub.status.busy": "2021-05-17T23:26:12.229994Z", 123 | "iopub.status.idle": "2021-05-17T23:26:12.271774Z", 124 | "shell.execute_reply": "2021-05-17T23:26:12.271310Z", 125 | "shell.execute_reply.started": "2021-05-17T23:26:12.230139Z" 126 | } 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "filepaths = get_image_files(root)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "execution": { 138 | "iopub.execute_input": "2021-05-17T23:26:12.272506Z", 139 | "iopub.status.busy": "2021-05-17T23:26:12.272342Z", 140 | "iopub.status.idle": "2021-05-17T23:26:12.279507Z", 141 | "shell.execute_reply": "2021-05-17T23:26:12.279149Z", 142 | "shell.execute_reply.started": "2021-05-17T23:26:12.272456Z" 143 | } 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "len(filepaths)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": { 154 | "execution": { 155 | "iopub.execute_input": "2021-05-17T23:26:12.280416Z", 156 | "iopub.status.busy": "2021-05-17T23:26:12.280299Z", 157 | "iopub.status.idle": "2021-05-17T23:26:12.283291Z", 158 | "shell.execute_reply": "2021-05-17T23:26:12.282872Z", 159 | "shell.execute_reply.started": "2021-05-17T23:26:12.280400Z" 160 | } 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "filepaths[:5]" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "So we have a list of paths and slugified 
filenames from the folder. We want to see if there's an archive, so that we don't have to recalculate tensors for images we've seen before. Then we want to pass that directly to the indexer, but send the new images through the crafter and encoder first.\n", 172 | "\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "execution": { 179 | "iopub.execute_input": "2021-05-12T23:37:14.029490Z", 180 | "iopub.status.busy": "2021-05-12T23:37:14.028825Z", 181 | "iopub.status.idle": "2021-05-12T23:37:14.080913Z", 182 | "shell.execute_reply": "2021-05-12T23:37:14.080380Z", 183 | "shell.execute_reply.started": "2021-05-12T23:37:14.029406Z" 184 | } 185 | }, 186 | "source": [ 187 | "But I need to separate out the logic for the crafter and encoder from the simple loading of archives and pictures. This component should only provide the dictionary of archived CLIP embeddings, the treemap (eventually) and the locations of the new images to review, and let the downstream components deal with them." 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "execution": { 195 | "iopub.execute_input": "2021-05-17T23:26:12.283975Z", 196 | "iopub.status.busy": "2021-05-17T23:26:12.283864Z", 197 | "iopub.status.idle": "2021-05-17T23:26:12.768725Z", 198 | "shell.execute_reply": "2021-05-17T23:26:12.768101Z", 199 | "shell.execute_reply.started": "2021-05-17T23:26:12.283960Z" 200 | } 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "import torch\n", 205 | "import torchvision\n", 206 | "\n", 207 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 208 | "device" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "execution": { 216 | "iopub.execute_input": "2021-05-17T23:26:12.770648Z", 217 | "iopub.status.busy": "2021-05-17T23:26:12.770436Z", 218 | "iopub.status.idle": "2021-05-17T23:26:12.775477Z", 219 | "shell.execute_reply": "2021-05-17T23:26:12.774783Z", 220 | "shell.execute_reply.started": "2021-05-17T23:26:12.770627Z" 221 | } 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "def files_archive_loader(filepaths, root, device):\n", 226 | " dbpath = root/'memery.pt'\n", 227 | "# dbpath_backup = root/'memery.pt'\n", 228 | " db = db_loader(dbpath)\n", 229 | " \n", 230 | " current_slugs = [slug for path, slug in filepaths] \n", 231 | " archive_db = {k:db[k] for k in db if k in current_slugs} \n", 232 | " archive_slugs = [v['slug'] for v in archive_db.values()]\n", 233 | " new_files = [(str(path), slug) for path, slug in filepaths if slug not in archive_slugs]\n", 234 | " \n", 235 | " return(archive_db, new_files)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "execution": { 243 | "iopub.execute_input": "2021-05-17T23:26:12.778681Z", 244 | "iopub.status.busy": "2021-05-17T23:26:12.778540Z", 245 | "iopub.status.idle": "2021-05-17T23:26:12.781397Z", 246 | "shell.execute_reply": "2021-05-17T23:26:12.780882Z", 247 | "shell.execute_reply.started": "2021-05-17T23:26:12.778662Z" 248 | } 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "def db_loader(dbpath):\n", 253 | " # check for savefile or backup and extract\n", 254 | " if dbpath.exists():\n", 255 | " db = torch.load(dbpath)\n", 256 | "# elif dbpath_backup.exists():\n", 257 | "# db = torch.load(dbpath_backup)\n", 258 | " else:\n", 259 | " db = {}\n", 260 | " return(db)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | 
"execution_count": null, 266 | "metadata": { 267 | "execution": { 268 | "iopub.execute_input": "2021-05-17T23:26:12.782479Z", 269 | "iopub.status.busy": "2021-05-17T23:26:12.782283Z", 270 | "iopub.status.idle": "2021-05-17T23:26:12.785981Z", 271 | "shell.execute_reply": "2021-05-17T23:26:12.785150Z", 272 | "shell.execute_reply.started": "2021-05-17T23:26:12.782459Z" 273 | } 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "def treemap_loader(treepath):\n", 278 | " treemap = AnnoyIndex(512, 'angular')\n", 279 | "\n", 280 | " if treepath.exists():\n", 281 | " treemap.load(str(treepath))\n", 282 | " else:\n", 283 | " treemap = None\n", 284 | " return(treemap)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "execution": { 292 | "iopub.execute_input": "2021-05-17T23:26:12.787044Z", 293 | "iopub.status.busy": "2021-05-17T23:26:12.786880Z", 294 | "iopub.status.idle": "2021-05-17T23:26:14.981021Z", 295 | "shell.execute_reply": "2021-05-17T23:26:14.980482Z", 296 | "shell.execute_reply.started": "2021-05-17T23:26:12.787026Z" 297 | } 298 | }, 299 | "outputs": [], 300 | "source": [ 301 | "archive_db, new_files = files_archive_loader(get_image_files(Path(args['path'])), root, device)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "execution": { 309 | "iopub.execute_input": "2021-05-17T23:26:14.982030Z", 310 | "iopub.status.busy": "2021-05-17T23:26:14.981859Z", 311 | "iopub.status.idle": "2021-05-17T23:26:14.986891Z", 312 | "shell.execute_reply": "2021-05-17T23:26:14.986449Z", 313 | "shell.execute_reply.started": "2021-05-17T23:26:14.982010Z" 314 | } 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "len(archive_db)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": { 325 | "execution": { 326 | "iopub.execute_input": "2021-05-17T23:26:14.987764Z", 327 | "iopub.status.busy": "2021-05-17T23:26:14.987610Z", 328 | "iopub.status.idle": "2021-05-17T23:26:14.991252Z", 329 | "shell.execute_reply": "2021-05-17T23:26:14.990625Z", 330 | "shell.execute_reply.started": "2021-05-17T23:26:14.987747Z" 331 | } 332 | }, 333 | "outputs": [], 334 | "source": [ 335 | "len(new_files)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "execution": { 343 | "iopub.execute_input": "2021-05-17T23:26:14.992366Z", 344 | "iopub.status.busy": "2021-05-17T23:26:14.992157Z", 345 | "iopub.status.idle": "2021-05-17T23:26:14.996332Z", 346 | "shell.execute_reply": "2021-05-17T23:26:14.995383Z", 347 | "shell.execute_reply.started": "2021-05-17T23:26:14.992343Z" 348 | } 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "\n", 353 | "len(new_files),len(archive_db)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "## Crafter\n", 361 | "\n", 362 | "Takes a list of image filenames and transforms them to batches of the correct dimensions for CLIP. Need to figure out a way around torchvision's loader idiosyncrasies here: currently it just loads images from subfolders, needs to operate okay if pointed at a single folder of images, or recursively, or an arbitrary list of files.\n", 363 | "\n", 364 | "Then, too, it would be nice to eventually putthis work on the client computer using torchscript or something. So that it only sends 224x224x3 images over the wire. 
367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "execution": { 372 | "iopub.execute_input": "2021-05-17T23:26:14.997972Z", 373 | "iopub.status.busy": "2021-05-17T23:26:14.997652Z", 374 | "iopub.status.idle": "2021-05-17T23:26:15.000660Z", 375 | "shell.execute_reply": "2021-05-17T23:26:15.000138Z", 376 | "shell.execute_reply.started": "2021-05-17T23:26:14.997945Z" 377 | } 378 | }, 379 | "outputs": [], 380 | "source": [ 381 | "from torchvision.datasets import VisionDataset\n", 382 | "from PIL import Image" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": { 389 | "execution": { 390 | "iopub.execute_input": "2021-05-17T23:26:15.005865Z", 391 | "iopub.status.busy": "2021-05-17T23:26:15.005459Z", 392 | "iopub.status.idle": "2021-05-17T23:26:15.010256Z", 393 | "shell.execute_reply": "2021-05-17T23:26:15.009011Z", 394 | "shell.execute_reply.started": "2021-05-17T23:26:15.005837Z" 395 | } 396 | }, 397 | "outputs": [], 398 | "source": [ 399 | "def make_dataset(new_files):\n", 400 | " '''Returns a list of samples of the form (path_to_sample, index), plus a\n", 401 | " parallel list of (slug, index) pairs'''\n", 402 | " samples = []\n", 403 | " slugs = []\n", 404 | " for i, f in enumerate(new_files):\n", 405 | " path, slug = f\n", 406 | " samples.append((str(path), i))\n", 407 | " slugs.append((slug, i))\n", 408 | " return(samples, slugs)" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": { 415 | "execution": { 416 | "iopub.execute_input": "2021-05-17T23:26:15.011746Z", 417 | "iopub.status.busy": "2021-05-17T23:26:15.011203Z", 418 | "iopub.status.idle": "2021-05-17T23:26:15.015031Z", 419 | "shell.execute_reply": "2021-05-17T23:26:15.014629Z", 420 | "shell.execute_reply.started": "2021-05-17T23:26:15.011681Z" 421 | } 422 | }, 423 | "outputs": [], 424 | "source": [ 425 | "def pil_loader(path: str) -> Image.Image:\n", 426 | " # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)\n", 427 | " with open(path, 'rb') as f:\n", 428 | " img = Image.open(f)\n", 429 | " return img.convert('RGB')" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": { 436 | "execution": { 437 | "iopub.execute_input": "2021-05-17T23:26:15.015970Z", 438 | "iopub.status.busy": "2021-05-17T23:26:15.015807Z", 439 | "iopub.status.idle": "2021-05-17T23:26:15.020168Z", 440 | "shell.execute_reply": "2021-05-17T23:26:15.019612Z", 441 | "shell.execute_reply.started": "2021-05-17T23:26:15.015951Z" 442 | } 443 | }, 444 | "outputs": [], 445 | "source": [ 446 | "class DatasetImagePaths(VisionDataset):\n", 447 | " def __init__(self, new_files, transforms = None):\n", 448 | " super(DatasetImagePaths, self).__init__(new_files, transforms=transforms)\n", 449 | " samples, slugs = make_dataset(new_files)\n", 450 | " self.samples = samples\n", 451 | " self.slugs = slugs\n", 452 | " self.loader = pil_loader\n", 453 | " self.root = 'file dataset'\n", 454 | " def __len__(self):\n", 455 | " return(len(self.samples))\n", 456 | " \n", 457 | " def __getitem__(self, index):\n", 458 | " path, target = self.samples[index]\n", 459 | " sample = self.loader(path)\n", 460 | " if self.transforms is not None:\n", 461 | " sample = self.transforms(sample)\n", 462 | " return sample, 
target" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "metadata": { 469 | "execution": { 470 | "iopub.execute_input": "2021-05-17T23:26:15.021137Z", 471 | "iopub.status.busy": "2021-05-17T23:26:15.020930Z", 472 | "iopub.status.idle": "2021-05-17T23:26:15.024133Z", 473 | "shell.execute_reply": "2021-05-17T23:26:15.023597Z", 474 | "shell.execute_reply.started": "2021-05-17T23:26:15.021117Z" 475 | } 476 | }, 477 | "outputs": [], 478 | "source": [ 479 | "crafted = DatasetImagePaths(new_files)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "metadata": { 486 | "execution": { 487 | "iopub.execute_input": "2021-05-17T23:27:10.327359Z", 488 | "iopub.status.busy": "2021-05-17T23:27:10.327061Z", 489 | "iopub.status.idle": "2021-05-17T23:27:10.331376Z", 490 | "shell.execute_reply": "2021-05-17T23:27:10.330348Z", 491 | "shell.execute_reply.started": "2021-05-17T23:27:10.327324Z" 492 | } 493 | }, 494 | "outputs": [], 495 | "source": [ 496 | "if len(crafted) > 0:\n", 497 | " crafted[0][0].show()" 498 | ] 499 | }, 500 | { 501 | "cell_type": "markdown", 502 | "metadata": {}, 503 | "source": [ 504 | "Okay, that seems to work decently. Test with transforms, which I will just find in CLIP source code and copy over, to prevent having to import CLIP in this executor." 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": null, 510 | "metadata": { 511 | "execution": { 512 | "iopub.execute_input": "2021-05-17T23:27:10.532077Z", 513 | "iopub.status.busy": "2021-05-17T23:27:10.531910Z", 514 | "iopub.status.idle": "2021-05-17T23:27:10.535139Z", 515 | "shell.execute_reply": "2021-05-17T23:27:10.534199Z", 516 | "shell.execute_reply.started": "2021-05-17T23:27:10.532056Z" 517 | } 518 | }, 519 | "outputs": [], 520 | "source": [ 521 | "from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "metadata": { 528 | "execution": { 529 | "iopub.execute_input": "2021-05-17T23:27:10.672197Z", 530 | "iopub.status.busy": "2021-05-17T23:27:10.672025Z", 531 | "iopub.status.idle": "2021-05-17T23:27:10.675311Z", 532 | "shell.execute_reply": "2021-05-17T23:27:10.674703Z", 533 | "shell.execute_reply.started": "2021-05-17T23:27:10.672178Z" 534 | } 535 | }, 536 | "outputs": [], 537 | "source": [ 538 | "def clip_transform(n_px):\n", 539 | " return Compose([\n", 540 | " Resize(n_px, interpolation=Image.BICUBIC),\n", 541 | " CenterCrop(n_px),\n", 542 | " ToTensor(),\n", 543 | " Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),\n", 544 | " ])" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": null, 550 | "metadata": { 551 | "execution": { 552 | "iopub.execute_input": "2021-05-17T23:27:10.783218Z", 553 | "iopub.status.busy": "2021-05-17T23:27:10.783066Z", 554 | "iopub.status.idle": "2021-05-17T23:27:10.785719Z", 555 | "shell.execute_reply": "2021-05-17T23:27:10.785213Z", 556 | "shell.execute_reply.started": "2021-05-17T23:27:10.783202Z" 557 | } 558 | }, 559 | "outputs": [], 560 | "source": [ 561 | "crafted_transformed = DatasetImagePaths(new_files, clip_transform(224))" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "metadata": { 568 | "execution": { 569 | "iopub.execute_input": "2021-05-17T23:27:10.914257Z", 570 | "iopub.status.busy": "2021-05-17T23:27:10.914086Z", 571 | "iopub.status.idle": 
"2021-05-17T23:27:10.916361Z", 572 | "shell.execute_reply": "2021-05-17T23:27:10.915817Z", 573 | "shell.execute_reply.started": "2021-05-17T23:27:10.914238Z" 574 | } 575 | }, 576 | "outputs": [], 577 | "source": [ 578 | "# crafted_transformed[0][0].shape" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": null, 584 | "metadata": { 585 | "execution": { 586 | "iopub.execute_input": "2021-05-17T23:27:11.049878Z", 587 | "iopub.status.busy": "2021-05-17T23:27:11.049661Z", 588 | "iopub.status.idle": "2021-05-17T23:27:11.052757Z", 589 | "shell.execute_reply": "2021-05-17T23:27:11.052083Z", 590 | "shell.execute_reply.started": "2021-05-17T23:27:11.049856Z" 591 | } 592 | }, 593 | "outputs": [], 594 | "source": [ 595 | "# to_pil = torchvision.transforms.ToPILImage()\n", 596 | "# img = to_pil(crafted_transformed[0][0])\n", 597 | "# img.show()" 598 | ] 599 | }, 600 | { 601 | "cell_type": "markdown", 602 | "metadata": {}, 603 | "source": [ 604 | "Put that all together, and wrap in a DataLoader for batching. In future, need to figure out how to pick batch size and number of workers programmatically bsed on device capabilities." 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": null, 610 | "metadata": { 611 | "execution": { 612 | "iopub.execute_input": "2021-05-17T23:27:11.510935Z", 613 | "iopub.status.busy": "2021-05-17T23:27:11.510756Z", 614 | "iopub.status.idle": "2021-05-17T23:27:11.514316Z", 615 | "shell.execute_reply": "2021-05-17T23:27:11.513561Z", 616 | "shell.execute_reply.started": "2021-05-17T23:27:11.510917Z" 617 | } 618 | }, 619 | "outputs": [], 620 | "source": [ 621 | "def crafter(new_files, device, batch_size=128, num_workers=4): \n", 622 | " with torch.no_grad():\n", 623 | " imagefiles=DatasetImagePaths(new_files, clip_transform(224))\n", 624 | " img_loader=torch.utils.data.DataLoader(imagefiles, batch_size=batch_size, shuffle=False, num_workers=num_workers)\n", 625 | " return(img_loader)" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": { 632 | "execution": { 633 | "iopub.execute_input": "2021-05-17T23:27:11.876682Z", 634 | "iopub.status.busy": "2021-05-17T23:27:11.876512Z", 635 | "iopub.status.idle": "2021-05-17T23:27:11.880238Z", 636 | "shell.execute_reply": "2021-05-17T23:27:11.879080Z", 637 | "shell.execute_reply.started": "2021-05-17T23:27:11.876665Z" 638 | } 639 | }, 640 | "outputs": [], 641 | "source": [ 642 | "img_loader = crafter(new_files, device)" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": null, 648 | "metadata": { 649 | "execution": { 650 | "iopub.execute_input": "2021-05-17T23:27:12.182811Z", 651 | "iopub.status.busy": "2021-05-17T23:27:12.182645Z", 652 | "iopub.status.idle": "2021-05-17T23:27:12.186305Z", 653 | "shell.execute_reply": "2021-05-17T23:27:12.185653Z", 654 | "shell.execute_reply.started": "2021-05-17T23:27:12.182794Z" 655 | } 656 | }, 657 | "outputs": [], 658 | "source": [ 659 | "img_loader" 660 | ] 661 | }, 662 | { 663 | "cell_type": "markdown", 664 | "metadata": {}, 665 | "source": [ 666 | "## Encoder\n", 667 | "\n", 668 | "CLIP wrapper takes batched tensors or text queries and returns batched 512-dim vectors. size of batch depends on GPU, but if we're putting all that on a server anyway it's a matter of accounting. Does batching go here though? Or in the crafter?\n", 669 | "\n", 670 | "cool thing here is we can use one encoder for both image and text, just check type on the way in. 
But to start, we'll keep it simple and make two functions.\n", 671 | "\n", 672 | "We could also index previous queries as vectors in a separate map and use them for prediction/history: keep a little database of previous queries, already in vector format, along with their ranked nearest neighbors, so that the user can see history offline. (A sketch of that idea follows the encoder code below.)" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": { 679 | "execution": { 680 | "iopub.execute_input": "2021-05-17T23:27:13.353998Z", 681 | "iopub.status.busy": "2021-05-17T23:27:13.353324Z", 682 | "iopub.status.idle": "2021-05-17T23:27:14.660546Z", 683 | "shell.execute_reply": "2021-05-17T23:27:14.659900Z", 684 | "shell.execute_reply.started": "2021-05-17T23:27:13.353916Z" 685 | } 686 | }, 687 | "outputs": [], 688 | "source": [ 689 | "import clip\n", 690 | "from tqdm import tqdm\n", 691 | "model, _ = clip.load(\"ViT-B/32\", device)" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": { 698 | "execution": { 699 | "iopub.execute_input": "2021-05-17T23:27:14.661653Z", 700 | "iopub.status.busy": "2021-05-17T23:27:14.661465Z", 701 | "iopub.status.idle": "2021-05-17T23:27:14.665037Z", 702 | "shell.execute_reply": "2021-05-17T23:27:14.664655Z", 703 | "shell.execute_reply.started": "2021-05-17T23:27:14.661618Z" 704 | } 705 | }, 706 | "outputs": [], 707 | "source": [ 708 | "def image_encoder(img_loader, device):\n", 709 | " image_embeddings = torch.tensor(()).to(device)\n", 710 | " with torch.no_grad():\n", 711 | " for images, labels in tqdm(img_loader):\n", 712 | " batch_features = model.encode_image(images.to(device))\n", 713 | " image_embeddings = torch.cat((image_embeddings, batch_features)).to(device)\n", 714 | " \n", 715 | " image_embeddings = image_embeddings / image_embeddings.norm(dim=-1, keepdim=True)\n", 716 | " return(image_embeddings)" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": null, 722 | "metadata": { 723 | "execution": { 724 | "iopub.execute_input": "2021-05-17T23:27:15.166413Z", 725 | "iopub.status.busy": "2021-05-17T23:27:15.166108Z", 726 | "iopub.status.idle": "2021-05-17T23:27:15.300321Z", 727 | "shell.execute_reply": "2021-05-17T23:27:15.299842Z", 728 | "shell.execute_reply.started": "2021-05-17T23:27:15.166374Z" 729 | } 730 | }, 731 | "outputs": [], 732 | "source": [ 733 | "new_embeddings = image_encoder(img_loader, device)" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": null, 739 | "metadata": { 740 | "execution": { 741 | "iopub.execute_input": "2021-05-17T23:27:16.366074Z", 742 | "iopub.status.busy": "2021-05-17T23:27:16.365827Z", 743 | "iopub.status.idle": "2021-05-17T23:27:16.370513Z", 744 | "shell.execute_reply": "2021-05-17T23:27:16.369708Z", 745 | "shell.execute_reply.started": "2021-05-17T23:27:16.366042Z" 746 | } 747 | }, 748 | "outputs": [], 749 | "source": [ 750 | "def text_encoder(text, device):\n", 751 | " with torch.no_grad():\n", 752 | " text = clip.tokenize(text).to(device)\n", 753 | " text_features = model.encode_text(text)\n", 754 | " text_features = text_features / text_features.norm(dim=-1, keepdim=True)\n", 755 | " return(text_features)" 756 | ] 757 | },
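And the query-history idea from the notes above could start as just a dict saved next to `memery.pt`; a sketch with hypothetical names, nothing below depends on it:

```python
def remember_query(history, query, query_vec, ranked_files, histpath):
    # Cache the query vector and its ranked results so past searches
    # can be replayed offline, then persist the cache like the embedding db.
    history[query] = {'embed': query_vec.cpu(), 'ranked': ranked_files}
    torch.save(history, histpath)
    return history

def recall_query(history, query):
    # Return the ranked filenames for a previously seen query, if any.
    entry = history.get(query)
    return entry['ranked'] if entry else None
```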
758 | { 759 | "cell_type": "markdown", 760 | "metadata": {}, 761 | "source": [ 762 | "## Indexer\n", 763 | "\n", 764 | "Annoy treemap or FAISS or other solutions. Given a dataset of tensors, returns a dictionary or database or treemap structure, something that is searchable for later. It would be nice to be able to diff this somehow, or make sure that it's up-to-date. Maybe keeping two copies is okay? One for backup and quick-searching, one for main search once it's indexed any new images. \n", 765 | "\n", 766 | "This executor `needs` both Encoder and Loader to send it the new and old vectors, respectively. So it needs to be preceded by some kind of **join_all** component that can make sure we're not missing new data before handing it over to the indexer. Hm" 767 | ] 768 | }, 769 | { 770 | "cell_type": "code", 771 | "execution_count": null, 772 | "metadata": { 773 | "execution": { 774 | "iopub.execute_input": "2021-05-17T23:27:22.830650Z", 775 | "iopub.status.busy": "2021-05-17T23:27:22.829915Z", 776 | "iopub.status.idle": "2021-05-17T23:27:22.835413Z", 777 | "shell.execute_reply": "2021-05-17T23:27:22.834944Z", 778 | "shell.execute_reply.started": "2021-05-17T23:27:22.830565Z" 779 | } 780 | }, 781 | "outputs": [], 782 | "source": [ 783 | "root = Path(args['path'])" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": { 790 | "execution": { 791 | "iopub.execute_input": "2021-05-17T23:27:23.548128Z", 792 | "iopub.status.busy": "2021-05-17T23:27:23.547975Z", 793 | "iopub.status.idle": "2021-05-17T23:27:23.551213Z", 794 | "shell.execute_reply": "2021-05-17T23:27:23.550679Z", 795 | "shell.execute_reply.started": "2021-05-17T23:27:23.548112Z" 796 | } 797 | }, 798 | "outputs": [], 799 | "source": [ 800 | "def join_all(db, new_files, new_embeddings):\n", 801 | " start = len(db)\n", 802 | " for i, file in enumerate(new_files):\n", 803 | " path, slug = file\n", 804 | " index = start + i\n", 805 | " db[slug] = {\n", 806 | " 'slug': slug,\n", 807 | " 'fpath': path,\n", 808 | " 'embed': new_embeddings[i],\n", 809 | " 'index': index\n", 810 | " }\n", 811 | " return(db)" 812 | ] 813 | }, 814 | { 815 | "cell_type": "code", 816 | "execution_count": null, 817 | "metadata": { 818 | "execution": { 819 | "iopub.execute_input": "2021-05-17T23:27:26.689841Z", 820 | "iopub.status.busy": "2021-05-17T23:27:26.689681Z", 821 | "iopub.status.idle": "2021-05-17T23:27:26.692632Z", 822 | "shell.execute_reply": "2021-05-17T23:27:26.691974Z", 823 | "shell.execute_reply.started": "2021-05-17T23:27:26.689825Z" 824 | } 825 | }, 826 | "outputs": [], 827 | "source": [ 828 | "db = join_all(archive_db,\n", 829 | " new_files,\n", 830 | " new_embeddings\n", 831 | " )" 832 | ] 833 | }, 834 | { 835 | "cell_type": "code", 836 | "execution_count": null, 837 | "metadata": { 838 | "execution": { 839 | "iopub.execute_input": "2021-05-17T23:27:27.321954Z", 840 | "iopub.status.busy": "2021-05-17T23:27:27.321741Z", 841 | "iopub.status.idle": "2021-05-17T23:27:27.326550Z", 842 | "shell.execute_reply": "2021-05-17T23:27:27.325029Z", 843 | "shell.execute_reply.started": "2021-05-17T23:27:27.321935Z" 844 | } 845 | }, 846 | "outputs": [], 847 | "source": [ 848 | "len(db)" 849 | ] 850 | }, 851 | { 852 | "cell_type": "markdown", 853 | "metadata": {}, 854 | "source": [ 855 | "And build the treemap" 856 | ] 857 | }, 858 | { 859 | "cell_type": "code", 860 | "execution_count": null, 861 | "metadata": { 862 | "execution": { 863 | "iopub.execute_input": "2021-05-17T23:27:28.602453Z", 864 | "iopub.status.busy": "2021-05-17T23:27:28.601731Z", 865 | "iopub.status.idle": "2021-05-17T23:27:28.613957Z", 866 | "shell.execute_reply": "2021-05-17T23:27:28.611655Z", 867 | "shell.execute_reply.started": "2021-05-17T23:27:28.602368Z" 868 | } 869 | }, 870 | "outputs": [], 871 | "source": [ 872 | 
"from annoy import AnnoyIndex" 873 | ] 874 | }, 875 | { 876 | "cell_type": "code", 877 | "execution_count": null, 878 | "metadata": { 879 | "execution": { 880 | "iopub.execute_input": "2021-05-17T23:27:29.075028Z", 881 | "iopub.status.busy": "2021-05-17T23:27:29.074813Z", 882 | "iopub.status.idle": "2021-05-17T23:27:29.078199Z", 883 | "shell.execute_reply": "2021-05-17T23:27:29.077644Z", 884 | "shell.execute_reply.started": "2021-05-17T23:27:29.075010Z" 885 | } 886 | }, 887 | "outputs": [], 888 | "source": [ 889 | "def build_treemap(db):\n", 890 | " treemap = AnnoyIndex(512, 'angular')\n", 891 | " for v in db.values():\n", 892 | " treemap.add_item(v['index'], v['embed'])\n", 893 | "\n", 894 | " # Build the treemap, with 5 trees rn\n", 895 | " treemap.build(5)\n", 896 | "\n", 897 | " return(treemap)\n", 898 | " " 899 | ] 900 | }, 901 | { 902 | "cell_type": "code", 903 | "execution_count": null, 904 | "metadata": { 905 | "execution": { 906 | "iopub.execute_input": "2021-05-17T23:27:29.615962Z", 907 | "iopub.status.busy": "2021-05-17T23:27:29.615800Z", 908 | "iopub.status.idle": "2021-05-17T23:27:47.259986Z", 909 | "shell.execute_reply": "2021-05-17T23:27:47.259488Z", 910 | "shell.execute_reply.started": "2021-05-17T23:27:29.615943Z" 911 | } 912 | }, 913 | "outputs": [], 914 | "source": [ 915 | "t = build_treemap(db)" 916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "execution_count": null, 921 | "metadata": { 922 | "execution": { 923 | "iopub.execute_input": "2021-05-17T23:27:47.261342Z", 924 | "iopub.status.busy": "2021-05-17T23:27:47.261093Z", 925 | "iopub.status.idle": "2021-05-17T23:27:47.265327Z", 926 | "shell.execute_reply": "2021-05-17T23:27:47.264924Z", 927 | "shell.execute_reply.started": "2021-05-17T23:27:47.261322Z" 928 | } 929 | }, 930 | "outputs": [], 931 | "source": [ 932 | "t.get_n_items(), t.get_n_trees()" 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": null, 938 | "metadata": { 939 | "execution": { 940 | "iopub.execute_input": "2021-05-17T23:27:47.266399Z", 941 | "iopub.status.busy": "2021-05-17T23:27:47.266168Z", 942 | "iopub.status.idle": "2021-05-17T23:27:47.269406Z", 943 | "shell.execute_reply": "2021-05-17T23:27:47.269053Z", 944 | "shell.execute_reply.started": "2021-05-17T23:27:47.266382Z" 945 | } 946 | }, 947 | "outputs": [], 948 | "source": [ 949 | "def save_archives(root, treemap, db):\n", 950 | " dbpath = root/'memery.pt'\n", 951 | " if dbpath.exists():\n", 952 | "# dbpath.rename(root/'memery-bak.pt')\n", 953 | " dbpath.unlink()\n", 954 | " torch.save(db, dbpath)\n", 955 | " \n", 956 | " treepath = root/'memery.ann'\n", 957 | " if treepath.exists():\n", 958 | "# treepath.rename(root/'memery-bak.ann')\n", 959 | " treepath.unlink()\n", 960 | " treemap.save(str(treepath))\n", 961 | " \n", 962 | " return(str(dbpath), str(treepath))" 963 | ] 964 | }, 965 | { 966 | "cell_type": "code", 967 | "execution_count": null, 968 | "metadata": { 969 | "execution": { 970 | "iopub.execute_input": "2021-05-17T23:27:47.270195Z", 971 | "iopub.status.busy": "2021-05-17T23:27:47.270078Z", 972 | "iopub.status.idle": "2021-05-17T23:27:47.361769Z", 973 | "shell.execute_reply": "2021-05-17T23:27:47.361432Z", 974 | "shell.execute_reply.started": "2021-05-17T23:27:47.270180Z" 975 | } 976 | }, 977 | "outputs": [], 978 | "source": [ 979 | "save_archives(root, t, db)" 980 | ] 981 | }, 982 | { 983 | "cell_type": "markdown", 984 | "metadata": {}, 985 | "source": [ 986 | "## Ranker\n", 987 | "\n", 988 | "Takes a query and an index and finds the nearest neighbors 
982 | { 983 | "cell_type": "markdown", 984 | "metadata": {}, 985 | "source": [ 986 | "## Ranker\n", 987 | "\n", 988 | "Takes a query and an index and finds the nearest neighbors or most similar scores. Ideally this is just a simple Annoy `get_nns_by_vector`, or, in the simplest case, a similarity score across all the vectors." 989 | ] 990 | }, 991 | { 992 | "cell_type": "code", 993 | "execution_count": null, 994 | "metadata": { 995 | "execution": { 996 | "iopub.execute_input": "2021-05-17T23:27:55.387079Z", 997 | "iopub.status.busy": "2021-05-17T23:27:55.386363Z", 998 | "iopub.status.idle": "2021-05-17T23:27:55.397260Z", 999 | "shell.execute_reply": "2021-05-17T23:27:55.394454Z", 1000 | "shell.execute_reply.started": "2021-05-17T23:27:55.386997Z" 1001 | } 1002 | }, 1003 | "outputs": [], 1004 | "source": [ 1005 | "def ranker(query_vec, treemap):\n", 1006 | " nn_indexes = treemap.get_nns_by_vector(query_vec[0], treemap.get_n_items())\n", 1007 | " return(nn_indexes)" 1008 | ] 1009 | }, 1010 | { 1011 | "cell_type": "code", 1012 | "execution_count": null, 1013 | "metadata": { 1014 | "execution": { 1015 | "iopub.execute_input": "2021-05-17T23:26:15.001671Z", 1016 | "iopub.status.busy": "2021-05-17T23:26:15.001534Z", 1017 | "iopub.status.idle": "2021-05-17T23:26:15.004383Z", 1018 | "shell.execute_reply": "2021-05-17T23:26:15.003536Z", 1019 | "shell.execute_reply.started": "2021-05-17T23:26:15.001654Z" 1020 | } 1021 | }, 1022 | "outputs": [], 1023 | "source": [ 1024 | "from IPython.display import Image as IMG" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "code", 1029 | "execution_count": null, 1030 | "metadata": { 1031 | "execution": { 1032 | "iopub.execute_input": "2021-05-17T23:27:56.008469Z", 1033 | "iopub.status.busy": "2021-05-17T23:27:56.008293Z", 1034 | "iopub.status.idle": "2021-05-17T23:27:56.012267Z", 1035 | "shell.execute_reply": "2021-05-17T23:27:56.011056Z", 1036 | "shell.execute_reply.started": "2021-05-17T23:27:56.008450Z" 1037 | } 1038 | }, 1039 | "outputs": [], 1040 | "source": [ 1041 | "def printi(filenames, n=5):\n", 1042 | " for im in filenames[:n]:\n", 1043 | " display(IMG(filename=im[0], width=200))" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "code", 1048 | "execution_count": null, 1049 | "metadata": { 1050 | "execution": { 1051 | "iopub.execute_input": "2021-05-17T23:27:55.520152Z", 1052 | "iopub.status.busy": "2021-05-17T23:27:55.519884Z", 1053 | "iopub.status.idle": "2021-05-17T23:27:55.524543Z", 1054 | "shell.execute_reply": "2021-05-17T23:27:55.523632Z", 1055 | "shell.execute_reply.started": "2021-05-17T23:27:55.520126Z" 1056 | } 1057 | }, 1058 | "outputs": [], 1059 | "source": [ 1060 | "def rank_5(text):\n", 1061 | " query_vec = text_encoder(text, device)\n", 1062 | " indexes = ranker(query_vec, t)\n", 1063 | " filenames = [[v['fpath'] for k,v in db.items() if v['index'] == ind] for ind in indexes]\n", 1064 | " return(filenames)" 1065 | ] 1066 | }, 1067 | { 1068 | "cell_type": "code", 1069 | "execution_count": null, 1070 | "metadata": { 1071 | "execution": { 1072 | "iopub.execute_input": "2021-05-17T23:27:56.551897Z", 1073 | "iopub.status.busy": "2021-05-17T23:27:56.551621Z", 1074 | "iopub.status.idle": "2021-05-17T23:27:57.496956Z", 1075 | "shell.execute_reply": "2021-05-17T23:27:57.496325Z", 1076 | "shell.execute_reply.started": "2021-05-17T23:27:56.551836Z" 1077 | } 1078 | }, 1079 | "outputs": [], 1080 | "source": [ 1081 | "printi(rank_5(args['query']))" 1082 | ] 1083 | }, 1084 | { 1085 | "cell_type": "markdown", 1086 | "metadata": {}, 1087 | "source": [ 1088 | "I think we have to call that a success!" 1089 | ] 1090 | },
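For comparison, the fallback mentioned in the Ranker notes, a similarity score across all the vectors with no tree at all, is only a few lines: the stored embeddings and the query vector are already normalized, so cosine similarity reduces to a dot product. A sketch, assuming the `embed` tensors share a device and dtype:

```python
def brute_force_ranker(query_vec, db):
    # Stack every stored embedding into one (N, 512) matrix
    # and rank by dot product with the query vector.
    slugs = list(db.keys())
    embeds = torch.stack([db[k]['embed'] for k in slugs])
    scores = embeds @ query_vec[0]
    order = torch.argsort(scores, descending=True)
    return [db[slugs[i]]['fpath'] for i in order.tolist()]
```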
1091 | { 1092 | "cell_type": "markdown", 1093 | "metadata": {}, 1094 | "source": [ 1095 | "## Gateway\n", 1096 | "\n", 1097 | "Takes a query and processes it through either Indexing Flow or Querying Flow, passing along arguments. The main entrypoint for each iteration of the index/query process.\n", 1098 | "\n", 1099 | "Querying Flow can technically process either text or image search, because the CLIP encoder will put them into the same embedding space. So we might as well build in a method for either, and make it available to the user, since it's impressive and useful and relatively easy to build.\n", 1100 | "\n", 1101 | "Eventually the Gateway process probably needs to be quite complicated, for serving all the different users and for delivering REST APIs to different clients. For now we will run this locally, in a notebook. Then build out a GUI from there using `mediapy` or `widgets`. That should reveal the basic necessities of the UI, and then we can separate out the GUI client from the server." 1102 | ] 1103 | }, 1104 | { 1105 | "cell_type": "code", 1106 | "execution_count": null, 1107 | "metadata": { 1108 | "execution": { 1109 | "iopub.execute_input": "2021-05-17T23:28:07.161111Z", 1110 | "iopub.status.busy": "2021-05-17T23:28:07.160442Z", 1111 | "iopub.status.idle": "2021-05-17T23:28:07.173215Z", 1112 | "shell.execute_reply": "2021-05-17T23:28:07.171845Z", 1113 | "shell.execute_reply.started": "2021-05-17T23:28:07.161028Z" 1114 | } 1115 | }, 1116 | "outputs": [], 1117 | "source": [ 1118 | "def indexFlow(path):\n", 1119 | " root = Path(path)\n", 1120 | " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 1121 | " \n", 1122 | " filepaths = get_image_files(root)\n", 1123 | " archive_db, new_files = files_archive_loader(filepaths, root, device)\n", 1124 | " print(f\"Loaded {len(archive_db)} encodings\")\n", 1125 | " print(f\"Encoding {len(new_files)} new images\")\n", 1126 | " crafted_files = crafter(new_files, device)\n", 1127 | " new_embeddings = image_encoder(crafted_files, device)\n", 1128 | " \n", 1129 | " db = join_all(archive_db, new_files, new_embeddings)\n", 1130 | " print(\"Building treemap\")\n", 1131 | " t = build_treemap(db)\n", 1132 | " \n", 1133 | " print(f\"Saving {len(db)} images\")\n", 1134 | " save_paths = save_archives(root, t, db)\n", 1135 | " print(\"Done\")\n", 1136 | " return(save_paths)" 1137 | ] 1138 | }, 1139 | { 1140 | "cell_type": "code", 1141 | "execution_count": null, 1142 | "metadata": { 1143 | "execution": { 1144 | "iopub.execute_input": "2021-05-17T23:28:10.313351Z", 1145 | "iopub.status.busy": "2021-05-17T23:28:10.313166Z", 1146 | "iopub.status.idle": "2021-05-17T23:28:28.543108Z", 1147 | "shell.execute_reply": "2021-05-17T23:28:28.542515Z", 1148 | "shell.execute_reply.started": "2021-05-17T23:28:10.313334Z" 1149 | } 1150 | }, 1151 | "outputs": [], 1152 | "source": [ 1153 | "save_paths = indexFlow(args['path'])" 1154 | ] 1155 | }, 1156 | { 1157 | "cell_type": "code", 1158 | "execution_count": null, 1159 | "metadata": { 1160 | "execution": { 1161 | "iopub.execute_input": "2021-05-17T23:28:28.544063Z", 1162 | "iopub.status.busy": "2021-05-17T23:28:28.543945Z", 1163 | "iopub.status.idle": "2021-05-17T23:28:28.547992Z", 1164 | "shell.execute_reply": "2021-05-17T23:28:28.547123Z", 1165 | "shell.execute_reply.started": "2021-05-17T23:28:28.544047Z" 1166 | } 1167 | }, 1168 | "outputs": [], 1169 | "source": [ 1170 | "save_paths" 1171 | ] 1172 | }, 1173 | { 1174 | "cell_type": "markdown", 1175 | "metadata": {},
| "source": [ 1177 | "To search:" 1178 | ] 1179 | }, 1180 | { 1181 | "cell_type": "code", 1182 | "execution_count": null, 1183 | "metadata": { 1184 | "execution": { 1185 | "iopub.execute_input": "2021-05-17T23:08:36.054349Z", 1186 | "iopub.status.busy": "2021-05-17T23:08:36.054132Z", 1187 | "iopub.status.idle": "2021-05-17T23:08:36.059544Z", 1188 | "shell.execute_reply": "2021-05-17T23:08:36.058990Z", 1189 | "shell.execute_reply.started": "2021-05-17T23:08:36.054324Z" 1190 | } 1191 | }, 1192 | "outputs": [], 1193 | "source": [ 1194 | "def queryFlow(path, query): \n", 1195 | " root = Path(path)\n", 1196 | " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", 1197 | " \n", 1198 | " dbpath = root/'memery.pt'\n", 1199 | " db = db_loader(dbpath)\n", 1200 | " treepath = root/'memery.ann'\n", 1201 | " treemap = treemap_loader(treepath)\n", 1202 | " \n", 1203 | " if treemap == None or db == {}:\n", 1204 | " dbpath, treepath = indexFlow(root)\n", 1205 | " treemap = treemap_loader(treepath)\n", 1206 | " db = file\n", 1207 | " \n", 1208 | " print(f\"Searching {len(db)} images\")\n", 1209 | " query_vec = text_encoder(query, device)\n", 1210 | " indexes = ranker(query_vec, treemap)\n", 1211 | " ranked_files = [[v['fpath'] for k,v in db.items() if v['index'] == ind] for ind in indexes]\n", 1212 | " return(ranked_files)\n", 1213 | "\n", 1214 | " " 1215 | ] 1216 | }, 1217 | { 1218 | "cell_type": "code", 1219 | "execution_count": null, 1220 | "metadata": { 1221 | "execution": { 1222 | "iopub.execute_input": "2021-05-17T23:12:15.974818Z", 1223 | "iopub.status.busy": "2021-05-17T23:12:15.974655Z", 1224 | "iopub.status.idle": "2021-05-17T23:12:16.791693Z", 1225 | "shell.execute_reply": "2021-05-17T23:12:16.791335Z", 1226 | "shell.execute_reply.started": "2021-05-17T23:12:15.974800Z" 1227 | } 1228 | }, 1229 | "outputs": [], 1230 | "source": [ 1231 | "ranked = queryFlow(args['path'], 'dog')" 1232 | ] 1233 | }, 1234 | { 1235 | "cell_type": "code", 1236 | "execution_count": null, 1237 | "metadata": { 1238 | "execution": { 1239 | "iopub.execute_input": "2021-05-17T23:12:16.792617Z", 1240 | "iopub.status.busy": "2021-05-17T23:12:16.792501Z", 1241 | "iopub.status.idle": "2021-05-17T23:12:16.808254Z", 1242 | "shell.execute_reply": "2021-05-17T23:12:16.807905Z", 1243 | "shell.execute_reply.started": "2021-05-17T23:12:16.792601Z" 1244 | } 1245 | }, 1246 | "outputs": [], 1247 | "source": [ 1248 | "printi(ranked)" 1249 | ] 1250 | }, 1251 | { 1252 | "cell_type": "markdown", 1253 | "metadata": {}, 1254 | "source": [ 1255 | "## Interactive process\n", 1256 | "Currently the objective is to take the following inputs:\n", 1257 | "- a location with images\n", 1258 | "- a text or image query,\n", 1259 | "\n", 1260 | "and return the following outputs:\n", 1261 | "- a list of image files within that location ranked by similarity to that query,\n", 1262 | "\n", 1263 | "with a minimum of duplicated effort, and a general ease-of-use for both the programmer and the casual API user." 
1264 | ] 1265 | }, 1266 | { 1267 | "cell_type": "code", 1268 | "execution_count": null, 1269 | "metadata": {}, 1270 | "outputs": [], 1271 | "source": [] 1272 | }, 1273 | { 1274 | "cell_type": "markdown", 1275 | "metadata": {}, 1276 | "source": [ 1277 | "## TODO:\n", 1278 | "\n", 1279 | "- Cleanup repo\n", 1280 | "- Rough interactive GUI\n", 1281 | "\n", 1282 | "- Optimize the image loader and number of trees based on memory and db size\n", 1283 | "- Type annotations\n", 1284 | "\n", 1285 | "## DONE:\n", 1286 | "- _Code for joining archived data to new data_\n", 1287 | "- _Code for saving indexes to archive_\n", 1288 | "- _Flows_\n" 1289 | ] 1290 | }, 1291 | { 1292 | "cell_type": "code", 1293 | "execution_count": null, 1294 | "metadata": {}, 1295 | "outputs": [], 1296 | "source": [] 1297 | } 1298 | ], 1299 | "metadata": { 1300 | "kernelspec": { 1301 | "display_name": "Python 3", 1302 | "language": "python", 1303 | "name": "python3" 1304 | }, 1305 | "language_info": { 1306 | "codemirror_mode": { 1307 | "name": "ipython", 1308 | "version": 3 1309 | }, 1310 | "file_extension": ".py", 1311 | "mimetype": "text/x-python", 1312 | "name": "python", 1313 | "nbconvert_exporter": "python", 1314 | "pygments_lexer": "ipython3", 1315 | "version": "3.7.7" 1316 | } 1317 | }, 1318 | "nbformat": 4, 1319 | "nbformat_minor": 4 1320 | } 1321 | --------------------------------------------------------------------------------