├── tests
    ├── __init__.py
    └── test_ripple.py
├── katara.png
├── .gitignore
├── requirements.txt
├── ripple
    ├── __init__.py
    ├── text_search.py
    ├── image_tagger.py
    ├── utils.py
    ├── image_search.py
    └── image_embedder.py
├── setup.py
├── .github
    └── workflows
    │   └── publish.yml
├── pyproject.toml
├── ripple_cli.py
├── ripple_app.py
├── ripple_art.py
├── ripple_image.py
├── README.md
└── LICENSE


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/test_ripple.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/katara.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kelechi-c/ripple_net/HEAD/katara.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ignore
2 | ripple.ipynb
3 | poetry.lock
4 | ripple/__pycache__
5 | .ripple_env/
6 | .ruff_cache
7 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | sentence-transformers
 2 | datasets
 3 | matplotlib
 4 | faiss-cpu
 5 | faiss-gpu
 6 | streamlit
 7 | transformers
 8 | tqdm
 9 | ripple_net
10 | 


--------------------------------------------------------------------------------
/ripple/__init__.py:
--------------------------------------------------------------------------------
1 | from .image_search import ImageSearch
2 | from .image_embedder import ImageEmbedder
3 | from .text_search import TextSearch
4 | from .utils import image_loader, image_grid, get_all_images
5 | from .image_tagger import ImageTagger
6 | 


--------------------------------------------------------------------------------
/ripple/text_search.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from .utils import image_grid
 3 | from datasets import Dataset
 4 | from sentence_transformers import SentenceTransformer
 5 | 
 6 | 
 7 | class TextSearch:
 8 |     def __init__(self, dataset: Dataset, model: SentenceTransformer):
 9 |         self.embed_model = model
10 |         self.image_dataset = dataset
11 |         self.k_images = None
12 | 
13 |     def get_similar_images(self, query: str, k_images=5):
14 |         stime = time.time()
15 |         self.k_images = k_images
16 | 
17 |         prompt = self.embed_model.encode(query)
18 |         similarity_score, image_embeddings = self.image_dataset.get_nearest_examples(
19 |             "embeddings", prompt, k=k_images
20 |         )
21 |         latency = time.time() - stime
22 |         print("---")
23 |         print(
24 |             f"Retrieved {len(image_embeddings['image'])} and similarity scores in {latency:.4f}"
25 |         )
26 |         return similarity_score, image_embeddings
27 | 
28 |     def show_grid(self, images):
29 |         image_grid(images)
30 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | with open("README.md", "r", encoding="utf-8") as fh:
 4 |     description = fh.read()
 5 | 
 6 | 
 7 | setup(
 8 |     name="ripple_net",
 9 |     version="0.1.0",
10 |     author="Chibuzo Kelechi",
11 |     py_modules=["ripple"],
12 |     author_email="kelechichibuzo@gmail.com",
13 |     description="Text-image search and image tagging library",
14 |     packages=find_packages(),
15 |     long_description=description,
16 |     long_description_content_type="text/markdown",
17 |     url="https://github.com/kelechi-c/ripple_net",
18 |     keywords=["pypi", "image search", "datasets", "CLIP", "image tagging"],
19 |     license="Apache 2.0",
20 |     classifiers=[
21 |         "Programming Language :: Python :: 3",
22 |         "Operating System :: OS Independent",
23 |         "License :: OSI Approved :: Apache Software License",
24 |     ],
25 |     install_requires=[
26 |         "sentence-transformers",
27 |         "faiss-gpu",
28 |         "faiss-cpu",
29 |         "datasets",
30 |         "matplotlib",
31 |         "numpy",
32 |         "transformers",
33 |     ],
34 |     python_requires=">=3.6",
35 | )
36 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | jobs:
16 |   deploy:
17 | 
18 |     runs-on: ubuntu-latest
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v2
22 |     - name: Set up Python
23 |       uses: actions/setup-python@v2
24 |       with:
25 |         python-version: '3.x'
26 |     - name: Install dependencies
27 |       run: |
28 |         python -m pip install --upgrade pip
29 |         pip install build
30 |     - name: Build package
31 |       run: python -m build
32 |     - name: Publish package
33 |       uses: pypa/gh-action-pypi-publish@release/v1
34 |       with:
35 |         user: __token__
36 |         password: ${{ secrets.PYPI_API_TOKEN }}
37 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "ripple_net"
 3 | version = "0.1.5"
 4 | description = "Text-image search and image tagging library"
 5 | authors = [
 6 |     { name = "Chibuzo Kelechi", email = "kelechichibuzo7@gmail.com" }
 7 | ]
 8 | readme = "README.md"
 9 | requires-python = ">= 3.9"
10 | license = { file = "LICENSE" }
11 | keywords = [
12 | 	'machine learning',
13 | 	'image search',
14 | 	'multimodal AI',
15 | 	'image datasets',
16 | 	'vector embeddings'
17 | ]
18 | 
19 | classifiers=[
20 |     'Development Status :: 4 - Beta',
21 |     'Intended Audience :: Developers',
22 |     'Topic :: Scientific/Engineering :: Artificial Intelligence',
23 |     'Programming Language :: Python :: 3',
24 |     'License :: OSI Approved :: Apache Software License'
25 | ]
26 | 
27 | dependencies = [
28 |    'sentence-transformers', 'datasets', 
29 |    'faiss-cpu', 'faiss-gpu', 'matplotlib',
30 |    'transformers', 'numpy'
31 | ]
32 | 
33 | [project.urls]
34 | Homepage = "https://pypi.org/project/ripple_net/"
35 | Repository = "https://github.com/kelechi-c/ripple_net"
36 | 
37 | [project.optional-dependencies]
38 | examples = []
39 | test = [
40 |     "pytest"
41 | ]
42 | 
43 | [tool.pytest.ini_options]
44 | pythonpath = [
45 |   "."
46 | ]
47 | 
48 | [build-system]
49 | requires = ["hatchling"]
50 | build-backend = "hatchling.build"
51 | 
52 | [tool.rye]
53 | managed = true
54 | dev-dependencies = []
55 | 
56 | [tool.hatch.metadata]
57 | allow-direct-references = true
58 | 
59 | [tool.hatch.build.targets.wheel]
60 | packages = ["ripple"]
61 | 


--------------------------------------------------------------------------------
/ripple/image_tagger.py:
--------------------------------------------------------------------------------
 1 | from sentence_transformers import SentenceTransformer, util
 2 | import os
 3 | import shutil
 4 | from .utils import image_loader, get_all_images, latency
 5 | from tqdm.auto import tqdm
 6 | 
 7 | 
 8 | class ImageTagger:
 9 |     def __init__(self, folder, model_name="clip-ViT-B-32"):
10 |         self.clip_model = SentenceTransformer(model_name)
11 |         self.file_list = get_all_images(folder)
12 |         print("Init ripple tagger")
13 | 
14 |     def auto_tagger(self, captions):
15 |         for cap in captions:
16 |             os.makedirs(cap, exist_ok=True)
17 | 
18 |         caption_emb = self.clip_model.encode(captions)
19 | 
20 |         for k, image in enumerate(tqdm(self.file_list)):
21 |             self.rename_image(image, captions, caption_emb, k)
22 | 
23 |     @latency
24 |     def rename_image(self, image_path, captions, caption_emb, k):
25 |         try:
26 |             img_emb = self.clip_model.encode(image_loader(image_path))
27 |             similarities = util.cos_sim(img_emb, caption_emb)
28 |             tag = captions[similarities.argmax()]
29 | 
30 |             file_ext = os.path.splitext(image_path)[1]
31 |             new_name = f"{tag}_{k}{file_ext}"
32 |             new_path = os.path.join(tag, new_name)
33 | 
34 |             shutil.move(image_path, new_path)
35 |             print(f"Moved {image_path} to {new_path}")
36 |         except Exception as e:
37 |             print(f"Error processing {image_path}: {str(e)}")
38 | 
39 | 
40 | # Define your captions and use this function on your image files
41 | 


--------------------------------------------------------------------------------
/ripple_cli.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import argparse
 4 | from tqdm.auto import tqdm
 5 | import ripple
 6 | 
 7 | embedder = None
 8 | 
 9 | 
def main():
    """Embed a folder of images and run one text query against it.

    Command-line options:
        -f/--folder   folder to load images from
        -a/--all      scan "/home/" instead of a single folder
        -q/--query    text description to search for
        -k/--k-images number of results to retrieve
    """
    parser = argparse.ArgumentParser(
        description="ripple: cli script for text-image, amd image similarity search :) "
    )
    parser.add_argument("-f", "--folder", help="the folder to load images from")
    parser.add_argument(
        "-a", "--all", action="store_true", help="use all image files on the device"
    )
    parser.add_argument(
        "-q",
        "--query",
        default="girl wearing blue clothes",
        help="text description of the images to retrieve",
    )
    parser.add_argument(
        "-k", "--k-images", type=int, default=3, help="number of images to retrieve"
    )
    args = parser.parse_args()

    if args.all:
        print("getting all image files")
        folder = "/home/"
    elif args.folder:
        folder = args.folder
    else:
        parser.error("provide a folder with -f/--folder, or pass -a/--all")

    print(f"Loading images from folder {folder}")
    # ImageEmbedder scans the folder itself, so it gets the path rather than
    # a pre-built file list.
    embedder = ripple.ImageEmbedder(
        folder,
        retrieval_type="text-image",
        dataset_type="image folder",
        device="cpu",
    )

    print(f"creating embeddings using {embedder.embed_model}...")
    embedded_data = embedder.create_embeddings(device="cpu")

    text_search = ripple.TextSearch(embedded_data, embedder.embed_model)
    scores, ret_images = text_search.get_similar_images(
        args.query, k_images=args.k_images
    )

    # ret_images maps column names to lists, so walk the "image" column
    # rather than the dict itself (which would yield column names).
    images = ret_images["image"]
    for score, image in zip(scores, images):
        # NOTE(review): assumes file-backed images expose `.filename` - confirm.
        print(getattr(image, "filename", "<in-memory image>"))
        print(score)
        print("----")

    text_search.show_grid(ret_images)
    if images:
        images[0].show()
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     main()
50 | 


--------------------------------------------------------------------------------
/ripple/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from functools import wraps
 3 | from matplotlib import pyplot as plt
 4 | from PIL import Image as pillow
 5 | import time
 6 | import os
 7 | import glob
 8 | 
 9 | 
def latency(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function's return value is passed through unchanged. Nothing
    is printed when the call raises.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"latency => {func.__name__}: {elapsed:.4f} seconds")
        return result

    return wrapper
20 | 
21 | 
def image_loader(img):
    """Return ``img`` as a PIL image.

    Accepts a NumPy array (converted with ``Image.fromarray``), a filesystem
    path given as ``str`` or ``os.PathLike`` (opened with ``Image.open``), or
    an existing PIL image (returned unchanged).

    Raises:
        TypeError: if ``img`` is none of the supported types.
    """
    if isinstance(img, np.ndarray):
        return pillow.fromarray(img)

    if isinstance(img, (str, os.PathLike)):
        return pillow.open(img)

    # `pillow` is the PIL.Image *module*; the image class is `pillow.Image`.
    # Passing the module itself to isinstance() raises TypeError.
    if isinstance(img, pillow.Image):
        return img

    raise TypeError(
        f"unsupported image input of type {type(img).__name__}; "
        "expected a numpy array, a file path or a PIL image"
    )
31 | 
32 | 
def image_grid(images):
    """Show the images in ``images["image"]`` in a two-column grid.

    The number of rows follows the number of images, so any count (odd or
    even) can be displayed. Display errors are printed rather than raised,
    keeping this a best-effort helper.
    """
    try:
        pictures = images["image"]
        columns = 2
        # Ceiling division; at least one row so an empty result still renders.
        rows = max(1, -(-len(pictures) // columns))

        # squeeze=False keeps `ax` two-dimensional even for a single row.
        _, ax = plt.subplots(rows, columns, squeeze=False)
        for axis in ax.flat:
            # Also hides the unused cell when the image count is odd.
            axis.axis("off")

        for index, picture in enumerate(pictures):
            ax[index // columns, index % columns].imshow(picture)

        plt.show()

    except Exception as e:
        print(f"Error in grid display ==> {e}")
50 | 
51 | 
def get_all_images(root_dir, extensions=("*.jpg", "*.jpeg", "*.png", "*.gif", "*.bmp")):
    """Recursively collect image file paths below ``root_dir``.

    Args:
        root_dir: directory to scan, including all sub-directories.
        extensions: filename patterns (``fnmatch`` style) that count as images.
            Matching ignores case, so ``photo.JPG`` matches ``*.jpg``.

    Returns:
        A list of matching paths. Prints the number of files found.
    """
    # Local import: this helper is the only user of fnmatch.
    import fnmatch

    patterns = [pattern.lower() for pattern in extensions]
    image_files = []

    # A single walk covers every pattern. File names are matched directly
    # instead of globbing each directory, so directory names containing glob
    # metacharacters such as "[" cannot break the lookup.
    for directory, dirnames, filenames in os.walk(root_dir):
        dirnames.sort()  # deterministic traversal order
        for name in sorted(filenames):
            if name.startswith("."):
                # glob's "*" never matched hidden files; keep that behaviour.
                continue
            lowered = name.lower()
            if any(fnmatch.fnmatchcase(lowered, pattern) for pattern in patterns):
                image_files.append(os.path.join(directory, name))

    print(f"found {len(image_files)} images in {root_dir}")
    return image_files
59 | 


--------------------------------------------------------------------------------
/ripple/image_search.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | from datasets import Dataset
 3 | from PIL import Image as pillow
 4 | from .utils import image_grid, image_loader
 5 | from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
 6 | import time
 7 | 
 8 | 
 9 | class ImageSearch:
10 |     def __init__(
11 |         self, embedded_dataset: Dataset, device: Literal["cuda", "cpu"]
12 |     ) -> None:
13 |         # initalize class and CLIP models
14 |         self.model_id = "openai/clip-vit-large-patch14"
15 |         self.device_id = device
16 |         self.clip_model = AutoModelForZeroShotImageClassification.from_pretrained(
17 |             self.model_id, device_map=self.device_id
18 |         )
19 |         self.clip_processor = AutoProcessor.from_pretrained(self.model_id)
20 |         assert (
21 |             "embeddings" in embedded_dataset.column_names
22 |         ), "embeddings column missing in the input dataset. Ensure the dataset was embedded/indexed"
23 |         self.embedded_data = embedded_dataset
24 | 
25 |     def image_search(self, input_img, k_count: int):
26 |         if not isinstance(input_img, pillow):  # check if image type is PIL
27 |             print("Image not in PIL format, converting..")
28 |             input_img = image_loader(input_img)  # loads image in PIL format
29 | 
30 |         stime = time.time()
31 |         pixel_values = self.clip_processor(images=input_img, return_tensors="pt")[
32 |             "pixel_values"
33 |         ]
34 |         pixel_values = pixel_values.to(self.device_id)  # move tensors to device
35 |         img_embed = self.clip_model.get_image_features(pixel_values)[0]
36 |         img_embed = img_embed.detach().cpu().numpy()
37 | 
38 |         scores, retrieved_images = self.embedded_data.get_nearest_examples(
39 |             "embeddings", img_embed, k=k_count
40 |         )
41 |         exec_time = stime - time.time()
42 |         print(f"Retrieved {len(retrieved_images)} in {exec_time} seconds")
43 |         return scores, retrieved_images
44 | 
45 |     def show_grid(self, images):
46 |         image_grid(images)
47 | 


--------------------------------------------------------------------------------
/ripple_app.py:
--------------------------------------------------------------------------------
 1 | import ripple
 2 | import streamlit as stl
 3 | from tqdm.auto import tqdm
 4 | 
 5 | # streamlit app
 6 | stl.set_page_config(
 7 |     page_title="Ripple",
 8 | )
 9 | 
10 | stl.title("ripple search")
11 | stl.write(
12 |     "An app that uses text input to search for described images, using embeddings of selected image datasets. Uses contrastive learning models(CLIP) and the sentence transformers library"
13 | )
14 | stl.link_button(
15 |     label="link to github and full library code",
16 |     url="https://github.com/kelechi-c/ripple_net",
17 | )
18 | 
19 | dataset = stl.selectbox(
20 |     "choose huggingface dataset(bgger datasets take more time to embed..)",
21 |     options=[
22 |         "huggan/wikiart(1k)",
23 |         "huggan/wikiart(11k)",
24 |         "zh-plus/tiny-imagenet(110k)",
25 |         "lambdalabs/naruto-blip-captions(1k)",
26 |         "detection-datasets/fashionpedia(45k)",
27 |     ],
28 | )
29 | # initalized global variables
30 | 
31 | embedded_data = None
32 | embedder = None
33 | text_search = None
34 | 
35 | ret_images = []
36 | scores = []
37 | 
38 | 
39 | if dataset and stl.button("embed image dataset"):
40 |     with stl.spinner("Initializing and creating image embeddings from dataset"):
41 |         embedder = ripple.ImageEmbedder(
42 |             dataset, retrieval_type="text-image", dataset_type="huggingface"
43 |         )
44 | 
45 |         embedded_data = embedder.create_embeddings(device="cpu")
46 |         stl.success("Sucessfully embedded and dcreated image index")
47 | 
48 | if embedded_data is not None:
49 |     text_search = ripple.TextSearch(embedded_data, embedder.embed_model)
50 |     stl.success("Initialized text search class")
51 | 
52 | search_term = stl.text_input("Text description/search for image")
53 | 
54 | if search_term:
55 |     with stl.spinner("retrieving images with description.."):
56 |         scores, ret_images = text_search.get_similar_images(
57 |             search_term, k_images=4)
58 |         stl.success(f"sucessfully retrieved {len(ret_images)}")
59 | 
60 | for count, score, image in tqdm(zip(range(len(ret_images)), scores, ret_images)):
61 |     stl.image(image["image"][count])
62 |     stl.write(score)
63 | 


--------------------------------------------------------------------------------
/ripple_art.py:
--------------------------------------------------------------------------------
 1 | from sentence_transformers import SentenceTransformer
 2 | from datasets import load_dataset
 3 | from matplotlib import pyplot
 4 | import time
 5 | 
# Load the "train" split of the painting-style dataset with
# datasets.load_dataset; "full" is passed as the second positional argument
# (presumably the dataset configuration name - confirm).
# This runs at import time.
image_data = load_dataset(
    "keremberke/painting-style-classification", "full", split="train"
)

# CLIP model from sentence-transformers; used further down to embed both the
# dataset images and the text queries.
embed_model = SentenceTransformer("clip-ViT-B-32")
13 | 
14 | 
15 | # define helper functions
def map_filenames(sample):
    """Reduce ``sample["image_file_path"]`` to its final path component.

    The updated sample dict is returned, because ``Dataset.map`` expects the
    mapped function to return a dict (or None), not the bare string.
    """
    sample["image_file_path"] = sample["image_file_path"].split("/")[-1]
    return sample
19 | 
20 | 
def get_similar_images(query: str, dataset, k_images):
    """Look up the ``k_images`` entries of ``dataset`` closest to ``query``.

    The query is encoded with the module-level ``embed_model`` and matched
    against the dataset's "embeddings" index. Returns ``(scores, examples)``
    and prints the elapsed time.
    """
    started_at = time.time()
    query_vector = embed_model.encode(query)
    nearest = dataset.get_nearest_examples("embeddings", query_vector, k=k_images)
    similarity_score, images_embeddings = nearest
    latency = time.time() - started_at
    print(f"Retrieved {k_images} and similarity scores in {latency}")
    return similarity_score, images_embeddings
30 | 
31 | 
def image_grid(image_list):
    """Plot every image in ``image_list["image"]`` in a two-column grid."""
    # image_list maps column names to lists; the images are in "image".
    images = image_list["image"]
    columns = 2
    # Ceiling division: subplot() needs an integer row count.
    rows = -(-len(images) // columns)

    pyplot.figure(figsize=(20, 20))
    for k, image in enumerate(images):
        pyplot.subplot(rows, columns, k + 1)
        pyplot.imshow(image)
    # Without show() nothing is displayed when this runs as a script.
    pyplot.show()
39 | 
40 | 
image_data = image_data.map(map_filenames)

# Embed the images batch-wise. No explicit device is passed, so the model
# encodes on the device it was loaded on rather than requiring CUDA.
image_data_embed = image_data.map(
    lambda example: {"embeddings": embed_model.encode(example["image"])},
    batched=True,
    batch_size=64,
)

# print features and display sample images
# print(image_data.features['labels'])
# image_data[0]['image']

image_data_embed.add_faiss_index(column="embeddings")

# text prompt or search term
prompt = embed_model.encode("men sitting together")

simscore, ret_images = image_data_embed.get_nearest_examples(
    "embeddings", prompt, k=5
)  # get similar images and scores

# ret_images[0]['image']
# print(score[0])

scores, similar_images = get_similar_images(
    "blue flowing river", image_data_embed, k_images=10
)

image_grid(similar_images)
71 | 


--------------------------------------------------------------------------------
/ripple_image.py:
--------------------------------------------------------------------------------
 1 | from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
 2 | from datasets import load_dataset
 3 | import numpy as np
 4 | from PIL import Image as pillow
 5 | from matplotlib import pyplot as plt
 6 | 
 7 | 
 8 | # configs
 9 | image_folder = ""
10 | dataset_id = "huggan/few-shot-art-painting"
11 | model_id = "openai/clip-vit-large-patch14"
12 | batch_size = 32
13 | device_id = "cuda"
14 | 
15 | sample_data = load_dataset(dataset_id, split="train")
16 | # sample_data
17 | 
18 | # define models
19 | clip_processor = AutoProcessor.from_pretrained(model_id)
20 | 
21 | clip_model = AutoModelForZeroShotImageClassification(model_id, device_map="cuda")
22 | 
23 | 
def image_loader(img):
    """Return ``img`` as a PIL image.

    Accepts a NumPy array (converted with ``Image.fromarray``), a file path
    string (opened with ``Image.open``) or an existing PIL image (returned
    unchanged).

    Raises:
        TypeError: if ``img`` is none of the supported types.
    """
    if isinstance(img, np.ndarray):
        return pillow.fromarray(img)

    if isinstance(img, str):
        return pillow.open(img)

    # `pillow` is the PIL.Image module; the image class is `pillow.Image`.
    # Passing the module itself to isinstance() raises TypeError.
    if isinstance(img, pillow.Image):
        return img

    raise TypeError(
        f"unsupported image input of type {type(img).__name__}; "
        "expected a numpy array, a file path or a PIL image"
    )
33 | 
34 | 
def grid(images):
    """Show the images in ``images["image"]`` in a two-column grid.

    The row count follows the number of images, so the grid is not limited to
    four pictures and odd counts are accepted.
    """
    # `images` maps column names to lists; len(images) would count columns.
    pictures = images["image"]
    columns = 2
    rows = max(1, -(-len(pictures) // columns))  # ceiling division

    # squeeze=False keeps `ax` two-dimensional even for a single row.
    _, ax = plt.subplots(rows, columns, squeeze=False)
    for axis in ax.flat:
        # Also hides the unused cell when the image count is odd.
        axis.axis("off")

    for index, picture in enumerate(pictures):
        ax[index // columns, index % columns].imshow(picture)
    plt.show()
46 | 
47 | 
def embed_image_batch(batch):
    """Add an "embeddings" entry with CLIP image features to ``batch``.

    Used with ``Dataset.map(..., batched=True)``, so ``batch["image"]`` holds
    the images of one batch.
    """
    pixels = clip_processor(images=batch["image"], return_tensors="pt")["pixel_values"]
    pixels = pixels.to(device_id)
    image_embedding = clip_model.get_image_features(pixels)
    # Store detached CPU arrays, the same form image_search() uses for the
    # query vector, instead of device tensors that still carry autograd state.
    batch["embeddings"] = image_embedding.detach().cpu().numpy()
    return batch


embedded_data = sample_data.map(embed_image_batch, batched=True, batch_size=batch_size)
embedded_data.add_faiss_index("embeddings")
58 | 
59 | 
def image_search(input_img, k_count: int):
    """Return the ``k_count`` dataset examples most similar to ``input_img``.

    ``input_img`` may be a PIL image or anything ``image_loader`` accepts.
    """
    # `pillow` is the PIL.Image module; the class to test against is
    # `pillow.Image` (isinstance() raises TypeError when given a module).
    if not isinstance(input_img, pillow.Image):
        input_img = image_loader(input_img)  # loads pil image

    pixel_values = clip_processor(images=input_img, return_tensors="pt")["pixel_values"]
    pixel_values = pixel_values.to(device_id)
    img_embed = clip_model.get_image_features(pixel_values)[0]
    img_embed = img_embed.detach().cpu().numpy()

    scores, retrieved_images = embedded_data.get_nearest_examples(
        "embeddings", img_embed, k=k_count
    )

    return retrieved_images


image = "katara.png"
similar_images = image_search(image, 6)  # search for similar images

grid(similar_images)  # display grid of similar images
80 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## ripple_net *(wip)*
 2 | 
 3 | A library for text/image based search/retrieval for image datasets and files. Uses multimodal AI techniques/models like vector embeddings and CLIP.
 4 | 
 5 | ## Install
 6 | 
 7 | ```bash
 8 | $ pip install ripple_net
 9 | ```
10 | 
11 | ## Usage
12 | 
13 | - For text description-based search
14 | 
15 | ```python
16 | from ripple import ImageEmbedder, TextSearch # import classes
17 | 
18 | # load from a huggingface image dataset or load from a local image directory
19 | embedder = ImageEmbedder('huggan/wikiart', retrieval_type='text-image', dataset_type='huggingface', device='cuda')
20 | 
21 | # could also use 'cpu' if CUDA-enabled GPU isn't available
22 | embedded_images = embedder.create_embeddings(device="cuda", batch_size=32)
23 | 
24 | # initialize text - image search class
25 | text_search = TextSearch(embedded_images, embedder.embed_model)
26 | 
27 | # specify text/search query for image, and number of results to return
28 | scores, images = text_search.get_similar_images(query='painting of a river', k_images=10) 
29 | 
30 | images['image'][0].show()
31 | ```
32 | 
33 | - For image-based retrieval(image-image search)
34 | 
35 | ```python
36 | from ripple import ImageEmbedder, ImageSearch, image_loader
37 | 
38 |  # load dataset and initialize embedding class
39 | embedder = ImageEmbedder('lambdalabs/naruto-blip-captions', retrieval_type='image-image', dataset_type='huggingface',  device='cuda',
40 | )
41 | 
42 | # generate embeddings
43 | embedded_images = embedder.create_embeddings(device="cuda", batch_size=32)
44 | 
45 | # init image search class
46 | image_search = ImageSearch(embedded_images, device='cuda')
47 | 
48 | # retrieve similar images with image input
49 | input_image = image_loader('katara.png') # use library function to load image in PIL format
50 | 
51 | scores, images = image_search.image_search(input_img=input_image, k_count=5) # specify input image, and number of results to return
52 | 
53 | # display one of the retrieved images
54 | images['image'][0].show()
55 | # or using notebooks => images['image'][0]
56 | ```
57 | 
58 | - For auto image tagging/renaming
59 | 
60 | ```python
61 | from ripple import ImageTagger
62 | 
63 | # initialize the class with folder of choice
64 | folder = '/kaggle/working/images/drawings'
65 | 
66 | tagger = ImageTagger(folder)
67 | 
68 | # captions to label with
69 | captions = ['humans', 'animals', 'plants','land']
70 | 
71 | tagger.auto_tagger(captions) # rename all images and move to folders
72 | ```
73 | 
74 | ## Todo
75 | 
76 | - [ ] direct CLI usage
77 | 
78 | ## Acknowledgement
79 | 
80 | - <a href="https://sbert.net/">Sentence transformers </a> library by UKPLabs and Huggingface transformers.
81 | - <a href="https://huggingface.co/blog/not-lain/image-retriever">Image search engine</a>: article by <a href="https://github.com/not-lain">not-lain </a>
82 | - <a href="https://openai.com/index/clip/">CLIP (Contrastive Language–Image Pre-training)</a> research by OpenAI.
83 | 


--------------------------------------------------------------------------------
/ripple/image_embedder.py:
--------------------------------------------------------------------------------
  1 | from datasets import Dataset, load_dataset, Image
  2 | from sentence_transformers import SentenceTransformer
  3 | from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
  4 | from .utils import latency, get_all_images
  5 | from typing import Literal
  6 | import os
  7 | 
  8 | 
  9 | class ImageEmbedder:
 10 |     def __init__(
 11 |         self,
 12 |         image_data: str,
 13 |         retrieval_type: Literal["text-image", "image-image"],
 14 |         dataset_type: Literal["huggingface", "image folder"],
 15 |         device: Literal["cuda", "cpu"],
 16 |     ):
 17 |         assert retrieval_type in [
 18 |             "text-image",
 19 |             "image-image",
 20 |         ], "retrieval/search type must be either 'image-image' or 'text-image'"
 21 | 
 22 |         # initial variables
 23 |         # self.image_dataset = None
 24 |         self.dataset_type = dataset_type
 25 |         self.data_path = image_data
 26 |         self.retrieval_type = retrieval_type
 27 |         self.embed_model = None
 28 |         self.processor_model = None
 29 |         self.device = device
 30 | 
 31 |         # load dataset for different dataset types
 32 |         print(f"Loading huggingface dataset from {image_data}")
 33 |         if self.dataset_type == "huggingface":
 34 |             self.image_dataset = load_dataset(
 35 |                 image_data, split="train"
 36 |             )  # load from huggingface dataset instead
 37 | 
 38 |         elif self.dataset_type == "image folder":
 39 |             if os.path.exists(self.data_path):
 40 |                 image_list = get_all_images(image_data)
 41 |                 self.image_dataset = Dataset.from_dict(
 42 |                     {"image": image_list}
 43 |                 ).cast_column("image", Image())
 44 | 
 45 |         print(f"image dataset created from {image_data}")
 46 |         print("----")
 47 | 
 48 |         # define clip model for multimodal/contrastive image learning...and embeddings
 49 |         print("Initializing CLIP model")
 50 |         print("....")
 51 | 
 52 |         # load model based on retrieval type
 53 |         if self.retrieval_type == "text-image":
 54 |             self.embed_model = SentenceTransformer("clip-ViT-B-32")
 55 | 
 56 |         elif self.retrieval_type == "image-image":
 57 |             self.processor_model = AutoProcessor.from_pretrained(
 58 |                 "openai/clip-vit-large-patch14"
 59 |             )
 60 |             self.embed_model = AutoModelForZeroShotImageClassification.from_pretrained(
 61 |                 "openai/clip-vit-large-patch14", device_map=self.device
 62 |             )
 63 | 
 64 |         print(f"clip/embedding model -[{self.embed_model}] initialized")
 65 | 
 66 |     @latency
 67 |     def create_embeddings(self, device: Literal["cuda", "cpu"], batch_size: int = 32):
 68 |         assert device in [
 69 |             "cuda",
 70 |             "cpu",
 71 |         ], "Wrong id, device must must be either 'cuda' or 'cpu'"
 72 | 
 73 |         image_embeddings = None
 74 |         self.device = device
 75 | 
 76 |         # map embedding function to the dataset
 77 |         if self.retrieval_type == "text-image":
 78 |             image_embeddings = self.image_dataset.map(
 79 |                 lambda example: {
 80 |                     "embeddings": self.embed_model.encode(
 81 |                         example["image"], device=device
 82 |                     )
 83 |                 },
 84 |                 batched=True,
 85 |                 batch_size=batch_size,
 86 |             )
 87 | 
 88 |         elif self.retrieval_type == "image-image":
 89 |             image_embeddings = self.image_dataset.map(self._embed_image_batch)
 90 | 
 91 |         image_embeddings.add_faiss_index(column="embeddings")
 92 |         print(f"Image vector embeddings and FAISS-index created for {self.data_path}")
 93 |         return image_embeddings
 94 | 
 95 |     def _embed_image_batch(self, batch):
 96 |         pixels = self.processor_model(images=batch["image"], return_tensors="pt")[
 97 |             "pixel_values"
 98 |         ]
 99 |         pixels = pixels.to(self.device)
100 | 
101 |         image_embedding = self.embed_model.get_image_features(pixels)
102 |         batch["embeddings"] = image_embedding
103 | 
104 |         return batch
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2024 Chibuzo Kelechi
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------