├── .github └── workflows │ ├── ci.yml │ └── publish.yml ├── .gitignore ├── HISTORY.md ├── LICENSE ├── Makefile ├── README.md ├── check_version.py ├── image_embeddings ├── __init__.py ├── cli │ ├── __init__.py │ └── main.py ├── downloader │ ├── __init__.py │ └── tf_datasets_saver.py ├── inference │ ├── __init__.py │ └── inference.py ├── knn │ ├── __init__.py │ └── knn.py └── version.py ├── knn_example.png ├── notebooks ├── from_scratch.ipynb └── using_the_lib.ipynb ├── requirements-test.txt ├── requirements.txt ├── setup.py └── tests └── unit └── test_basic.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous integration 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | python-version: [3.6] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install, lint and unit tests 26 | run: | 27 | make venv-lint-test -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI 2 | 3 | on: push 4 | 5 | jobs: 6 | build-n-publish: 7 | name: Build and publish Python 🐍 distributions 📦 to PyPI 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@master 11 | - name: Set up Python 3.6 12 | uses: actions/setup-python@v1 13 | with: 14 | python-version: 3.6 15 | - name: Build a binary wheel and a source tarball 16 | run: | 17 | make build-dist 18 | - name: Check version for publishing on PyPI 19 | if: startsWith(github.event.ref, 'refs/tags') 20 | run: | 21 | python check_version.py 22 | echo "Publishing !!!!!" 23 | - name: Publish distribution 📦 to PyPI 24 | if: ${{ success() && startsWith(github.event.ref, 'refs/tags') }} 25 | uses: pypa/gh-action-pypi-publish@master 26 | with: 27 | password: ${{ secrets.pypi_password }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.egg* 3 | .vscode 4 | .ipynb_checkpoints -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | ## 1.4.0 2 | 3 | * add feature to save embeddings as numpy 4 | 5 | ## 1.3.1 6 | 7 | * fix fashion mnist 8 | 9 | ## 1.3.0 10 | 11 | * add number of example param in downloader 12 | 13 | ## 1.2.1 14 | 15 | * add missing dependencies 16 | 17 | ## 1.2.0 18 | 19 | Full features 20 | 21 | * tf record 22 | * inference 23 | * knn 24 | 25 | Both in cli and api 26 | 27 | ## 1.1.3 28 | 29 | * Cli init 30 | 31 | ## 1.1.2 32 | 33 | * Fix cli import 34 | 35 | ## 1.1.1 36 | 37 | * Fix deps 38 | 39 | ## 1.1.0 40 | 41 | * Better packaging 42 | 43 | ## 1.0.0 44 | 45 | * Empty package that does nothing yet! 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Romain Beaumont 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | python -m pip install -U pip setuptools wheel 3 | python -m pip install -r requirements.txt 4 | python -m pip install -e . 5 | 6 | install-dev: ## [Local development] Install test requirements 7 | python -m pip install -r requirements-test.txt 8 | 9 | lint: ## [Local development] Run mypy, pylint and black 10 | python -m black --check -l 120 image_embeddings 11 | 12 | test: ## [Local development] Run unit tests 13 | python -m pytest -v tests/unit 14 | 15 | black: ## [Local development] Auto-format python code using black 16 | python -m black -l 120 . 17 | 18 | build-dist: ## [Continuous integration] Build package for pypi 19 | python3.6 -m venv .env 20 | . .env/bin/activate && pip install -U pip setuptools wheel 21 | . .env/bin/activate && python setup.py sdist 22 | rm -rf .env 23 | 24 | venv-lint-test: ## [Continuous integration] Install in venv and run lint and test 25 | python3.6 -m venv .env && . .env/bin/activate && make install install-dev lint test && rm -rf .env 26 | 27 | .PHONY: help 28 | 29 | help: # Run `make help` to get help on the make commands 30 | @grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # image_embeddings 2 | [![pypi](https://img.shields.io/pypi/v/image_embeddings.svg)](https://pypi.python.org/pypi/image_embeddings) 3 | [![ci](https://github.com/rom1504/image_embeddings/workflows/Continuous%20integration/badge.svg)](https://github.com/rom1504/image_embeddings/actions?query=workflow%3A%22Continuous+integration%22) 4 | 5 | 6 | Using efficientnet to provide embeddings for retrieval. Read the blog post at https://medium.com/@rom1504/image-embeddings-ed1b194d113e 7 | 8 | Why this repo ? Embeddings are a widely used technique that is well known in scientific circles. But it seems to be underused and not very well known for most engineers. I want to show how easy it is to represent things as embeddings, and how many application this unlocks. Checkout the [demo](https://rom1504.github.io/image_embeddings/) first! 9 | 10 | ![knn example](knn_example.png) 11 | 12 | ## Workflow 13 | 1. download some pictures 14 | 2. run inference on them to get embeddings 15 | 3. simple knn example, to understand what's the point : click on some pictures and see KNN 16 | 17 | ## Simple Install 18 | 19 | Run `pip install image_embeddings` 20 | 21 | ## Example workflow 22 | 23 | 1. run `image_embeddings save_examples_to_folder --images_count=1000 --output_folder=tf_flower_images`, this will retrieve 1000 image files from https://www.tensorflow.org/datasets/catalog/tf_flowers (but you can also pick any other dataset) 24 | 2. produce tf records with `image_embeddings write_tfrecord --image_folder=tf_flower_images --output_folder=tf_flower_tf_records --shards=10` 25 | 3. run the inference with `image_embeddings run_inference --tfrecords_folder=tf_flower_tf_records --output_folder=tf_flower_embeddings` 26 | 4. run a random knn search on them `image_embeddings random_search --path=tf_flower_embeddings` 27 | 28 | Optionally if you want to use the embeddings in numpy (in other languages), run `image_embeddings embeddings_to_numpy --input_path=tf_flower_embeddings --output_path=tf_flower_numpy`. In particular this can be used in the [web demo](https://github.com/rom1504/image_embeddings/tree/web) 29 | 30 | ``` 31 | $ image_embeddings random_search --path=tf_flower_embeddings 32 | image_roses_261 33 | 160.83 image_roses_261 34 | 114.36 image_roses_118 35 | 102.77 image_roses_537 36 | 92.95 image_roses_659 37 | 88.49 image_roses_197 38 | ``` 39 | 40 | Explore the [Simple notebook](notebooks/using_the_lib.ipynb) for more details. 41 | 42 | You can try it locally or [try it in colab](https://colab.research.google.com/github/rom1504/image_embeddings/blob/master/notebooks/using_the_lib.ipynb) 43 | 44 | The [From scratch](notebooks/from_scratch.ipynb) notebook provides an explanation on how to build this from scratch. 45 | 46 | ## API 47 | 48 | ### image_embeddings.downloader 49 | 50 | Downloader from tensorflow datasets. Any other set of images could be used instead 51 | 52 | #### image_embeddings.downloader.save_examples_to_folder(output_folder, images_count=1000, dataset="tf_flowers") 53 | 54 | Save https://www.tensorflow.org/datasets/catalog/tf_flowers to folder 55 | Also works with other tf datasets 56 | 57 | ### image_embeddings.inference 58 | 59 | Create tf recors from images files, and apply inference with an efficientnet model. Other models could be used. 60 | 61 | #### image_embeddings.inference.write_tfrecord(image_folder, output_folder, num_shards=100) 62 | 63 | Write tf records from an image folders 64 | 65 | #### image_embeddings.inference.run_inference(tfrecords_folder, output_folder, batch_size=1000) 66 | 67 | Run inference on provided tf records and save to folder the embeddings 68 | 69 | ### image_embeddings.knn 70 | 71 | Convenience methods to read, build indices and apply search on them. These methods are provided as example. 72 | Use [faiss](https://github.com/facebookresearch/faiss) directly for bigger datasets. 73 | 74 | #### image_embeddings.knn.read_embeddings(path) 75 | 76 | Run embeddings from path and return a tuple with 77 | * embeddings as a numpy matrix 78 | * an id to name dictionary 79 | * a name to id dictionary 80 | 81 | #### image_embeddings.knn.build_index(emb) 82 | 83 | Build a simple faiss inner product index using the provided matrix of embeddings 84 | 85 | #### image_embeddings.knn.search(index, id_to_name, emb, k=5) 86 | 87 | Search the query embeddings and return an array of (distance, name) images 88 | 89 | #### image_embeddings.knn.display_picture(image_path, image_name) 90 | 91 | Display one picture from the given path and image name in jupyter 92 | 93 | #### image_embeddings.knn.display_results(image_path, results) 94 | 95 | Display the results from search method 96 | 97 | #### image_embeddings.knn.random_search(path) 98 | 99 | Load the embeddings, apply a random search on them and display the result 100 | 101 | #### image_embeddings.knn.embeddings_to_numpy(input_path, output_folder) 102 | 103 | Load the embeddings from the input folder as parquet and save them as 104 | * json for the id -> name mapping 105 | * numpy for the embeddings 106 | 107 | Particularly useful to read the embeddings from other languages 108 | 109 | ## Advanced Installation 110 | 111 | ### Prerequisites 112 | 113 | Make sure you use `python>=3.6` and an up-to-date version of `pip` and 114 | `setuptools` 115 | 116 | python --version 117 | pip install -U pip setuptools 118 | 119 | It is recommended to install `image_embeddings` in a new virtual environment. For 120 | example 121 | 122 | python3 -m venv image_embeddings_env 123 | source image_embeddings_env/bin/activate 124 | pip install -U pip setuptools 125 | pip install image_embeddings 126 | 127 | ### Using Pip 128 | 129 | pip install image_embeddings 130 | 131 | ### From Source 132 | 133 | First, clone the `image_embeddings` repo on your local machine with 134 | 135 | git clone https://github.com/rom1504/image_embeddings.git 136 | cd image_embeddings 137 | make install 138 | 139 | To install development tools and test requirements, run 140 | 141 | make install-dev 142 | 143 | ## Test 144 | 145 | To run unit tests in your current environment, run 146 | 147 | make test 148 | 149 | To run lint + unit tests in a fresh virtual environment, 150 | run 151 | 152 | make venv-lint-test 153 | 154 | ## Lint 155 | 156 | To run `black --check`: 157 | 158 | make lint 159 | 160 | To auto-format the code using `black` 161 | 162 | make black 163 | 164 | ## Tasks 165 | 166 | * [x] simple downloader in python 167 | * [x] simple inference in python using https://github.com/qubvel/efficientnet 168 | * [x] build python basic knn example using https://github.com/facebookresearch/faiss 169 | * [x] build basic ui using lit element and some brute force knn to show what it does, put in github pages 170 | * [x] use to illustrate embeddings blogpost 171 | -------------------------------------------------------------------------------- /check_version.py: -------------------------------------------------------------------------------- 1 | """Check version and git tag script.""" 2 | 3 | from pathlib import Path 4 | import re 5 | import sys 6 | import subprocess 7 | 8 | 9 | if __name__ == "__main__": 10 | # Read package version 11 | with Path("image_embeddings/version.py").open(encoding="utf-8") as file: 12 | metadata = dict(re.findall(r'__([a-z]+)__\s*=\s*"([^"]+)"', file.read())) 13 | version = metadata["version"] 14 | 15 | # Read git tag 16 | with subprocess.Popen(["git", "describe", "--tags"], stdout=subprocess.PIPE) as process: 17 | tagged_version = process.communicate()[0].strip().decode(encoding="utf-8") 18 | 19 | # Exit depending on version and tagged_version 20 | if version == tagged_version: 21 | print(f"Tag and version are the same ({version}) !") 22 | sys.exit(0) 23 | else: 24 | print(f"Tag {tagged_version} and version {version} are not the same !") 25 | sys.exit(1) 26 | -------------------------------------------------------------------------------- /image_embeddings/__init__.py: -------------------------------------------------------------------------------- 1 | import image_embeddings.downloader 2 | import image_embeddings.inference 3 | import image_embeddings.knn 4 | import image_embeddings.cli 5 | -------------------------------------------------------------------------------- /image_embeddings/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rom1504/image_embeddings/285c6d78ca147eade188b97ce2b295a18cc479d8/image_embeddings/cli/__init__.py -------------------------------------------------------------------------------- /image_embeddings/cli/main.py: -------------------------------------------------------------------------------- 1 | from image_embeddings.downloader import save_examples_to_folder 2 | from image_embeddings.inference import write_tfrecord 3 | from image_embeddings.inference import run_inference 4 | from image_embeddings.knn import random_search, embeddings_to_numpy 5 | import fire 6 | import logging 7 | 8 | 9 | def main(): 10 | """Main entry point""" 11 | logging.basicConfig(level=logging.INFO) 12 | logging.getLogger("tensorflow").handlers = [] 13 | fire.Fire( 14 | { 15 | "save_examples_to_folder": save_examples_to_folder, 16 | "write_tfrecord": write_tfrecord, 17 | "run_inference": run_inference, 18 | "random_search": random_search, 19 | "embeddings_to_numpy": embeddings_to_numpy, 20 | } 21 | ) 22 | -------------------------------------------------------------------------------- /image_embeddings/downloader/__init__.py: -------------------------------------------------------------------------------- 1 | from image_embeddings.downloader.tf_datasets_saver import save_examples, save_examples_to_folder 2 | -------------------------------------------------------------------------------- /image_embeddings/downloader/tf_datasets_saver.py: -------------------------------------------------------------------------------- 1 | from absl import logging 2 | from pathlib import Path 3 | 4 | from tensorflow_datasets.core import dataset_utils 5 | from tensorflow_datasets.core import features as features_lib 6 | from PIL import Image 7 | from efficientnet.preprocessing import center_crop_and_resize 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | import tensorflow_datasets as tfds 12 | import fire 13 | import logging 14 | 15 | LOGGER = logging.getLogger(__name__) 16 | 17 | 18 | def save_examples(ds_info, ds, num_examples=10, folder=".", image_key=None): 19 | """Save images from an image classification dataset. 20 | 21 | Only works with datasets that have 1 image feature and optionally 1 label 22 | feature (both inferred from `ds_info`). Note the dataset should be unbatched. 23 | 24 | Usage: 25 | 26 | ```python 27 | ds, ds_info = tfds.load('cifar10', split='train', with_info=True) 28 | fig = save_examples(ds_info, ds) 29 | ``` 30 | 31 | Args: 32 | ds_info: The dataset info object to which extract the label and features 33 | info. Available either through `tfds.load('mnist', with_info=True)` or 34 | `tfds.builder('mnist').info` 35 | ds: `tf.data.Dataset`. The tf.data.Dataset object to visualize. Examples 36 | should not be batched. 37 | num_examples: `int`. Number of examples to save 38 | folder: `str`. Where to save images 39 | image_key: `string`, name of the feature that contains the image. If not 40 | set, the system will try to auto-detect it. 41 | 42 | Returns: 43 | """ 44 | 45 | if not image_key: 46 | # Infer the image and label keys 47 | image_keys = [k for k, feature in ds_info.features.items() if isinstance(feature, features_lib.Image)] 48 | 49 | if not image_keys: 50 | raise ValueError( 51 | "Visualisation not supported for dataset `{}`. Was not able to " 52 | "auto-infer image.".format(ds_info.name) 53 | ) 54 | 55 | if len(image_keys) > 1: 56 | raise ValueError( 57 | "Multiple image features detected in the dataset. Using the first one. You can " 58 | "use `image_key` argument to override. Images detected: %s" % (",".join(image_keys)) 59 | ) 60 | 61 | image_key = image_keys[0] 62 | 63 | label_keys = [k for k, feature in ds_info.features.items() if isinstance(feature, features_lib.ClassLabel)] 64 | 65 | label_key = label_keys[0] if len(label_keys) == 1 else None 66 | if not label_key: 67 | logging.info("Was not able to auto-infer label.") 68 | 69 | examples = list(dataset_utils.as_numpy(ds.take(num_examples))) 70 | 71 | for i, ex in enumerate(examples): 72 | if not isinstance(ex, dict): 73 | raise ValueError( 74 | "tfds.show_examples requires examples as `dict`, with the same " 75 | "structure as `ds_info.features`. It is currently not compatible " 76 | "with `as_supervised=True`. Received: {}".format(type(ex)) 77 | ) 78 | 79 | # Plot the image 80 | image = ex[image_key] 81 | if len(image.shape) != 3: 82 | raise ValueError( 83 | "Image dimension should be 3. tfds.show_examples does not support " "batched examples or video." 84 | ) 85 | _, _, c = image.shape 86 | if c == 1: 87 | image = image.reshape(image.shape[:2]) 88 | image = center_crop_and_resize(image, 224).astype(np.uint8) 89 | im = Image.fromarray(image) 90 | if label_key: 91 | label = ex[label_key] 92 | label_str = ds_info.features[label_key].int2str(label).replace("/", "_") 93 | else: 94 | label_str = "" 95 | im.save(f"{folder}/image_{label_str}_{i}.jpeg") 96 | 97 | 98 | def save_examples_to_folder(output_folder, images_count=1000, dataset="tf_flowers"): 99 | logging.basicConfig(level=logging.INFO) 100 | logging.getLogger("tensorflow").handlers = [] 101 | 102 | ds, ds_info = tfds.load(dataset, split="train", with_info=True) 103 | Path(output_folder).mkdir(parents=True, exist_ok=True) 104 | 105 | save_examples(ds_info, ds, images_count, output_folder) 106 | -------------------------------------------------------------------------------- /image_embeddings/inference/__init__.py: -------------------------------------------------------------------------------- 1 | from image_embeddings.inference.inference import write_tfrecord, run_inference 2 | -------------------------------------------------------------------------------- /image_embeddings/inference/inference.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import time 4 | from efficientnet.tfkeras import EfficientNetB0 5 | import pyarrow.parquet as pq 6 | import pyarrow as pa 7 | from pathlib import Path 8 | 9 | 10 | def _int64_feature(value): 11 | """Returns an int64_list from a bool / enum / int / uint.""" 12 | return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) 13 | 14 | 15 | def _bytes_feature(value): 16 | """Returns a bytes_list from a string / byte.""" 17 | if isinstance(value, type(tf.constant(0))): 18 | value = value.numpy() # BytesList won't unpack a string from an EagerTensor. 19 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) 20 | 21 | 22 | def serialize_example(image, image_name): 23 | feature = {"image_name": _bytes_feature(image_name), "image": _bytes_feature(image)} 24 | 25 | example_proto = tf.train.Example(features=tf.train.Features(feature=feature)) 26 | return example_proto.SerializeToString() 27 | 28 | 29 | def tf_serialize_example(image, image_name): 30 | tf_string = tf.py_function(serialize_example, (image, image_name), tf.string) 31 | return tf.reshape(tf_string, ()) 32 | 33 | 34 | def process_path(file_path): 35 | parts = tf.strings.split(file_path, "/") 36 | image_name = tf.strings.split(parts[-1], ".")[0] 37 | raw = tf.io.read_file(file_path) 38 | return raw, image_name 39 | 40 | 41 | def read_image_file_write_tfrecord(files_ds, output_filename): 42 | image_ds = files_ds.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE) 43 | serialized_features_dataset = image_ds.map(tf_serialize_example, num_parallel_calls=tf.data.experimental.AUTOTUNE) 44 | writer = tf.data.experimental.TFRecordWriter(output_filename) 45 | writer.write(serialized_features_dataset) 46 | 47 | 48 | def image_files_to_tfrecords(list_ds, output_folder, num_shard): 49 | start = time.time() 50 | for shard_id in range(0, num_shard): 51 | shard_list = list_ds.shard(num_shards=num_shard, index=shard_id) 52 | read_image_file_write_tfrecord(shard_list, output_folder + "/part-" + "{:03d}".format(shard_id) + ".tfrecord") 53 | print("Shard " + str(shard_id) + " saved after " + str(int(time.time() - start)) + "s") 54 | 55 | 56 | feature_description = { 57 | "image_name": tf.io.FixedLenFeature([], tf.string), 58 | "image": tf.io.FixedLenFeature([], tf.string), 59 | } 60 | 61 | 62 | def _parse_function(example_proto): 63 | return tf.io.parse_single_example(example_proto, feature_description) 64 | 65 | 66 | def preprocess_image(d): 67 | image_name = d["image_name"] 68 | raw = d["image"] 69 | image = tf.image.decode_jpeg(raw) 70 | image = tf.image.convert_image_dtype(image, tf.float32) 71 | 72 | return image, image_name 73 | 74 | 75 | def read_tfrecord(filename): 76 | filenames = [filename] 77 | raw_dataset = tf.data.TFRecordDataset(filenames) 78 | return ( 79 | raw_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE) 80 | .map(preprocess_image, num_parallel_calls=tf.data.experimental.AUTOTUNE) 81 | .apply(tf.data.experimental.ignore_errors()) 82 | ) 83 | 84 | 85 | def tfrecords_to_write_embeddings(tfrecords_folder, output_folder, model, batch_size): 86 | tfrecords = [ 87 | f.numpy().decode("utf-8") for f in tf.data.Dataset.list_files(tfrecords_folder + "/*.tfrecord", shuffle=False) 88 | ] 89 | start = time.time() 90 | for shard_id, tfrecord in enumerate(tfrecords): 91 | shard = read_tfrecord(tfrecord) 92 | embeddings = images_to_embeddings(model, shard, batch_size) 93 | print("") 94 | print("Shard " + str(shard_id) + " done after " + str(int(time.time() - start)) + "s") 95 | save_embeddings_ds_to_parquet( 96 | embeddings, shard, output_folder + "/part-" + "{:03d}".format(shard_id) + ".parquet" 97 | ) 98 | print("Shard " + str(shard_id) + " saved after " + str(int(time.time() - start)) + "s") 99 | 100 | 101 | def list_files(images_path): 102 | return tf.data.Dataset.list_files(images_path + "/*", shuffle=False).cache() 103 | 104 | 105 | def process_path(file_path): 106 | parts = tf.strings.split(file_path, "/") 107 | image_name = tf.strings.split(parts[-1], ".")[0] 108 | raw = tf.io.read_file(file_path) 109 | return raw, image_name 110 | 111 | 112 | def read_data_from_files(list_ds): 113 | return list_ds.map( 114 | process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE 115 | ) # .apply(tf.data.experimental.ignore_errors()) 116 | 117 | 118 | def images_to_embeddings(model, dataset, batch_size): 119 | return model.predict(dataset.batch(batch_size).map(lambda image_raw, image_name: image_raw), verbose=1) 120 | 121 | 122 | def save_embeddings_ds_to_parquet(embeddings, dataset, path): 123 | embeddings = pa.array(embeddings.tolist(), type=pa.list_(pa.float32())) 124 | image_names = pa.array(dataset.map(lambda image_raw, image_name: image_name).as_numpy_iterator()) 125 | table = pa.Table.from_arrays([image_names, embeddings], ["image_name", "embedding"]) 126 | pq.write_table(table, path) 127 | 128 | 129 | def compute_save_embeddings(list_ds, folder, num_shards, model, batch_size): 130 | start = time.time() 131 | for shard_id in range(0, num_shards): 132 | shard_list = list_ds.shard(num_shards=num_shards, index=shard_id) 133 | shard = read_data_from_files(shard_list) 134 | embeddings = images_to_embeddings(model, shard, batch_size) 135 | print("Shard " + str(shard_id) + " done after " + str(int(time.time() - start)) + "s") 136 | save_embeddings_ds_to_parquet(embeddings, shard, folder + "/part-" + "{:03d}".format(shard_id) + ".parquet") 137 | print("Shard " + str(shard_id) + " saved after " + str(int(time.time() - start)) + "s") 138 | print("Total time : " + str(int(time.time() - start))) 139 | 140 | 141 | def run_inference_from_files(image_folder, output_folder, num_shards=10, batch_size=1000): 142 | model = EfficientNetB0(weights="imagenet", include_top=False, pooling="avg") 143 | list_ds = list_files(image_folder) 144 | compute_save_embeddings(list_ds, output_folder, num_shards, model, batch_size) 145 | 146 | 147 | def write_tfrecord(image_folder, output_folder, num_shards=10): 148 | Path(output_folder).mkdir(parents=True, exist_ok=True) 149 | list_ds = list_files(image_folder) 150 | image_files_to_tfrecords(list_ds, output_folder, num_shards) 151 | 152 | 153 | def run_inference(tfrecords_folder, output_folder, batch_size=1000): 154 | Path(output_folder).mkdir(parents=True, exist_ok=True) 155 | model = EfficientNetB0(weights="imagenet", include_top=False, pooling="avg") 156 | tfrecords_to_write_embeddings(tfrecords_folder, output_folder, model, batch_size) 157 | -------------------------------------------------------------------------------- /image_embeddings/knn/__init__.py: -------------------------------------------------------------------------------- 1 | from image_embeddings.knn.knn import ( 2 | read_embeddings, 3 | build_index, 4 | display_picture, 5 | display_results, 6 | search, 7 | random_search, 8 | embeddings_to_numpy, 9 | ) 10 | -------------------------------------------------------------------------------- /image_embeddings/knn/knn.py: -------------------------------------------------------------------------------- 1 | import pyarrow.parquet as pq 2 | 3 | from dataclasses import dataclass 4 | from IPython.display import Image, display 5 | from ipywidgets import widgets, HBox, VBox 6 | import faiss 7 | import numpy as np 8 | import random 9 | import json 10 | from pathlib import Path 11 | 12 | 13 | def read_embeddings(path): 14 | emb = pq.read_table(path).to_pandas() 15 | id_to_name = {k: v.decode("utf-8") for k, v in enumerate(list(emb["image_name"]))} 16 | name_to_id = {v: k for k, v in id_to_name.items()} 17 | embgood = np.stack(emb["embedding"].to_numpy()) 18 | return [id_to_name, name_to_id, embgood] 19 | 20 | 21 | def embeddings_to_numpy(input_path, output_path): 22 | emb = pq.read_table(input_path).to_pandas() 23 | 24 | Path(output_path).mkdir(parents=True, exist_ok=True) 25 | id_name = [{"id": k, "name": v.decode("utf-8")} for k, v in enumerate(list(emb["image_name"]))] 26 | json.dump(id_name, open(output_path + "/id_name.json", "w")) 27 | 28 | emb = np.stack(emb["embedding"].to_numpy()) 29 | np.save(open(output_path + "/embedding.npy", "wb"), emb) 30 | 31 | 32 | def build_index(emb): 33 | d = emb.shape[1] 34 | xb = emb 35 | index = faiss.IndexFlatIP(d) 36 | index.add(xb) 37 | return index 38 | 39 | 40 | def random_search(path): 41 | [id_to_name, name_to_id, embeddings] = read_embeddings(path) 42 | index = build_index(embeddings) 43 | p = random.randint(0, len(id_to_name) - 1) 44 | print(id_to_name[p]) 45 | results = search(index, id_to_name, embeddings[p]) 46 | for e in results: 47 | print(f"{e[0]:.2f} {e[1]}") 48 | 49 | 50 | def search(index, id_to_name, emb, k=5): 51 | D, I = index.search(np.expand_dims(emb, 0), k) # actual search 52 | return list(zip(D[0], [id_to_name[x] for x in I[0]])) 53 | 54 | 55 | def display_picture(image_path, image_name): 56 | display(Image(filename=f"{image_path}/{image_name}.jpeg")) 57 | 58 | 59 | def display_results(image_path, results): 60 | hbox = HBox( 61 | [ 62 | VBox( 63 | [ 64 | widgets.Label(f"{distance:.2f} {image_name}"), 65 | widgets.Image(value=open(f"{image_path}/{image_name}.jpeg", "rb").read()), 66 | ] 67 | ) 68 | for distance, image_name in results 69 | ] 70 | ) 71 | display(hbox) 72 | -------------------------------------------------------------------------------- /image_embeddings/version.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=all 2 | 3 | __version__ = "1.4.0" 4 | __author__ = "Romain Beaumont" 5 | -------------------------------------------------------------------------------- /knn_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rom1504/image_embeddings/285c6d78ca147eade188b97ce2b295a18cc479d8/knn_example.png -------------------------------------------------------------------------------- /notebooks/using_the_lib.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import the image_embeddings lib" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "!pip install -U image_embeddings # don't run this line if running this directly in the repo" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import image_embeddings" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Download an example dataset\n", 33 | "\n", 34 | "For this notebook, I download a dataset from https://www.tensorflow.org/datasets/catalog/overview\n", 35 | "but any image can be used.\n", 36 | "For example :\n", 37 | "* tf_flowers\n", 38 | "* stanford_dogs\n", 39 | "* cats_vs_dogs\n", 40 | "* horses_or_humans\n", 41 | "* imagewang" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# Let's define some paths where to save images, tfrecords and embeddings\n", 51 | "from pathlib import Path\n", 52 | "home = str(Path.home())\n", 53 | "dataset = \"tf_flowers\"\n", 54 | "path_images = f\"{home}/{dataset}/images\"\n", 55 | "path_tfrecords = f\"{home}/{dataset}/tfrecords\"\n", 56 | "path_embeddings = f\"{home}/{dataset}/embeddings\"" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stderr", 66 | "output_type": "stream", 67 | "text": [ 68 | "INFO:absl:Load pre-computed DatasetInfo (eg: splits, num examples,...) from GCS: tf_flowers/3.0.1\n", 69 | "INFO:absl:Load dataset info from /tmp/tmpzi_nv2hetfds\n", 70 | "INFO:absl:Generating dataset tf_flowers (/home/rom1504/tensorflow_datasets/tf_flowers/3.0.1)\n" 71 | ] 72 | }, 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "\u001b[1mDownloading and preparing dataset tf_flowers/3.0.1 (download: 218.21 MiB, generated: 221.83 MiB, total: 440.05 MiB) to /home/rom1504/tensorflow_datasets/tf_flowers/3.0.1...\u001b[0m\n" 78 | ] 79 | }, 80 | { 81 | "name": "stderr", 82 | "output_type": "stream", 83 | "text": [ 84 | "WARNING:absl:Dataset tf_flowers is hosted on GCS. It will automatically be downloaded to your\n", 85 | "local data directory. If you'd instead prefer to read directly from our public\n", 86 | "GCS bucket (recommended if you're running on GCP), you can instead pass\n", 87 | "`try_gcs=True` to `tfds.load` or set `data_dir=gs://tfds-data/datasets`.\n", 88 | "\n" 89 | ] 90 | }, 91 | { 92 | "data": { 93 | "application/vnd.jupyter.widget-view+json": { 94 | "model_id": "754190ac3c84421282fbd7dbf7b37441", 95 | "version_major": 2, 96 | "version_minor": 0 97 | }, 98 | "text/plain": [ 99 | "HBox(children=(FloatProgress(value=0.0, description='Dl Completed...', max=5.0, style=ProgressStyle(descriptio…" 100 | ] 101 | }, 102 | "metadata": {}, 103 | "output_type": "display_data" 104 | }, 105 | { 106 | "name": "stderr", 107 | "output_type": "stream", 108 | "text": [ 109 | "INFO:absl:Load dataset info from /home/rom1504/tensorflow_datasets/tf_flowers/3.0.1.incompleteJG0WW4\n", 110 | "INFO:absl:Constructing tf.data.Dataset for split train, from /home/rom1504/tensorflow_datasets/tf_flowers/3.0.1\n" 111 | ] 112 | }, 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "\n", 118 | "\n", 119 | "\u001b[1mDataset tf_flowers downloaded and prepared to /home/rom1504/tensorflow_datasets/tf_flowers/3.0.1. Subsequent calls will reuse this data.\u001b[0m\n" 120 | ] 121 | } 122 | ], 123 | "source": [ 124 | "image_embeddings.downloader.save_examples_to_folder(output_folder=path_images, images_count=1000, dataset=dataset)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "## Transform image to tf records\n", 132 | "\n", 133 | "Tf record is an efficient format to store image, it's better to use than raw image file for inference" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 4, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "Shard 0 saved after 0s\n", 146 | "Shard 1 saved after 0s\n", 147 | "Shard 2 saved after 0s\n", 148 | "Shard 3 saved after 0s\n", 149 | "Shard 4 saved after 0s\n", 150 | "Shard 5 saved after 1s\n", 151 | "Shard 6 saved after 1s\n", 152 | "Shard 7 saved after 1s\n", 153 | "Shard 8 saved after 1s\n", 154 | "Shard 9 saved after 1s\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "image_embeddings.inference.write_tfrecord(image_folder=path_images,\n", 160 | " output_folder=path_tfrecords,\n", 161 | " num_shards=10)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "## Build embeddings\n", 169 | "\n", 170 | "Here, efficientnet is used, but the code is particularly simple, and any other model could be used\n", 171 | "The input is tfrecords and the output is embeddings" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 5, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "1/1 [==============================] - 0s 14ms/step\n", 184 | "\n", 185 | "Shard 0 done after 6s\n", 186 | "Shard 0 saved after 6s\n", 187 | "1/1 [==============================] - 0s 7ms/step\n", 188 | "\n", 189 | "Shard 1 done after 11s\n", 190 | "Shard 1 saved after 11s\n", 191 | "1/1 [==============================] - 0s 5ms/step\n", 192 | "\n", 193 | "Shard 2 done after 16s\n", 194 | "Shard 2 saved after 16s\n", 195 | "1/1 [==============================] - 0s 5ms/step\n", 196 | "\n", 197 | "Shard 3 done after 21s\n", 198 | "Shard 3 saved after 21s\n", 199 | "1/1 [==============================] - 0s 9ms/step\n", 200 | "\n", 201 | "Shard 4 done after 26s\n", 202 | "Shard 4 saved after 26s\n", 203 | "1/1 [==============================] - 0s 5ms/step\n", 204 | "\n", 205 | "Shard 5 done after 31s\n", 206 | "Shard 5 saved after 31s\n", 207 | "1/1 [==============================] - 0s 6ms/step\n", 208 | "\n", 209 | "Shard 6 done after 36s\n", 210 | "Shard 6 saved after 36s\n", 211 | "1/1 [==============================] - 0s 7ms/step\n", 212 | "\n", 213 | "Shard 7 done after 41s\n", 214 | "Shard 7 saved after 41s\n", 215 | "1/1 [==============================] - 0s 7ms/step\n", 216 | "\n", 217 | "Shard 8 done after 45s\n", 218 | "Shard 8 saved after 46s\n", 219 | "1/1 [==============================] - 0s 5ms/step\n", 220 | "\n", 221 | "Shard 9 done after 50s\n", 222 | "Shard 9 saved after 50s\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | "image_embeddings.inference.run_inference(tfrecords_folder=path_tfrecords,\n", 228 | " output_folder=path_embeddings,\n", 229 | " batch_size=1000)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "## Read the embeddings and build an index with it\n", 237 | "\n", 238 | "The knn index is built using https://github.com/facebookresearch/faiss which makes it possible\n", 239 | "to search embeddings in log(N) with lot of options to reduce memory footprint" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 31, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "[id_to_name, name_to_id, embeddings] = image_embeddings.knn.read_embeddings(path_embeddings)\n", 249 | "index = image_embeddings.knn.build_index(embeddings)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "## Search in the index\n", 257 | "\n", 258 | "Let's pick a random product by id, retrieve its embedding and search in the index\n", 259 | "\n", 260 | "Then let's display the closest products" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 21, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "image_dandelion_443\n" 273 | ] 274 | }, 275 | { 276 | "data": { 277 | "image/jpeg": "\n", 278 | "text/plain": [ 279 | "" 280 | ] 281 | }, 282 | "metadata": {}, 283 | "output_type": "display_data" 284 | }, 285 | { 286 | "data": { 287 | "application/vnd.jupyter.widget-view+json": { 288 | "model_id": "bf080f250d354dc0b119a42519eaa239", 289 | "version_major": 2, 290 | "version_minor": 0 291 | }, 292 | "text/plain": [ 293 | "HBox(children=(VBox(children=(Label(value='1.00 image_dandelion_443'), Image(value=b'\\xff\\xd8\\xff\\xe0\\x00\\x10J…" 294 | ] 295 | }, 296 | "metadata": {}, 297 | "output_type": "display_data" 298 | } 299 | ], 300 | "source": [ 301 | "p=225\n", 302 | "print(id_to_name[p])\n", 303 | "image_embeddings.knn.display_picture(path_images, id_to_name[p])\n", 304 | "results = image_embeddings.knn.search(index, id_to_name, embeddings[p])\n", 305 | "image_embeddings.knn.display_results(path_images, results)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "## Combination of images ?\n", 313 | "\n", 314 | "Any vector in the same space can be used as query\n", 315 | "For example I could have 2 image and want to find some example that are closeby to the 2, \n", 316 | "Let's just average them and see that happens !" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 32, 322 | "metadata": {}, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "image/jpeg": "\n", 327 | "text/plain": [ 328 | "" 329 | ] 330 | }, 331 | "metadata": {}, 332 | "output_type": "display_data" 333 | }, 334 | { 335 | "data": { 336 | "image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCADgAOADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD1u90dJPvknrhlrmbq2e0uPLJxg5Vh+hruzlVADZ9VNV7q2tbpPLkiUlhwO/4UpTUfiOGrhL6xOLU/LlECDu3932HpTUYcxksIj1x1HvWze6OI1LW0ZcfxLu549B3pjaabnToZbdF3qDvQDBfn19RXMpU909GYexmmx+nXzQQos0g/dMGXn+E9QP0rq1dZkODn2rggz+WsZJ25IKEDj6+la+n6nIluPMz+6+R/931NcuIqyoyu9U9H/mepgU61JxW8fyf/AATQuRHZuHfmMMDgdjU8t/I5Se1bdFghh1GfQ+lRvKlzm2kIIdd0b9mH/wBas/SZH024mjlIZHYZAOR9frXJhMxVGyqP3G7a9PJ/15nZKgqkG1pJfibsGoxyYD/u29+h/GrmQRn9apvYwTJujwuecjkGq4jubNvlJKZxxyPy7V9BZPY4btbkDTBbaHPP7xlx6jOK5jz/ADdSLKchZP0z/wDWrWvnZLFtpHyTvjvxn/69ZlpaGOz3OAZHJJJPv2/Ovlq9WMaST66HvYaMYxcn10KV/wDuNYuFPRiGz9TV9SIL2G45EcwKSEdm7VV1ZA99bS/89IufqP8A9VOs5Bc201vySBvAx7Vxp8+HhJ9rP8vwaR2tXpqXlr+R1GnH7Pd5DfJMACO24dD+VW9UkZ4/skL7ZZflHsD3/LNYOk3f2uyBJO9Tj3Bp9zLcRSXd1KxEpHlxgDpkYB/nW2XZg1Slg6796Lt/27v/AMD5nkzoP22u6/PoY2u3ImE8sRbZGQkZHHyj/HrVvSNQnlnSO7QF1TCS9yp6ZpjWpaO3U42l2PPsMVagg/dKUwGU7T9KyxGJhUoSpS2lden/AA1jvlyez5TG1i/NvbhVwHyQp7jp0981L4WgludSt4HY7YR5zgfwgcgH6nH61matA/8Ab8kJDSMCCi9gCOK7Dw0kWn6RdX8mC5Ygt/ex2H413YGjBRi3svef5m2JlGlhbR1cv1/pmvqF0sKyndhgMD/eNcNqMhkMQdiTLKMj2Bya0ri6lunihiBeUgs3sW659sYqSC0sorqEyj7RNEVQ/wB1Cx/U1nTdSvV59k3/AF+hy4eCw8bvVmLdKPNyVIZx+hr0TRSDpFrg5AjAz9K87v7lrnXJTzsDkAAdB2Fd74bYtokHGByB+detgFytx8jPMk/ZQb/rQ16KTvS16Z4wUUUUAZcqM0pfeuSvTNK8O6MeYMjHI7fgaVbVTarBMNwwVJ9q5+4hv9Ib/WO0OflkU8ewIPeuDE4z2N+aF4re3+R00aSqe6pa/mbOfLH7xsoSAHxyP971+tKRnI+6/bjjFZaapNw4WKXA+YY2k/0qa11Wyu5WiWYK2f8AUyfKyn29RXluth8Un7CXqndP+uz6eaFPC1YapXQtzHuDeaiEngyAZzWTORZ3qHhYnADAjIweK6JgTnBGexUcGsTV4IwsZKhshsrnAPtmuNqrzcs5Nrz6f1/Vy8Ckq1rbogaU2o+yscRg7ojnofSlvbhhJFebl3xjbL7qT1qi91bXMH2NN4njXLCQ8j0FV7K6ZpDZ3J27ztDsOh9K5KlOcYy5lddfNd/keyqN1zdV/VzsNN1FY448uDbt91h/Cf8ACtl5kSNnZhtHWvPLe5bSbhreUfuz/CeRz/jXT2OpQFEhmI8iT7jMehHODXpZTmMqbWGru8X8MvLs/wBGeXjMHKPvw1Rj3s+6O8j3ZZZCyr7Z5/pT4mQ2zbcZT5s54rL1W5W11i5VsgM7pgDrmtHTPKlsYm/vowb2I6/0rjzJeyp3fSR6EoctJS72/IqXY82ysph/DIynvxnrWeJvst5uJ43FT9DV4Mw8P3m05kgYtjvjrWPdnzTHMD8k0YIx6jg/zq8JFSjOm9k2vv8Ae/U7aEb3i9rtfr+psaev9l6l5IYmCddyE9j3FbF8xmW39nyT7YPFYBaS6sbaVcb0HIJ53L1/MYP41qpdCaEPyAqhueOtcMoS9tGt11UvldHLWg3JTe+zLMi4ubdMYCxljirccQWM5HfNNKh7lGA4KdasHhCMV4NXEylCMVp1frdnFOeiRzXiBRDdrKv35UEaH0Pc/lUiPLd6Ta6battCkvIxPCqDwT+pxV/WLMXtkqGRYjvX94w4UdzVG7ubO2tDp9gCynh5W+85719VlGIp1MEuZ2a0a72/TudcJc9OEUtU/kvN/foiKK5iijkit328ZeU9SO5Pp7CqySTJG18ciJGPkL3JIxuJ9cZq3Hp5dEthkA8yHu3tWlLZBoo7dQCqDc3pXPWzSFOpGz3/AAX/AASpVIRdu5x6GSRkRTulnbK465PSvVtMtfsWnwW3eNAD9e9cjoWjoPEvm7f3cEQkUejN/wDqNduua+uwCi6SqR+1qefmddTlGEdl+o6ikJxTWfaMnoOtdp5g6kJ4rNk1eIkiD96B1cH5B+Pf8KhWe5vASjBIu8hGF/D1rzcRmdKnP2NP359l09Xsvn9xoqUnq9BwuM2rSGQMMZB7VTh1i3lYxyspjf5GR/5fSsO4ujZiWxldlgkHDHoD/Q1hzN5ToJJQMcbuvy+tePiMc68k43i1uv63X9WPVpYJNO/yOov7VbBzIm77M7ffHLRH0PqKpz2FveqstzFHPGPuyKP8kVkQ61cSQvCJGPG3nn8D61FFqd1bSDymCtnnIyrV5WIwkpy56L5Zf1t2O2nQqpb6nQxafsG7TtQuIjx8rSFx+tLem9mSNLhUZl/iXjI9fSqFlqkF7L5cqi0ugflOfkf/AANbaTSZMVxF17j7pFcDxmMwsuWbv5P9H1MKilCScldrvv8Af1MOxt/MupndFbeR8+OVYVpPZo5CSqpPqw60luiw3kgjztLEhe9agAddrAFCKnGZhP2vMtmFaq1K62M2TT3lh8mZdyg/u2A5UenvXPSvc6XMYZVLxE/KpHX6ehrtliKD5GO3sDVK8WG5QxzRng/exnFLCY6UZ8lWN4vt+a/4AqGJs7SV0cZfkTxiWJy653ZJ5/z7Vd0DUlWU28v3g+9Affhh/KtA+H1kjZ7WdMk52nkE/wBK5bUILjR9RilaNomLcKTw3sDX0NKtQxkJUFK7/H8T04OlXg6SZ0limNTngJJjkjZGz0OMiueSV7eAQlcyW8pXn09KvSajunjuIX/uyY788H+VU9UKx63MfmEc8fmKQATnGen1AqsMnGq+b7SX3x0f4P8AA0oQak+bqvxWn5Mt2E7yu8MIyzN8igdWPT9Dj8BXQTxQWFkbOFg8i/6yT/bPYewrktCuDHqAxweVUseAex/KuiEL3NwUgwQrDqeWPrVYjloxnfrr92n4mOKp8tXV2S1N60kDpCQc4UiroG6N++Mc1mWg8i6W1JBZY8nHr3NaKtgMPUYr4mVo1ddmn+N7HiVVroU73DWzKfuhl3fTIrIv9W8y7aw0yJUlJ+abgBB60/xJqkVlZSwBh58qDavfryf0rltLndbkTs2Xd1OOpbHavZyzBc+HdWa9F0fn5r8D08Jheam6sltsj0fS7Ew2ytdSb3C/M3qaTVJo9M024vZU37VLCNTjNMtrlZIlwSSOSD1z71leK79E0a6jcks8fboOcUsJVoV6kKEqd5X1+e7fklpFdN9dDzYU51K6Uur1+8v+DNRfVBf3Uiqh3qoVewwK6onHSvP/AIcTOv2+HaSg2HP+16U7xP4zZrl9L0hvmX5Z7kchfVV9/ev0Kl7OjRSjpFLQnE4OdTGTpUlt9yVkdNqniOx0w+W0gkuO0SHJH19KwZtSuNSZRckiJ/u20f8AF7H1rlbBWkuUhtYzNcSnIGck/wC0T6e5/Cuziaw8NQLLcN597IOoIyfXbnoo9TXn4itVrq0XyQ79fkbzwlPDWjH3pv8Ar5Gnb6cEjWS72hBjbD/CPT603UNZgtSFALynhEHJ/LtXKXni2W8lMdqcyNwGUZAPouf5mrmladcRN50x2ytyXY5IHsDzn3NeRiKtPC0+Sn7kP/Jm+7ff0uyHg3Bc+Ifoh2ozwSRxLc4ktZxmG6Qfoaw20/y2zG4MecHHIHuPQ1ahWbSnuLZR9s08SFZ4SPmiYdwP1yPWnPao48/T596t93ceD/skdiK8+tam2otro79+z7P+tTvpe4rJ6P7v+A/w6or3PheZWFzZyxyqeSvTNQLpVzyZQCncZyR9a29FuJlna3lQpxkKw6/StG5tFL+YgCt9OG9jXnvMa9Gfs6jT8yXiJ05cktfMw4tOtzHvVPNX+8eSD71ahvJrM7APNg/ut1H+6f6GrMVvJBKXXGG6jsfaob22fJeD50P3kHUH0qnXp15clR3T/r5MTqKbtLVFtPIvwk8MmABhh/Evpn0q9GCoIYc5/OuSkulSTekzQyqdpIHI+o7itvT9W8+NI5CrTqOQv8Y9V9a5MZgalOHMtYr70ZVsPOMbrY11zkYOD60SRxSZD/K398VkXviXSdPk8u5utkvePaSw+oxUJ8W6aceXKZkP8URDY+o6j8qzw9DEqOlNtPy0f5fetfUwWGrSs4xf3GjJZqG320oWQDqp4P1FZGrCG8s5LLVoCFKnDrxn3U9iKztV8SQeUXBhhT+GSU4DH04PFZlv4meaFliaOTHVRJvB+gPP4V6lDL8TpVelttdV/wBvf53PQo4OtZSf+TKknhu7Fx5un3sdzgBPLk/dvgdsdD+GKNVvrZYbSK6kEN5CxR48Zwp9x71l6n4juI7yRRbwIMZRVzwMfWuannEyk5beDk7jnNfUUqFSq4yq9O2/bXoei3KDTm9V/W5091KYG8zeCJTlWXuPX8sV03hvWYLiYSTyrCLeNmkJIA4715jFdl7dAzE7MqQT+I/Q0NckJIA3HNb4jBKtQdKT12v/AF5Gdacavuvqj1Dwtro1a5ub3Kh2jLeWeWXJziugTVk23srSKLe2UBieuSCT/SvC7PUZLBluIXKugA4brnqK6G88SCfREtUJQzyNLcDscY2r9Bya8nGZGqtVOK0dl6Jb/ht6mM6FKrLmvbb5It61rgvtZuLtfuFlCKf7o6fp/OrFpcx/aEmSQAKc7HOA7non0/pmuPN0skrSc7c5A/pUcl68rohYhIzlVz0J7/WvYWCjGmqcdElY63iacY8kdtj123vXsrEPJJtAOWkY43Huea5bxF4kS5tDHFNvBb5nI42jnH51yU+oTXCqss8jIvADPkD8KsaZG010kxQbIWBUMMgt2+uOuPoO9Y4bLYUZ+2nuc8YQ5uaKuztRrU+heGU0m2Zl1O8/e3UnRoVP3U9m24z6HNZumwS3swsrBdz/APLSQnCqPc+n86bY6TLqc7AOyqXJuLg5JyT90erGu4iisdA03y9kccSfejLDc57bz/eP93rXW2qru9iZ1Y4ZOFPWcndvu/627CwNaeGdMLwtvmkXczsMGYjv/sxjsP8AJ5y1s9T8T3Ulw7slvK3zTy/x+yjuPatELDcM19rBBGQxjZsJ7A+wxwKWfxE0zeVp4CLjAcLz9FHYV5eLxlWb9nhY3a+09l/mZ0YVIXcFeT3k+hs21npfh+EiFS8/d2bLt9T2HsKJdVlmwoxGjHBNYcLM5Bc73bkKvJOP88+la2mQNIXuZgPLUYXI+VffNeMsBzz56z55d3t8jKpSjBOdR3Y3XonhvBq9h5u/aEuomTG9R0Yc9QOD6gCqCPHKTPbOI2Y4I/hJ9D/d/WujlnBZkChiDg7Xzj61zGr6bLC5vNLZgQP3kC9/p6/T8qHiKmKnzVo8kn5Oz9dxYWSsqctOz/R/oattdBdqS5hnXkeZ6e3/AOv8K2LS8julKH5ZVHzof5/SvPoNUkuU8tm3A8+XKCAD7HsfapPts0Lr5U7xSJ0WTt9D3H51y1sq9pdPR/h/wxvUy9z62Z38qbc/Nx24rBuZWgmaSOZ0cHAweD7e1W9I1kara7ZEQXKD5lDZDe4x2qC/tkcPuzE+OVc/KffNcWFg6FZ06yt/X5HJRi6dRwqGLeSxXbbLhTBMeQ46f/WrLujcWNu0jqXhXJVhyD9fT61buRLCMyRiSHuwO4fge1cjr12YDDFa3UnkurExOcj6V9XhafM1Fbf1s+h7VOPLC62/r7jOudSld3LsWLEnBNVDfSplo+R3x1qCYpJx9xvc8H8arETRtgH8+P8A9de7GEUjCpVvoy5Nem7YM8jNIBjLdRTYLqSNgVkaNh0ZTgiqhxMSMFJF6qeCDTdzBTvBbHJx1x61XLHYwdSUdU7mzd6tJeQRrNEnnIcrMoxx6YHrVCSQ5V1GB/XvTljZSgk+VZFDRv2IP9P5VOtjJIzw7ckjge/+f51kpQphKUqhXtkdriWMD+Ddj/PsaNxKvnOdvT6V0fhbSvtmqBXzza8jH3iGK/yAqHV/Ds2m6u1sQSGyUOOoxkf1/Ksfr1L2zot6pXMuW1l11OdwBCmfX+lLNKcheCQMVflsWRVYrx1x/IVEmnP5ZlfPzHHHP1A/z6CuqNaG9zOSlsiBCEhO70yM1GhYtnuTzViS2lO3hmLn5EA5Yj09gKnNqtuhLnLgckfwn0H+NX7SP3lQpuTsiGCKWeZI4kLSMcKo6k16NovheQJFA6MSRuKLwzH1J/hX3/SrPgPwzCLQaieZ5E3iRhmNFPTnuRz065qj4w8WtbiXRtHlKgnF1dKcux/u5/w6V50sRLEVvZQXur8bfotvNlOs1J06W5e1bxXp/hqM2OnCO51BMqTHxHD7A/16n17VyuoMbiOKe4vmurojKx7cIueuPT+ZrnNqqfmPGea1EhlW3SYxMsTdCf4voK65wUba2/U6MPTUb31ZdTcIkEkrOFyFDngD2FX7a7CbUTJJ+8yj7v096wvNeQ/LwO5rS0+5FhKlypjaZP8AViRsAN2OKmdNcvvbHbFymrRVzv7C3gs7MXGpgwwscrbhvnkx03ev06VJca293AYwEgt+ixJxx6GuZtLPWdYuA8gMpPck8fkOK6/TvC8Foon1WUbhyok4H4LnJrx8S1LSGvp+r2S+Z5tdUqL5qsry7L9F+rLt/wCHbbUrcyWjpBdgcKrHY/tjt+FcDqC3kMzW7GaOVTtdCSD/ADrv5pTbgzWzM0YPzKgyV/CmXsthrNv8rxPcKuMNwx9vWvPwmPdWne1mt7fqv6uc+GrzpfGuaP4r/gHkN8LvLZup+nKmUn9DWaTKrf6zcffmu21Hw/hmeCcycnasn+NcrdWFwkjb7Mn3Q19Bhpc8dNT21Vja6KsV9dWsyzwv5cqcq6ZUj8RWheeMtXv7P7LeyieP1KDP8v5VjyxOh/1MinqdzDmqyzQq+Lhpgu4fcAJAzz3rolhqc2pShdrbTUynOm2pPp/XYnW/mgk8y1nlt3I/hbFQ3d7LekNduzOOA+c02IC6lZIHVzyVVvlJH+NRAgkgr+Ga1UIp3tqZtuezIjkA7HBHoaj39ug/uk8f/Wqw1ujYPzIcdcVE1rKFLKBMi8kryR9R1q7o5KvPHdDlCyAJKpyPuEfeX2B7j2/KrSIV2luWz8sijhj6exqK2jbgKy7TyBJwD9DW3bIFjzJHkd9xBH59K469Tk2IUtLot6TBBcabLptzGpSMmSB+6A84HsDmrGnS2lo5iuiu1RgPkcfQ1wmpauJbxJrR5op4SY8hhtxnrnPNUDc3MUr7pN4fl9zfez3rH+zZ1U25WUtbdn1+85PrvJJ2R6nZeKNG0vxBcHKrtiAV88N1ckY+uKrap42tdedZ5oUhkiXLMp+UDsOeScGvM1bI5XBChY+Pu/5FOjkeFshcAjaQ/PNWskoKXPduSVr3MJY2TlzHZy61azXzWy5QLgCU9OeuMjrzV2S9sVMNqGDs/CKnK9cYJ7ckVxaRedEk01wsa7yMsjHBxnPTkc47471WZ/KTCAJIG3llfcCe2COuOTWzy2Dsk2rAsbNbnqKaYLPzLl8PdOoUvjAQf3VHYD9aytOsINa1dLd7qNIYXy0T/K0wHLcnjPse1c3caxdTwWdrPcSlLYbUCvypJyMnuQMVLJd2jRYcEXR3ZO8HIwcZPr71hRwVanfnleT7dO34fd+Jv9ZU1a9j1W08TRJ4ZkFnDN5tw7LbQTLtUKOknTlAuD9TXnzwHe7FsjJLyEfeY9TXXx6pba0vntgReUsSgHO2NRgKMdO5I96xdYXe/l28BJxwp4VR6muLCzjTrSpQVrvr/X9avqd1DkUXN7vUwTIgI5PBxubk/hVkahM0Iht1WNQNpfHzMPrWeYtlztkkV5e4Rg36jj+tOeRvuu4jc9IwMsPr6fjXt8kdOrNIYjdN2X4lmK6FsVfLGVTnIbAB+taq+MNVbCx3k24dDFhcf8C61zSxh2y4z7sc4/CtK3AUAqmB/eaidCM3eWrNIVYN2UU/XU6W01nxDeuol1i/CnnZHcMv65rpbSMxqryyOWbqxck/iTya5fTRcvhlKQr18yT+grdghQtgySXMnUDPH5Dp+NeJmOvuyei/r0NJqNvdSXy/4CO0aRrXEjRCUeuOBVW7u9FvCPOtnjkHV4WwV/Ct2HQpYcgTRuuf7pGfqOa5rxX4dmSxNzBGpkQ8fMMkeg7n6cmvJwuV43DVOWpD3e+jX53XyPDoVaM6iTlZ907GJrFvbwJ52n6p5seRujkXLD6+tc7cahIwKyQRyejRnBqtc3VxE2JlBA4yeP1rOluQOCrL7nmvoqFKy1PfhHljZu468lhck7HU+9YlxAGYlWFaLyufuk4Hp/hVSUhuG2En1GDXoRZlOKexmSWpxhkGfWoGVl6ZHoPStEqDkADg/wAMgz+tRvGxI4k+hwavmOSUF0KsUrKfnZh6MOQPrWpDZXEbxyEqC3zKwbB+vpVNYnU/6uTP/XM/0rRs4l4aSOVCOjhT+qkYNYV5aXQk3tLUXU1msrQXcccIfcA4YgK/uB/e+lcve6hLcSEFvJVeGSMYGffrmt/WtWtJrRtOlgmZ0c5aNdgJHTIPOPp+dcztJjDqwODnIIx6YPvVYOk+XmqLX9DysXVvO0XoG4mQORlic4boc+vvSRJvnCFlBGQB15xxk1LFGzMZIm2rnnefuk5wDnrT/KMckgZAgki+QFht3ZHf8D+ddr0OQdHDI7RPtGSvBPOcdzn/ADxT5o5ktpJWUyuxUBmH3ff8cU2Eo0oEWdrddp+7xgnHfnNT3TGGDYp8yK5jDqZFww2nGBg9Kzu0yt0RyqVd4rclwJR5bFvQdM/rUcvL4lkDl2G4xjG31x6np+tW5JbWWygQKfPyPMB4QD19ifT2p8ccSA3B8oI7FRtcFto6qo6jOev5dKalbcTWpUYLHLcbUUsQpC9QqkA9D3wfzoQI7qIo/LVQW+9zgc8n1+lOXy5ELNE8krLwRIBgD2xzgcUKyBg28RxqRmVxgDvj3OBV3F1NvTtem0uOKCG1jkdCNwJILD25wPyrozEt3GLm4REeTBPmtnH0A61w1rdNHqUc1vG9ww/5ZMMlh74rp316HUoRbyNNaypyYigbHbqOa8vE4ZqanTja+7O/C1k/dkyW4urG1tPKtIDJlRvaSPYN3r1ycentWEzo2dsar6BOlLcpEr8zh+f4tw7+4qJR3BYjtgY4rso01FXXU6HOzL0ESqu5yiD35NXI7kDC20eST95x3rKRztwXVV75PNXrfULG0UNNcKSP4RyaqcWzppVYo6TTNNe7nU3E7Hnkr/ICuyisr6ztlj02wifPe4mCD8QBn+VeST+JJbqUCNjFEhyqq2Dn1J9a2tO8UXRAFxfXxA4Gzaw/XFedXwcZv31dfgbym6q91o9tutfnClYkCH16muX1XU7hwXuZH64Bz/XtVp5pZhhGAfHKxoSPzPesy4srh4zM1mZEzjM8h2n3IxXz1HFYis/3rcvw/wAkcNCjTpvZHLX95FOWAAkPT5Vz+tYU+wH7qx/Vv6VoalNukMct4GC8eXAuFH4iseWWFOY4CSPUf4179KnKMUkj1U0kQSbD0c49SKgMoxjzCfbd3pZryYEAKgH1zVKW4llOCFx7LiuyEZ9Uc9SolsTtIT/Fu7nI6VAzbzjCcnPK1ErTrjGOevFWY7y7UAKLce5QZ/Hmraktl+Jyyk2MiEoOVQEnpha1rQSMw8yZYhjucn9M1WilmlJ8x7U/70m0foDWtYypESXs4WRfm3wTK3T1U4yP1rkruTVrfiTzcqOT1e3hs7t44Gmc7t++VAoOeePWqY8uciIIrM2AqovUnp+FXdSubW61W4kUO1vICwbfuYEgYxnGBweKghklhkzGwXeu0MQM9Oo/P3r0qXMqavvY8Wo05OxGkgt5FiIXLjEiSJ8u09sd8Z609IEe3aNh5U0Eg+d2bEisQOeoBHBz6A0ryGNEMkccibcrEWLEA9RnHBGM8+tJHIgdgsryQSqVfecEjGccdxiiXkJa7hc3kwaCN1JaCPySQME8licjqOePbFCzhY2SLeCiFcdOpHT8zUAMrhVMkK44Xe2C2cn8h/hTBE0yOZA7yZ5YE8H3/CqjFCc7FpJZxm0kJWNpN8ux/kJxwfQ4FTHbJcWsDtJPFFhdqjHy5z/LP51SWKZLZ3QAxAkyJn5lAxyR2GSKUO0Mu6VcuvJZGBySOM89KHFdBKRpO7MtzNLIY0kPKIcuuDwo9un5U14luLNQFmS2g4Ltjb8xGeBjJNRI0FuBGRM0pbBh4AYEDBJ69ParN1aSWMqfaJYJo8kBIZd+0cZ6cd+vrWV2mkaablGZolQRLOZIiCGaW3GFH+z3p1pMbKdvsxWVQ4A3JjI9hTJI1WUIjGRcfJuGM/X0NNTyg7M7ShlYMgRAVJ9znj8K6GlKNmQm07o6yMXGoacjOjQNjBSMdffnoKxbvTPLfhGJ77psk+9bmiSHU9NdneNJlbbtB3b17HFVLy2HLIYXw5QBcqx46kHntXlUJclSUNtdj14pTgnuYJtCGGbdcjjrR5OM/uSPpVx4gpKltpB6H5h+Yp6I69Dn/dOc16CaFGOpSUKp5Rh+GatQzRg9dh+mKkZGz82M+jDGKQRhs/KR9DmnzGnsrao98uL7UinzX0NomOkKDj8TzXGXjrcyyNPcPMSTksxOfrW9IkXzNJC0798nAqhc2t9f4ht9PCIOggjLH6nGa+SwrfWX4WO6hGFPsvPRf8E5Sa3yW2E+49Kzp7Vf4n5PUA1rXUSwyMkjyKy8FWXBB9Oaz3lRMhUJNezTlLodko3KTW0QBwgb3NQtE3IVQODirctwzAAAZz2FUZWkLHezZGa6I8z3OapyRIXh+bLyKD1NNZrdeAWJHSopJB0zlvb0pixuxJGVA6mtuXucU6i6IsxzIrBhCp/mf6/lXR6PatfYE6QRwN8pVlyXH45Ncws0UJwgJfuQMmtC2tbuVfPfdBCwOJppCM/7qjkn6VyYqnzQ+K34kqd1a1yrr1tolhdGy02K4luYpP300shKkEcKB7ev86ykSQREPaGaByVOw4dMdxjkc461dvdB1GyhZ/KZ7dgD58vBJ6Y/rWfbSCC5DtI8bYX97GRwM9fQ9K7aDi6S5Zc3ne/9eh4tRNT1VhqSlBG9tNIZCp3HAzzxx7n1+tNlEnlBpkMpGYxv3KQcZyDxnGOalbyI/wB63mNI7F9si+WiY9+4qozvMCpd3AO7lsgD2rXd6EiCOJA4cCRmUESKSu09xjue34VbVXEZijVgzDI28lh6E/XFRSrCskXktHh4FdlRy5DEcgkjhs9R29aWKUxHCN5fynLKew5x+PTNXHVXMpbimBmX5UJGRna3GOv+NRHbAWkQAR7hhHY5Of5gflT42Jj4UnC4UZzg+3rxSSF47ctIreTOrIGBH3gQfqB9KGEdyaJHeMi3SMSRqFP7vc7gjJPP3fT04q59uM0NpYafblIwVGI1BeSQkZJbGSM4wDwKpWGp3Viu1Q0iyDDhVyTnpz+PSpNup2nmNIzWbSsFYRkZPfBA5XgZ7Vk1d6/I2T0F8tNzLKdkinhyWKjPUNzx+Rqu1wwc4eKQqQQoTlu3JGCR+NWVSGDNwt9E0rKfKRgzbySQQy/w9O9Ui0Z2ny5UYMdw3DBGOg9OnStV5kXOt0iPS5EczuY3HBAc7VOeoxxmk1GyYncLhZY88PJ830+cc/nVfTlB06Ka3CRlh80JYYBx7+vWovtgimYndA/Q5GFP17f0968uEL1ZOMn89T16T5aautylPHNCQskZI7HO4H6EUiTKcAOVx0DcitIshJDK0RbvGPlP4dD9QaqT2OQZFj81e7Rdvqp5FdsXK2oXXR/eSrKxGT09fvD9amHlPyHUH06fpWMFKOPKlOfT0p6XcyMQ6K2Pai3Y2jPufTi2BjTb9ttkPc7AT+uay9QhsZQ63niG5Kgf6u3O2sia9uUX5VbpwVIrDv76S4OXcsQMYVelfM0H/Jp9xtQwc3K7l9yX+TItaj0pXBsRdk872nkB3fpn9awHMSnGNx6cDJ/OrU5VTkqzMehrPnllOQibfSvUpyv1PT5OWNtWV55SeAgUdgf8KzpN8riMbmZzhUQZLfSuq8O+DNY8VSCSBRBZ5+a7kB2++31robuXw34Hhe10SJNX1xhte8mO5IT3PHHH90H6munmko3X4s8ytVTn7OCu/L9exxX9gw6RbLc67IYyf9XZREGR/wDeb+EVlTzNeSb2WO2hHCxIOFH07n3NOvJJZbhp7iZp7hj80jnP4VTGWJbd0PPqKqlSm1z1Hr/Wy/V6swqSUHy7ssQOsT7bWL5v7zYLfX0rQtcNOJLi5mnuDxtibGPq/b6AfiKoQRl+Oi9/QfUj+VaIP2WPymGHB+6Rgj6+g9qVWK6FQTl8Q/X9N+0WkEkTfvItzyQxgkuMDHJOc9ea5Mo77EkicSMQIyeqegP55rtknMAMYJLsMyMe3ov49/8A61c7rVldPcSzyxv9nP3pAQAcD9OSOfapwVRx/dTfocONopPnRmIHPnNJClzO+Nr7yRjqeD70t0l5NPD51qzzsfLXeMHOOABnAGDUSuJ5NzuqIePN3enXGO1QNKXMo3yMC+F3scrnofb1rvcdTz7koWWSNbYRorI7yZcBTnuC3pgdPWkWQY2uSsZHI559qszRrMkkrLtmMQMiySAhSOCcerdh2qPznCpLPmZZB+/AIVzGCOAcfLwMZxSUmgcUwCoBBNczqkcwYhtu8gjsyjGMn+eagk8y6ePIZ5VUIi4zgdcZ9B6e9IY08tGYlQpO4oNzcd8fiParNpMlvCCQzJJ+8WVTgwuOh9+D09KG3uCSGwMsCqnlvBJnc0n3lZTyA6/jTXuFkQYgii3Eg7eAM9z+P0pzbBeym3i/dS5KgksozzjPsf5VH8jwpGHRJQSpbOQR/eNEV1BvoNYiNRlmLZw6MOB6f1qwLUXNynkENuxkIh49Ryc54pFhu55DZOsUkhbCytyV9tw4wRjr6V0Vvp4sLXY6hrhcENkqQfY+9Z1q8ado31Zvh6DqPVaFS1tZp40Ek+5yxUh+CAP72KtWr6eb8WqzKQp5uJRhCf8AZHpVW4nkneUKTFJL984xv9celVEtigClSD6EVm05e6nZeR6EU4x0OuvrC3lsX+xiMzITtK/dI9/5fhXOQzh3+XdHKDgxtwyn2PcVEklzaqJI3dEJ2Bl6ZxnGencGo3DXEjO4LPyxZBg9OpqcPRnSTTldfiDd90X2iS6+WWJXYe2Gph0him+2lYD+7IMj86ghu3iKrcAsB92Veo/xrf0y92EFJQmf415B+oNazm7bG1Omm9D1u602K0ZDdC2i3DhdrFm/AVi3SwAOUtbdAOjSAE/ka6OPTxNGJJbgtvPLMTk/1qdbDTrBt32cTTH7oZdzH6Cvz54inS+JW+9t/K/+SHDEcm7bZx0Oj6nrB+SLybcD/WyfKv4Dv+lXp9D0Kwso/MtYbmaHJe4mBKOfTZ0c+g6Vrajqb42yESN0WCI8D/eb+grlrrUwzPLfklhxHHj9FArqoV8TiP4a5YeW7+f+Wnmzrh7WtrJ2iui/rX8jJ1jUNQ1AnfczR2g+VYvMIUDt8o4/TA7VzkhAtC6kJDuK7uhbHXH0rYaOXVLqQv8AubWP5pGz91fTjuewH51iateG7mUJF5VtENkUY7D/ADya+nwsFZJm9aapx5YaGVI4Lb2A24wBkj+VIgO2NFVsnIAH3j7ewpx3GQ7RuYjn0WnxqY9xUnOPmf8AoK9Fs8tQTdywjeQPvAsvPsh9vU+9T27G2IIJ+0ODtA6oD/U1AieSo3gGQ8queAPU+9Eh+zqQufNcct1wCOayUU9SpytoSLITOFUEhDgAfxuaNanW9RdLSRvKi5uGTHzyHtn0Aqt5zQRgQ589xtX/AGR3P1qKCF8RpGAA+cFup9WP9BWcqS5+fqtv8zNvmXKzNu7OSAB1iUAkrHhi249zz9RVXzIDANlsq+WQZW3ElznH4DmutedJ1jUL+4j+SIHHzc8nPuefwpnh3QLTXtf1CO5i/cIMjYcANnj+Rq3iXCm51Vt/nY4quFs/cZy+9pi8TXB8sHzMNyC/pg9frSApGX80giWM/KM/KPr61v8Aizwu+h6jEtmzvDcLuUOOUI7fTkVjrYXMk5VipUxk5B5Bweg+taQr05QU09Gc/sZ3skVhPMpSVUHZeo6dx+NLvG1DC6x7SxYBsZIzj8QuR706XTZ0fymhZHVsM2QFY56g55p0unnyfJ8t/tIfc07EbCvPTv8Ap2rXnhvcjkne1isJAGjV5HMWcEk54J/lSzJH50yeW0bI/KAZwAOufpV9dNjREAjJElsGB756kmuk0HSVksLjUJFDjz0j55Jyp7/lWdTExhG61NqeFlJ66Fbw/ZvawK8pOZXYKh427eCP5H8a076XzrUZIbyz8y4znPfNLrBmW5djD5JkVLlUA4wRtbA98A1nyzMqLtwSASNvdepB9x2rhhR9q1Wluz06TjCHKuhATG6FJFyp4DEZI9iKjO+PMYO4qAQD/MH2p0z7pC6857Y7f5/w7VHIFdVXdnH3T/h7V2xjYblf1J45ZTbS28cxEUpBki/hYjocevPWoBArcrww56U1Q24RkDI5DZ5qzGysmJQVbtIOR9D6Gqa5dhxtJarUrmMSEqVVX6HjjNKkEttIGTco7gVofZy0gViNx6H1q7BatK3lvj6n/GsalSy5jeFP7z3CS5RBuc7CRnnlzWRd3zOpWLcA3B28s31NXxpSyP5l5OW55CnH5mknS2ij2Qsqr3Ycn6CvzKhTjKV4rnf4Hn+3w9DWT/r8zmppLjH3VT0HU1mnR7q+LGCF2JPzSsPlX8f6V29rpdu3711dsn5UY8H61X1rVBBB5EJUE8KAMD3OPQfzr6KEXQheXyS6v+tzajmbrT9nh4/N/wCR57rUMdhD9hhcmCM7pZO8sh6k/wAh7CuPkDTzYGFGefQD0rpNV3TylULbAxVR/ePc1k3NsFYQxjkdfc19BhW4wTk9T0Zx0SMrGTiMEL+rfWp/L8p1OCZMD321b8jZxGo3dM+lOMXlxs27Hqx9PaupT5tjOUVFalJz5LMGbLZ+YdcH+tVjMEBJJLnoCKJplJzjgcKO5PrVSd3dvL3fvG4b/ZHpXTGKPPqTb1AsZ5y2MqOw/iP+FW03KWQDLuP3hXsO4H+e9RWiIuJTjAGAnU+5+lXRGkcRklT5pDuU78cdMH8cfhVONzJz5dCIEBMkHco3Y6Ae38q0PAWoCPxJNbs2DdqRGfV1OcfiM1lXkwCNtLEkfvCf73U/rWfpZkhuo7uOQxvE+5ZME4I5zxWdaiqlNw7/ANfmJTd7npnxAhT+ybKbncku0E9xjn+X6Vx1nHtyyk7iAMEdNxwDXVapqUWv+EY7hUHmwTAOg5K8f1ycVgeWlqlwjjdN94vG/wAo+UkA+44rjp0v3bi+7/HX/M2pu1kVdTQXi2iDG6Sbbx1IOOn51BcBJNSulQARDMScdlGP51YtiBq8TsRsgV5M+uFPGPqRVa2LxhGYncRkgjvnPP61cKKSsui/P/hipytO5YVEZ1jfccKUCpwQMVNpV0sVpcRHhWVeeTypBqJbgxwEqcmTqW+g6fTpUemyvBLlGCkPkEjI6gj9efwrWNJNNEzk42fYvateS3M9qZpN5jh8pXwD8oJx+hFVUR5YSqAOU+faeo+ntj+VP1FxNHGSrJImcqFwDk9RVaCXbMN2fpnoPrWtGNoKJFRe85R2YR4UqOQvVCTkD2NEkO1vMVRtY8jsDVq4t0Urg/K/zA+/r9DToAoOHAZTwSDmr23GlfYueGbbTLnV4odSt/NilIVW3kbH7H3B969c/sjTnspLCWyhFs52Om0LuH1HIPoa8g+zyWckc0Y4BDKw9j0P5frXuWmeTqum29zHg+ZEGz3HsfXnNYVJcj1KrO0FI8h1nw7PoN28BDTWjH927j7w9CezUlhgOoOCvQGQfd9j7V7RLZieB4bmBJYyMFXGQy/4+9cpqHgEidZ9LfzIzz5Lthh7A9CPrXDUqKV0dOHxtKS5Kuj79P8AgCx3bzEF/Mc/nWxZWLyFZ7gcHlE9vU02x0/oWIES9fU1rXlvJ9iYIG81hwobGPqa4HyWcoqyPisNQlWlqypc3iQxSENnaMM39BXEaxceZlg2Hc7c+grpLqynWCG2LgOR8sY5Zj9Ow9zWBdaYzuUhmjl2nbK46bu4Hr9a87DQnVq+2q6JbLsv+CfZYKlSoRtFmA0KiJ59pGDtQD+dVHs9mYwheVuW29RntW6tr5LOoUsyn5QfWtO10nEZ3D5hksSMFj/hXoTxcYeh2TqJanGXNkYVJbC7eGf09cVzt9dLJwpxEvAz3rY8Q6o15KwLgQocEqMbz6/SuYlLuwwPmI4HYD1Nexh4vlTkY1JNoilk8v5z9/GVA7UWluzks+QOrtn7uf8A6+KVLYs+Wy2DyfU1bWJseUnGThj1H5V28yRxuDZJEDLPl0DRY5VRSTBs7BjGTg+pNWOIYkZTt3L8m05yMnOaqsAiu7EjsvoKdzNRTdyhqOGk8iI7+dikd/8A63WpIY9mEyFXb94njjrTLVPMkecgkL0/GrSrs49SM/L269aLhyWN3Q5VWx1GMBGJh3MGXIJz6e2aqPKZbd2djub0OOOBVrTJSbDUzgAmItuIxjJHHtWXcuRbsqk7QQB+A/8ArmsZLX1/4Y0pwKyO2ZyvHmKQcD1PpT9/EaMOcDH09qjCkO2xiMDBPQY7VMwVJI2IZgRtAYeoP51rYjdjmAjZY2BBBAIbqPlFFm4iulLHCMxUn096WTdLIjSNvYOBuz1wAP6UsaghN4bGT061nzcuqNeXmWvY3LvTVuYC1tG2+Fdzr9447nPsR6d6w/IzGWGQy9K19KujJGsBfbOBhWzjeB0/H2qvcW/2G785wzQzLlvlxjnqPoafMlrfRijF/C+hHZSo0bW86gjoD3U/5/nUrQmKf5/wPbHvStauuCmGHBBHO4Hvn0rUgjW8hEb5DgfKwHNKpNLVm1Olcdp8YcCJ/uE8E9R9K9N8EyCPTltUPz25IK9ip5BrgNNtdrG3mGHz8uBkEe1dPpMkul6hHPGDwdsq+qmvKrYqKbhJ6fkXiaKnBpb/AJno3y8OM4PX2pNhzhQPXHYmnI3P+eRUuM8joaKdNVFv/X9f1Y+ebaM6GBYkEtwyjaOFzhUqhNq6Tsxs2RkBw8752D2H94/SsdmRmAupbicg8faMIg/4CDzU5vLNSDkSkcKqLhR9OP5V5OJrU1C0Hf8AH/gf1sd9HBxp9LkFysrpL5ZlKv8A62UnDuPTPRV9hyapwvMytb2cAUgYBHAQe3oevPWtJzJekmbENsvUDq34VQu9Tgt18m2OFH8KDJP1P+NeYsXVqv2dJXfXsvU7oXty21H2+lx27ckyTk5Jxxj2HpXOeKfECi3k0+zbO4bJ5lPbuq/XoTVu6ur67t2hjzEjHG2M/Ow9CfeqcngrU4rCS8msvLiWMuf3ihlAHpnI/KvRwODk5e1rPma+5f19xV4p3qS+RwyabPd3CjGOMj0X3+tSXOmpaiNUGWdd7EdeuMH/AD3rqVgjRZQItoHLeuOw/Eiqc9kTI8jEgRDfJxznsK9VYtue9kdNkc69r5CLHj5sZc+n+elTQ242qdh5Hbrj+nrV2GCSXzXkU+Y5y2eij0q29m8KEAYZl5HBwDXWq6TsyZRurIwnhDTMT1P8XIyPQe1Zl8RICgPyKcMRXRXsBgDRAfvNvzf7Oe31rElsnR4rfHJGSPrW0a6lszL2Vh2nWQa2iLrjdmV8nBA6AfyptzDiXYQAxJ56jGP51rPEY5HRfuDCH8KosnmzvtwAXCZ6L0PFbc+tjJwtqy5bRmHQ523EGSRM7TzjkkH36Vn3CARhccHGQK1sD7KIh91pyBzxwBjn8ao3EW+RVbjLYyORx9Kzc/eNIQ01MuJCSRuOOvB4zUpBwMdM4Aq3BAosGIBL7+vHT+dNhiZ4d2OhJ/8Ar1cp7mdOndp+pHKGMgODjKnDHPGKsLbu6KQrbQPmIGcVM9ubi4CQozEhWAA5+7z/AFrtPAVvB/aklpfQEm4i2BH7gkZ/SsZTV18jRrlg5W2RwyWxAh2sQ5bII4wc9a6OGz/tKEwToFmLEEgD5mA6/iP5Vsp4RlXxXFp6L+6G+ZCe6A9P1FLdaa9spYjbJktgjGTmsoqU4O3mebj8d9XqwcdV/mcbBFLpV81jcqTbMSUPeJu4x6e1egR+FpLnRbbUIYU37eVQ5Ei9mHoaz9RsU1exS+Q4uYcCQgdSOje9bHg7UrrToTC6PJpxO5go3eST1x7Z5x6VxvF+0jab5ZLR9r+fk97noKcnSVWg79bfoYvlmG5ETqdynIJHSuggkWWJYpeJOgauyuNJsr9RI0KP3DYwaoS+H7RlKBXQ9nU8/WvNx1OquXmj8+hm8wpVFqmmadhKJ7KKQH5sc/WrUfBIPfkVnafbNYB42bKg5X8a0WGQJFPT+Vd2AqXpp/ajuvJaX+78jyKqXO7bH//Z\n", 337 | "text/plain": [ 338 | "" 339 | ] 340 | }, 341 | "metadata": {}, 342 | "output_type": "display_data" 343 | }, 344 | { 345 | "data": { 346 | "application/vnd.jupyter.widget-view+json": { 347 | "model_id": "4045dd3c7d234b32a1be8284fbe20f2b", 348 | "version_major": 2, 349 | "version_minor": 0 350 | }, 351 | "text/plain": [ 352 | "HBox(children=(VBox(children=(Label(value='134.38 image_dandelion_248'), Image(value=b'\\xff\\xd8\\xff\\xe0\\x00\\x1…" 353 | ] 354 | }, 355 | "metadata": {}, 356 | "output_type": "display_data" 357 | } 358 | ], 359 | "source": [ 360 | "p1 = 19\n", 361 | "p2 = 67\n", 362 | "image1 = id_to_name[p1]\n", 363 | "image2 = id_to_name[p2]\n", 364 | "image_embeddings.knn.display_picture(path_images, image1)\n", 365 | "image_embeddings.knn.display_picture(path_images, image2)\n", 366 | "results = image_embeddings.knn.search(index, id_to_name, (embeddings[p1] + embeddings[p2])/2, 7)\n", 367 | "image_embeddings.knn.display_results(path_images, results)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "We get mostly one of the picture. One thing that can be done to improve this is to\n", 375 | "normalize the embeddings to get a better mix, let's try it !" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 33, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "import numpy as np\n", 385 | "\n", 386 | "def normalized(a, axis=-1, order=2):\n", 387 | " l2 = np.atleast_1d(np.linalg.norm(a, order, axis))\n", 388 | " l2[l2==0] = 1\n", 389 | " return a / np.expand_dims(l2, axis)\n", 390 | "normalized_embeddings = normalized(embeddings, 1)\n", 391 | "index_normalized = image_embeddings.knn.build_index(normalized_embeddings)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 34, 397 | "metadata": {}, 398 | "outputs": [ 399 | { 400 | "data": { 401 | "image/jpeg": "\n", 402 | "text/plain": [ 403 | "" 404 | ] 405 | }, 406 | "metadata": {}, 407 | "output_type": "display_data" 408 | }, 409 | { 410 | "data": { 411 | "image/jpeg": "\n", 412 | "text/plain": [ 413 | "" 414 | ] 415 | }, 416 | "metadata": {}, 417 | "output_type": "display_data" 418 | }, 419 | { 420 | "data": { 421 | "application/vnd.jupyter.widget-view+json": { 422 | "model_id": "120642bd44994b69a716f8d420fe069f", 423 | "version_major": 2, 424 | "version_minor": 0 425 | }, 426 | "text/plain": [ 427 | "HBox(children=(VBox(children=(Label(value='0.61 image_sunflowers_378'), Image(value=b'\\xff\\xd8\\xff\\xe0\\x00\\x10…" 428 | ] 429 | }, 430 | "metadata": {}, 431 | "output_type": "display_data" 432 | } 433 | ], 434 | "source": [ 435 | "p1 = 19\n", 436 | "p2 = 67\n", 437 | "image1 = id_to_name[p1]\n", 438 | "image2 = id_to_name[p2]\n", 439 | "image_embeddings.knn.display_picture(path_images, image1)\n", 440 | "image_embeddings.knn.display_picture(path_images, image2)\n", 441 | "results = image_embeddings.knn.search(index_normalized, id_to_name, (normalized_embeddings[p1] + normalized_embeddings[p2])/2, 7)\n", 442 | "image_embeddings.knn.display_results(path_images, results)" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "metadata": {}, 448 | "source": [ 449 | "As expected we see a mixture of images that looks like both" 450 | ] 451 | }, 452 | { 453 | "cell_type": "markdown", 454 | "metadata": {}, 455 | "source": [ 456 | "## Exporting the embeddings to numpy\n", 457 | "For easy access to the embeddings in other languages, we provide a function to export them to numpy" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [ 466 | "from image_embeddings.knn import embeddings_to_numpy" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": null, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "path_embeddings_numpy = f\"{home}/{dataset}/embeddings_numpy\"\n", 476 | "embeddings_to_numpy(path_embeddings, path_embeddings_numpy)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "## Next ?\n", 484 | "\n", 485 | "What else can you build with this ?\n", 486 | "\n", 487 | "An obvious example is retrieval (looking for closeby pictures) but recommendation systems or photo managers can also work very well using image embeddings !" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [] 496 | } 497 | ], 498 | "metadata": { 499 | "kernelspec": { 500 | "display_name": "Python 3", 501 | "language": "python", 502 | "name": "python3" 503 | }, 504 | "language_info": { 505 | "codemirror_mode": { 506 | "name": "ipython", 507 | "version": 3 508 | }, 509 | "file_extension": ".py", 510 | "mimetype": "text/x-python", 511 | "name": "python", 512 | "nbconvert_exporter": "python", 513 | "pygments_lexer": "ipython3", 514 | "version": "3.7.5" 515 | } 516 | }, 517 | "nbformat": 4, 518 | "nbformat_minor": 4 519 | } 520 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | black==19.3b0 2 | pytest-cov==2.8.1 3 | pytest-xdist==1.31.0 4 | pytest==5.4.1 5 | jupyter 6 | notebook -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | faiss-cpu-noavx2==1.6.3 2 | tensorflow>=2.2,<3 3 | efficientnet==1.1.0 4 | numpy 5 | tensorflow_datasets 6 | fire 7 | pandas 8 | pyarrow 9 | dataclasses 10 | ipython==7.16.3 11 | ipywidgets==7.5.1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup script.""" 2 | 3 | from pathlib import Path 4 | import re 5 | import setuptools 6 | 7 | 8 | if __name__ == "__main__": 9 | # Read metadata from version.py 10 | with Path("image_embeddings/version.py").open(encoding="utf-8") as file: 11 | metadata = dict(re.findall(r'__([a-z]+)__\s*=\s*"([^"]+)"', file.read())) 12 | 13 | # Read description from README 14 | with Path(Path(__file__).parent, "README.md").open(encoding="utf-8") as file: 15 | long_description = file.read() 16 | 17 | # Run setup 18 | setuptools.setup( 19 | name="image_embeddings", 20 | author=metadata["author"], 21 | version=metadata["version"], 22 | install_requires=[ 23 | "fire>=0.3", 24 | "numpy", 25 | "pandas>=1", 26 | "pyarrow>=0.14", 27 | "tensorflow>=2.2", 28 | "tensorflow_datasets", 29 | "fire", 30 | "efficientnet", 31 | "faiss-cpu-noavx2", 32 | "dataclasses", 33 | "ipython", 34 | "ipywidgets", 35 | ], 36 | tests_require=["pytest", "black"], 37 | dependency_links=[], 38 | data_files=[(".", ["requirements.txt", "README.md"])], 39 | entry_points={"console_scripts": ["image_embeddings = image_embeddings.cli.main:main"]}, 40 | packages=setuptools.find_packages(), 41 | description=long_description.split("\n")[0], 42 | long_description=long_description, 43 | long_description_content_type="text/markdown", 44 | classifiers=[ 45 | "License :: OSI Approved :: MIT License", 46 | "Operating System :: OS Independent", 47 | "Programming Language :: Python :: 3", 48 | "Programming Language :: Python :: 3.6", 49 | "Intended Audience :: Developers", 50 | "Intended Audience :: Science/Research", 51 | "Development Status :: 5 - Production/Stable", 52 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 53 | ], 54 | ) 55 | -------------------------------------------------------------------------------- /tests/unit/test_basic.py: -------------------------------------------------------------------------------- 1 | import image_embeddings 2 | 3 | 4 | def test_basic(): 5 | print("it works !") 6 | --------------------------------------------------------------------------------