├── .devcontainer.json ├── .env ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── __init__.py ├── conf └── server_config.yaml ├── data ├── test │ ├── 001_accordion_image_0001.jpg │ ├── 002_anchor_image_0001.jpg │ ├── 003_ant_image_0001.jpg │ └── 2012_000015.jpg └── train │ ├── 001_accordion_image_0002.jpg │ ├── 001_accordion_image_0003.jpg │ ├── 002_anchor_image_0002.jpg │ ├── 002_anchor_image_0003.jpg │ ├── 002_anchor_image_0004.jpg │ ├── 003_ant_image_0002.jpg │ ├── 003_ant_image_0003.jpg │ ├── 2012_000003.jpg │ ├── 2012_000004.jpg │ ├── 2012_000007.jpg │ ├── 2012_000010.jpg │ ├── 2012_000014.jpg │ ├── 2012_000162.jpg │ ├── 2012_000166.jpg │ ├── 2012_000168.jpg │ ├── 2012_000169.jpg │ ├── 2012_001337.jpg │ ├── 2012_001339.jpg │ ├── 2012_001341.jpg │ ├── 2012_001344.jpg │ ├── 2012_001346.jpg │ ├── 2012_001347.jpg │ ├── 2012_002049.jpg │ ├── 2012_002050.jpg │ ├── 2012_002051.jpg │ └── 2012_002056.jpg ├── docker-compose-devcontainer.yml ├── docs └── build.md ├── index.py ├── index └── train.h5 ├── pic └── system_arch.png ├── requirements.txt ├── retrieval.py ├── scripts ├── clean.sh └── devcontainer.sh └── service ├── __init__.py ├── es_retrieval.py ├── faiss_retrieval.py ├── milvus_retrieval.py ├── numpy_retrieval.py └── vggnet.py /.devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Image Retrieval Dev Container Definition", 3 | "dockerComposeFile": ["./docker-compose-devcontainer.yml"], 4 | "service": "image-retrieval", 5 | "initializeCommand": "scripts/devcontainer.sh up", 6 | "workspaceFolder": "/www/server", 7 | "remoteEnv": { }, 8 | "extensions": [ 9 | "ms-python.python", 10 | "ms-python.vscode-pylance" 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | ES_HOST=es 2 | ES_PORT=9200 3 | MILVUS_HOST=milvus 4 | MILVUS_PORT=19530 5 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .idea/ 3 | .DS_Store 4 | __pycache__/ 5 | 6 | volumes 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM liyaodev/base-cpu-u18-py3.8:v1.0.0 2 | LABEL maintainer=liyaodev 3 | 4 | RUN rm -rf /usr/local/bin/python && ln -s /usr/local/bin/python3.8 /usr/local/bin/python 5 | RUN rm -rf /usr/local/bin/pip && ln -s /usr/local/bin/pip3 /usr/local/bin/pip 6 | 7 | RUN echo 'root:root' | chpasswd 8 | 9 | # 构建Tini的多服务容器 10 | RUN wget -O /tini https://github.com/krallin/tini/releases/download/v0.19.0/tini && \ 11 | chmod +x /tini 12 | ENTRYPOINT ["/tini", "--"] 13 | 14 | WORKDIR /www/server 15 | 16 | COPY ./requirements.txt ./requirements.txt 17 | RUN pip3 install --no-cache-dir -r ./requirements.txt \ 18 | -i http://pypi.douban.com/simple --trusted-host pypi.douban.com 19 | 20 | ENV PYTHONUNBUFFERED 1 21 | 22 | CMD ["tail", "-f", "/dev/null"] 23 | EXPOSE 8888 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | up: 3 | sh scripts/devcontainer.sh up 4 | 5 | down: 6 | sh scripts/devcontainer.sh down 7 | 8 | dev: 9 | docker exec -it image-retrieval /bin/bash 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 图片向量检索服务构建 2 | 3 | 该系统使用VGG(图像特征提取模型)和Numpy、Faiss、ES、Milvus构建了图像搜索流程。 系统架构如下: 4 | 5 | system_arch 6 | 7 | ## 构建环境 8 | 9 | ### Docker-Compose 10 | 11 | ```shell 12 | # 启动 13 | make up 14 | 15 | # 开发运行 16 | make dev 17 | 18 | # 关闭 19 | make down 20 | ``` 21 | 22 | ### Docker 环境 23 | 24 | 详见[环境安装](./docs/build.md) 25 | 26 | ### 操作简介 27 | 28 | 操作一:构建基础索引 29 | 30 | ```shell 31 | python index.py 32 | --train_data:自定义训练图片文件夹路径,默认为`/data/train` 33 | --index_file:自定义索引文件存储路径,默认为`/index/train.h5` 34 | 35 | # 示例: 36 | python index.py --train_data /www/server/data/train --index_file /www/server/index/train.h5 37 | ``` 38 | 39 | 
操作二:使用相似检索 40 | 41 | ```shell 42 | python retrieval.py --engine=numpy 43 | --test_data:自定义测试图片详细地址,默认为`/data/test/001_accordion_image_0001.jpg` 44 | --index_file:自定义索引文件存储路径,默认为`/index/train.h5` 45 | --db_name:自定义ES或者Milvus索引库名,默认为`image_retrieval` 46 | --engine:自定义检索引擎类型,默认为`numpy`,可选包括:numpy、faiss、es、milvus 47 | 48 | # 示例: 49 | python retrieval.py --engine=numpy --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg 50 | 51 | python retrieval.py --engine=faiss --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg 52 | 53 | python retrieval.py --engine=es --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg 54 | 55 | python retrieval.py --engine=milvus --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg 56 | ``` 57 | 58 | ### 附录 59 | 60 | 参考1:https://github.com/willard-yuan/flask-keras-cnn-image-retrieval
61 | 参考2：https://github.com/zilliz-bootcamp/image_search 62 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /conf/server_config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019-2020 Zilliz. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance 4 | # with the License. You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software distributed under the License 9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 10 | # or implied. See the License for the specific language governing permissions and limitations under the License. 11 | 12 | version: 0.5 13 | 14 | #----------------------+------------------------------------------------------------+------------+-----------------+ 15 | # Cluster Config       | Description                                                | Type       | Default         | 16 | #----------------------+------------------------------------------------------------+------------+-----------------+ 17 | # enable               | If running with Mishards, set true, otherwise false. 
| Boolean | false | 18 | #----------------------+------------------------------------------------------------+------------+-----------------+ 19 | # role | Milvus deployment role: rw / ro | role | rw | 20 | #----------------------+------------------------------------------------------------+------------+-----------------+ 21 | cluster: 22 | enable: false 23 | role: rw 24 | 25 | #----------------------+------------------------------------------------------------+------------+-----------------+ 26 | # General Config | Description | Type | Default | 27 | #----------------------+------------------------------------------------------------+------------+-----------------+ 28 | # time_zone | Use UTC-x or UTC+x to specify a time zone. | Timezone | UTC+8 | 29 | #----------------------+------------------------------------------------------------+------------+-----------------+ 30 | # meta_uri | URI for metadata storage, using SQLite (for single server | URL | sqlite://:@:/ | 31 | # | Milvus) or MySQL (for distributed cluster Milvus). | | | 32 | # | Format: dialect://username:password@host:port/database | | | 33 | # | Keep 'dialect://:@:/', 'dialect' can be either 'sqlite' or | | | 34 | # | 'mysql', replace other texts with real values. | | | 35 | #----------------------+------------------------------------------------------------+------------+-----------------+ 36 | general: 37 | timezone: UTC+8 38 | meta_uri: sqlite://:@:/ 39 | 40 | #----------------------+------------------------------------------------------------+------------+-----------------+ 41 | # Network Config | Description | Type | Default | 42 | #----------------------+------------------------------------------------------------+------------+-----------------+ 43 | # bind.address | IP address that Milvus server monitors. | IP | 0.0.0.0 | 44 | #----------------------+------------------------------------------------------------+------------+-----------------+ 45 | # bind.port | Port that Milvus server monitors. 
Port range (1024, 65535) | Integer | 19530 | 46 | #----------------------+------------------------------------------------------------+------------+-----------------+ 47 | # http.enable | Enable web server or not. | Boolean | true | 48 | #----------------------+------------------------------------------------------------+------------+-----------------+ 49 | # http.port | Port that Milvus web server monitors. | Integer | 19121 | 50 | # | Port range (1024, 65535) | | | 51 | #----------------------+------------------------------------------------------------+------------+-----------------+ 52 | network: 53 | bind.address: 0.0.0.0 54 | bind.port: 19530 55 | http.enable: true 56 | http.port: 19121 57 | 58 | #----------------------+------------------------------------------------------------+------------+-----------------+ 59 | # Storage Config | Description | Type | Default | 60 | #----------------------+------------------------------------------------------------+------------+-----------------+ 61 | # path | Path used to save meta data, vector data and index data. | Path | /var/lib/milvus | 62 | #----------------------+------------------------------------------------------------+------------+-----------------+ 63 | # auto_flush_interval | The interval, in seconds, at which Milvus automatically | Integer | 1 (s) | 64 | # | flushes data to disk. | | | 65 | # | 0 means disable the regular flush. | | | 66 | #----------------------+------------------------------------------------------------+------------+-----------------+ 67 | # s3_enabled | If using s3 storage backend. | Boolean | false | 68 | #----------------------+------------------------------------------------------------+------------+-----------------+ 69 | # s3_address | The s3 server address, support domain/hostname/ipaddress | String | 127.0.0.1 | 70 | #----------------------+------------------------------------------------------------+------------+-----------------+ 71 | # s3_port | The s3 server port. 
| Integer    | 80              | 72 | #----------------------+------------------------------------------------------------+------------+-----------------+ 73 | # s3_access_key        | The access key for accessing s3 service.                   | String     | s3_access_key   | 74 | #----------------------+------------------------------------------------------------+------------+-----------------+ 75 | # s3_secret_key        | The secret key for accessing s3 service.                   | String     | s3_secret_key   | 76 | #----------------------+------------------------------------------------------------+------------+-----------------+ 77 | # s3_bucket            | The s3 bucket name for store milvus's data.                | String     | s3_bucket       | 78 | #                      | Note: please use different buckets for different milvus    |            |                 | 79 | #                      | clusters.                                                  |            |                 | 80 | #----------------------+------------------------------------------------------------+------------+-----------------+ 81 | storage: 82 |   path: /var/lib/milvus 83 |   auto_flush_interval: 1 84 | 85 | 86 | #----------------------+------------------------------------------------------------+------------+-----------------+ 87 | # WAL Config           | Description                                                | Type       | Default         | 88 | #----------------------+------------------------------------------------------------+------------+-----------------+ 89 | # enable               | Whether to enable write-ahead logging (WAL) in Milvus.     | Boolean    | true            | 90 | #                      | If WAL is enabled, Milvus writes all data changes to log   |            |                 | 91 | #                      | files in advance before implementing data changes. WAL     |            |                 | 92 | #                      | ensures the atomicity and durability for Milvus operations.|            |                 | 93 | #----------------------+------------------------------------------------------------+------------+-----------------+ 94 | # recovery_error_ignore| Whether to ignore logs with errors that happen during WAL  | Boolean    | false           | 95 | #                      | recovery. If true, when Milvus restarts for recovery and   |            |                 | 96 | #                      | there are errors in WAL log files, log files with errors   |            |                 | 97 | #                      | are ignored. 
If false, Milvus does not restart when there | | | 98 | # | are errors in WAL log files. | | | 99 | #----------------------+------------------------------------------------------------+------------+-----------------+ 100 | # buffer_size | Sum total of the read buffer and the write buffer in MBs. | Integer | 256 (MB) | 101 | # | buffer_size must be in range [64, 4096] (MB). | | | 102 | # | If the value you specified is out of range, Milvus | | | 103 | # | automatically uses the boundary value closest to the | | | 104 | # | specified value. It is recommended you set buffer_size to | | | 105 | # | a value greater than the inserted data size of a single | | | 106 | # | insert operation for better performance. | | | 107 | #----------------------+------------------------------------------------------------+------------+-----------------+ 108 | # path | Location of WAL log files. | String | | 109 | #----------------------+------------------------------------------------------------+------------+-----------------+ 110 | wal: 111 | enable: true 112 | recovery_error_ignore: false 113 | buffer_size: 256MB 114 | path: /var/lib/milvus/wal 115 | 116 | #----------------------+------------------------------------------------------------+------------+-----------------+ 117 | # Cache Config | Description | Type | Default | 118 | #----------------------+------------------------------------------------------------+------------+-----------------+ 119 | # cache_size | The size of CPU memory used for caching data for faster | Integer | 4 (GB) | 120 | # | query. The sum of 'cpu_cache_capacity' and | | | 121 | # | 'insert_buffer_size' must be less than system memory size. | | | 122 | #----------------------+------------------------------------------------------------+------------+-----------------+ 123 | # insert_buffer_size | Buffer size used for data insertion. 
| Integer | 1 (GB) | 124 | # | The sum of 'insert_buffer_size' and 'cpu_cache_capacity' | | | 125 | # | must be less than system memory size. | | | 126 | #----------------------+------------------------------------------------------------+------------+-----------------+ 127 | # preload_collection | A comma-separated list of collection names that need to | StringList | | 128 | # | be pre-loaded when Milvus server starts up. | | | 129 | # | '*' means preload all existing tables (single-quote or | | | 130 | # | double-quote required). | | | 131 | #----------------------+------------------------------------------------------------+------------+-----------------+ 132 | cache: 133 | cache_size: 4GB 134 | insert_buffer_size: 1GB 135 | preload_collection: 136 | 137 | #----------------------+------------------------------------------------------------+------------+-----------------+ 138 | # GPU Config | Description | Type | Default | 139 | #----------------------+------------------------------------------------------------+------------+-----------------+ 140 | # enable | Enable GPU resources or not. | Boolean | false | 141 | #----------------------+------------------------------------------------------------+------------+-----------------+ 142 | # cache_size | The size of GPU memory per card used for cache. | Integer | 1 (GB) | 143 | #----------------------+------------------------------------------------------------+------------+-----------------+ 144 | # gpu_search_threshold | A Milvus performance tuning parameter. This value will be | Integer | 1000 | 145 | # | compared with 'nq' to decide if the search computation will| | | 146 | # | be executed on GPUs only. | | | 147 | # | If nq >= gpu_search_threshold, the search computation will | | | 148 | # | be executed on GPUs only; | | | 149 | # | if nq < gpu_search_threshold, the search computation will | | | 150 | # | be executed on CPUs only. 
| | | 151 | # | The SQ8H index is special, if nq < gpu_search_threshold, | | | 152 | # | the search will be executed on both CPUs and GPUs. | | | 153 | #----------------------+------------------------------------------------------------+------------+-----------------+ 154 | # search_resources | The list of GPU devices used for search computation. | DeviceList | gpu0 | 155 | # | Must be in format gpux. | | | 156 | #----------------------+------------------------------------------------------------+------------+-----------------+ 157 | # build_index_resources| The list of GPU devices used for index building. | DeviceList | gpu0 | 158 | # | Must be in format gpux. | | | 159 | #----------------------+------------------------------------------------------------+------------+-----------------+ 160 | gpu: 161 | enable: false 162 | cache_size: 1GB 163 | gpu_search_threshold: 1000 164 | search_devices: 165 | - gpu0 166 | build_index_devices: 167 | - gpu0 168 | 169 | #----------------------+------------------------------------------------------------+------------+-----------------+ 170 | # FPGA Config | Description | Type | Default | 171 | #----------------------+------------------------------------------------------------+------------+-----------------+ 172 | # enable | Use FPGA devices or not. | Boolean | false | 173 | #----------------------+------------------------------------------------------------+------------+-----------------+ 174 | # search_devices | The list of FPGA devices used for search computation. | DeviceList | fpga0 | 175 | # | Must be in format fpgax. 
| | | 176 | #----------------------+------------------------------------------------------------+------------+-----------------+ 177 | fpga: 178 | enable: false 179 | search_devices: 180 | - fpga0 181 | 182 | #----------------------+------------------------------------------------------------+------------+-----------------+ 183 | # Logs Config | Description | Type | Default | 184 | #----------------------+------------------------------------------------------------+------------+-----------------+ 185 | # level | Log level in Milvus. Must be one of debug, info, warning, | String | debug | 186 | # | error, fatal | | | 187 | #----------------------+------------------------------------------------------------+------------+-----------------+ 188 | # trace.enable | Whether to enable trace level logging in Milvus. | Boolean | true | 189 | #----------------------+------------------------------------------------------------+------------+-----------------+ 190 | # path | Absolute path to the folder holding the log files. | String | | 191 | #----------------------+------------------------------------------------------------+------------+-----------------+ 192 | # max_log_file_size | The maximum size of each log file, size range [512, 4096] | Integer | 1024 (MB) | 193 | #----------------------+------------------------------------------------------------+------------+-----------------+ 194 | # log_rotate_num | The maximum number of log files that Milvus keeps for each | Integer | 0 | 195 | # | logging level, num range [0, 1024], 0 means unlimited. | | | 196 | #----------------------+------------------------------------------------------------+------------+-----------------+ 197 | # log_to_stdout | Whether to write logs to standard output in Milvus. 
| Boolean | false | 198 | #----------------------+------------------------------------------------------------+------------+-----------------+ 199 | # log_to_file | Whether to write logs to files in Milvus | Boolean | true | 200 | #----------------------+------------------------------------------------------------+------------+-----------------+ 201 | logs: 202 | level: debug 203 | trace.enable: true 204 | path: /var/lib/milvus/logs 205 | max_log_file_size: 1024MB 206 | log_rotate_num: 0 207 | log_to_stdout: false 208 | log_to_file: true 209 | 210 | #----------------------+------------------------------------------------------------+------------+-----------------+ 211 | # Metric Config | Description | Type | Default | 212 | #----------------------+------------------------------------------------------------+------------+-----------------+ 213 | # enable | Enable monitoring function or not. | Boolean | false | 214 | #----------------------+------------------------------------------------------------+------------+-----------------+ 215 | # address | Pushgateway address | IP | 127.0.0.1 + 216 | #----------------------+------------------------------------------------------------+------------+-----------------+ 217 | # port | Pushgateway port, port range (1024, 65535) | Integer | 9091 | 218 | #----------------------+------------------------------------------------------------+------------+-----------------+ 219 | metric: 220 | enable: false 221 | address: 127.0.0.1 222 | port: 9091 223 | 224 | -------------------------------------------------------------------------------- /data/test/001_accordion_image_0001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/001_accordion_image_0001.jpg -------------------------------------------------------------------------------- /data/test/002_anchor_image_0001.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/002_anchor_image_0001.jpg -------------------------------------------------------------------------------- /data/test/003_ant_image_0001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/003_ant_image_0001.jpg -------------------------------------------------------------------------------- /data/test/2012_000015.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/2012_000015.jpg -------------------------------------------------------------------------------- /data/train/001_accordion_image_0002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/001_accordion_image_0002.jpg -------------------------------------------------------------------------------- /data/train/001_accordion_image_0003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/001_accordion_image_0003.jpg -------------------------------------------------------------------------------- /data/train/002_anchor_image_0002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0002.jpg -------------------------------------------------------------------------------- 
/data/train/002_anchor_image_0003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0003.jpg -------------------------------------------------------------------------------- /data/train/002_anchor_image_0004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0004.jpg -------------------------------------------------------------------------------- /data/train/003_ant_image_0002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/003_ant_image_0002.jpg -------------------------------------------------------------------------------- /data/train/003_ant_image_0003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/003_ant_image_0003.jpg -------------------------------------------------------------------------------- /data/train/2012_000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000003.jpg -------------------------------------------------------------------------------- /data/train/2012_000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000004.jpg -------------------------------------------------------------------------------- 
/data/train/2012_000007.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000007.jpg -------------------------------------------------------------------------------- /data/train/2012_000010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000010.jpg -------------------------------------------------------------------------------- /data/train/2012_000014.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000014.jpg -------------------------------------------------------------------------------- /data/train/2012_000162.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000162.jpg -------------------------------------------------------------------------------- /data/train/2012_000166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000166.jpg -------------------------------------------------------------------------------- /data/train/2012_000168.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000168.jpg -------------------------------------------------------------------------------- /data/train/2012_000169.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000169.jpg -------------------------------------------------------------------------------- /data/train/2012_001337.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001337.jpg -------------------------------------------------------------------------------- /data/train/2012_001339.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001339.jpg -------------------------------------------------------------------------------- /data/train/2012_001341.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001341.jpg -------------------------------------------------------------------------------- /data/train/2012_001344.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001344.jpg -------------------------------------------------------------------------------- /data/train/2012_001346.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001346.jpg -------------------------------------------------------------------------------- /data/train/2012_001347.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001347.jpg -------------------------------------------------------------------------------- /data/train/2012_002049.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002049.jpg -------------------------------------------------------------------------------- /data/train/2012_002050.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002050.jpg -------------------------------------------------------------------------------- /data/train/2012_002051.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002051.jpg -------------------------------------------------------------------------------- /data/train/2012_002056.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002056.jpg -------------------------------------------------------------------------------- /docker-compose-devcontainer.yml: -------------------------------------------------------------------------------- 1 | version: '3.5' 2 | 3 | services: 4 | image-retrieval: 5 | container_name: image-retrieval 6 | image: liyaodev/image-retrieval-env:v1.0.0 7 | # 本地构建 8 | build: 9 | context: . 
10 | dockerfile: ./Dockerfile 11 | ports: 12 | - 8888:8888 13 | environment: 14 | ES_HOST: ${ES_HOST} 15 | ES_PORT: ${ES_PORT} 16 | MILVUS_HOST: ${MILVUS_HOST} 17 | MILVUS_PORT: ${MILVUS_PORT} 18 | volumes: 19 | - .:/www/server:delegated 20 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/vscode-extensions:/www/.vscode-server:delegated 21 | depends_on: 22 | - es 23 | - milvus 24 | - milvus-em 25 | 26 | es: 27 | container_name: image-retrieval-es 28 | image: docker.elastic.co/elasticsearch/elasticsearch:7.5.0 29 | environment: 30 | - discovery.type=single-node 31 | ports: 32 | - 9200:9200 33 | - 9300:9300 34 | volumes: 35 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/es:/usr/share/elasticsearch/data 36 | 37 | milvus: 38 | container_name: image-retrieval-milvus 39 | image: milvusdb/milvus:1.1.1-cpu-d061621-330cc6 40 | ports: 41 | - 19530:19530 42 | - 19121:19121 43 | volumes: 44 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/conf:/var/lib/milvus/conf 45 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/db:/var/lib/milvus/db 46 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/logs:/var/lib/milvus/logs 47 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/wal:/var/lib/milvus/wal 48 | 49 | milvus-em: 50 | container_name: image-retrieval-milvus-em 51 | image: milvusdb/milvus-em:v0.4.2 52 | environment: 53 | - API_URL=http://${MILVUS_HOST}:19121 54 | ports: 55 | - 3000:80 56 | 57 | networks: 58 | default: 59 | name: image_retrieval_dev 60 | -------------------------------------------------------------------------------- /docs/build.md: -------------------------------------------------------------------------------- 1 | 2 | ## 手动构建环境 3 | 4 | ### 基础环境安装 5 | 6 | Python版本:3.8.12 7 | 8 | ```shell 9 | pip install -r requirements.txt 10 | ``` 11 | 12 | ### ES服务端安装 13 | 14 | ```shell 15 | docker run -it -d -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.5.0 16 | ``` 17 | 18 | ### Milvus服务端安装 19 | 20 | 
安装指南:https://milvus.io/cn/docs/v1.1.1/milvus_docker-cpu.md
21 | 下载配置 22 | 23 | ```shell 24 | mkdir -p milvus/conf && cd milvus/conf 25 | wget https://raw.githubusercontent.com/milvus-io/milvus/v1.1.1/core/conf/demo/server_config.yaml 26 | ``` 27 | 28 | 服务启动 29 | 30 | ```shell 31 | docker run -d --name milvus_cpu_1.1.1 \ 32 | -p 19530:19530 \ 33 | -p 19121:19121 \ 34 | -v /milvus/db:/var/lib/milvus/db \ 35 | -v /milvus/conf:/var/lib/milvus/conf \ 36 | -v /milvus/logs:/var/lib/milvus/logs \ 37 | -v /milvus/wal:/var/lib/milvus/wal \ 38 | milvusdb/milvus:1.1.1-cpu-d061621-330cc6 39 | ``` 40 | -------------------------------------------------------------------------------- /index.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import h5py 4 | import argparse 5 | import numpy as np 6 | from service.vggnet import VGGNet 7 | import os 8 | import sys 9 | from os.path import dirname 10 | BASE_DIR = dirname(os.path.abspath(__file__)) 11 | sys.path.append(BASE_DIR) 12 | 13 | def get_imlist(path): 14 | return [os.path.join(path,f) for f in os.listdir(path) if f.endswith('.jpg')] 15 | 16 | 17 | if __name__ == "__main__": 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("--train_data", type=str, default=os.path.join(BASE_DIR, 'data', 'train'), help="train data path.") 20 | parser.add_argument("--index_file", type=str, default=os.path.join(BASE_DIR, 'index', 'train.h5'), help="index file path.") 21 | args = vars(parser.parse_args()) 22 | img_list = get_imlist(args["train_data"]) 23 | print("--------------------------------------------------") 24 | print(" feature extraction starts") 25 | print("--------------------------------------------------") 26 | feats = [] 27 | names = [] 28 | model = VGGNet() 29 | for i, img_path in enumerate(img_list): 30 | norm_feat = model.vgg_extract_feat(img_path) 31 | img_name = os.path.split(img_path)[1] 32 | feats.append(norm_feat) 33 | names.append(img_name) 34 | print("extracting feature from image No. 
%d , %d images in total" %((i+1), len(img_list))) 35 | feats = np.array(feats) 36 | print("--------------------------------------------------") 37 | print(" writing feature extraction results") 38 | print("--------------------------------------------------") 39 | h5f = h5py.File(args["index_file"], 'w') 40 | h5f.create_dataset('dataset_1', data = feats) 41 | h5f.create_dataset('dataset_2', data = np.string_(names)) 42 | h5f.close() 43 | -------------------------------------------------------------------------------- /index/train.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/index/train.h5 -------------------------------------------------------------------------------- /pic/system_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/pic/system_arch.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.5 2 | keras==2.4.3 3 | tensorflow==2.4.1 4 | pillow==8.1.0 5 | elasticsearch==7.11.0 6 | pymilvus==1.1.2 7 | faiss-cpu==1.7.0 8 | -------------------------------------------------------------------------------- /retrieval.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import argparse 4 | from service.vggnet import VGGNet 5 | from service.numpy_retrieval import NumpyRetrieval 6 | from service.faiss_retrieval import FaissRetrieval 7 | from service.es_retrieval import ESRetrieval 8 | from service.milvus_retrieval import MilvusRetrieval 9 | import os 10 | import sys 11 | from os.path import dirname 12 | BASE_DIR = dirname(os.path.abspath(__file__)) 13 | sys.path.append(BASE_DIR) 14 | 
class RetrievalEngine(object):
    """Dispatch a query vector to one of the pluggable retrieval backends.

    Backends (numpy / faiss / es / milvus) are created lazily on first use and
    cached on the instance, so unused backends never connect to their stores.
    """

    def __init__(self, index_file, db_name):
        self.index_file = index_file
        self.db_name = db_name
        # Lazily-created backend singletons, one per engine type.
        self.numpy_r = self.faiss_r = self.es_r = self.milvus_r = None

    def get_method(self, m_name):
        """Return the handler bound method for engine *m_name*.

        Falls back to default_handler (empty result) for unknown names.
        """
        m_name = "%s_handler" % str(m_name)
        method = getattr(self, m_name, self.default_handler)
        return method

    def numpy_handler(self, query_vector, req_id=None):
        # Brute-force numpy dot-product retrieval.
        if self.numpy_r is None:
            self.numpy_r = NumpyRetrieval(self.index_file)
        return self.numpy_r.retrieve(query_vector)

    def faiss_handler(self, query_vector, req_id=None):
        # Faiss flat inner-product index retrieval.
        if self.faiss_r is None:
            self.faiss_r = FaissRetrieval(self.index_file)
        return self.faiss_r.retrieve(query_vector)

    def es_handler(self, query_vector, req_id=None):
        # Elasticsearch script_score retrieval.
        if self.es_r is None:
            self.es_r = ESRetrieval(self.db_name, self.index_file)
        return self.es_r.retrieve(query_vector)

    def milvus_handler(self, query_vector, req_id=None):
        # Milvus vector-database retrieval.
        if self.milvus_r is None:
            self.milvus_r = MilvusRetrieval(self.db_name, self.index_file)
        return self.milvus_r.retrieve(query_vector)

    def default_handler(self, query_vector, req_id=None):
        """Fallback for unknown engine names: return no matches."""
        return []


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_data", type=str, default=os.path.join(BASE_DIR, 'data', 'test', '001_accordion_image_0001.jpg'), help="test data path.")
    parser.add_argument("--index_file", type=str, default=os.path.join(BASE_DIR, 'index', 'train.h5'), help="index file path.")
    parser.add_argument("--db_name", type=str, default='image_retrieval', help="database name.")
    parser.add_argument("--engine", type=str, default='numpy', help="retrieval engine.")
    args = vars(parser.parse_args())
    # 1. Extract the query feature vector from the test image.
    model = VGGNet()
    query_vector = model.vgg_extract_feat(args["test_data"])
    # 2. Run retrieval with the selected engine. (Renamed from "re", which
    #    shadowed the stdlib re module name.)
    engine = RetrievalEngine(args["index_file"], args["db_name"])
    result = engine.get_method(args["engine"])(query_vector, None)
    print(result)
"type": "keyword" 27 | }, 28 | "name": { 29 | "type": "keyword" 30 | } 31 | } 32 | } 33 | } 34 | 35 | 36 | class ESRetrieval(object): 37 | def __init__(self, index_name, index_dir, 38 | host=os.environ.get("ES_HOST", "127.0.0.1"), 39 | port=os.environ.get("ES_PORT", 9200)): 40 | self.index_name = index_name 41 | self.client = Elasticsearch([host]) 42 | self.load(index_dir) 43 | 44 | def load(self, index_dir): 45 | def index_batch(docs): 46 | requests = [] 47 | for i, doc in enumerate(docs): 48 | request = doc 49 | request["_op_type"] = "index" 50 | request["_index"] = self.index_name 51 | requests.append(request) 52 | bulk(self.client, requests) 53 | # 1. 读取索引 54 | h5f = h5py.File(index_dir, 'r') 55 | self.retrieval_db = h5f['dataset_1'][:] 56 | self.retrieval_name = h5f['dataset_2'][:] 57 | h5f.close() 58 | # 2. 入库ES 59 | r_list = [] 60 | for i, val in enumerate(self.retrieval_name): 61 | temp = { 62 | 'id': i, 63 | 'name': str(val), 64 | 'image_vector': self.retrieval_db[i].tolist() 65 | } 66 | r_list.append(temp) 67 | self.client.indices.delete(index=self.index_name, ignore=[404]) 68 | self.client.indices.create(index=self.index_name, body=INDEX_TABLE) 69 | docs = [] 70 | count = 0 71 | batch_size = 1000 72 | for doc in r_list: 73 | docs.append(doc) 74 | count += 1 75 | if count % batch_size == 0: 76 | index_batch(docs) 77 | docs = [] 78 | if docs: 79 | index_batch(docs) 80 | self.client.indices.refresh(index=self.index_name) 81 | print("************* Done es indexing, Indexed {} documents *************".format(len(self.retrieval_db))) 82 | 83 | def retrieve(self, query_vector, search_size=3): 84 | 85 | # script_query = { 86 | # "script_score": { 87 | # "query": {"match_all": {}}, 88 | # "script": { 89 | # "source": "cosineSimilarity(params.query_vector, doc['image_vector']) + 1.0", 90 | # "params": {"query_vector": query_vector} 91 | # } 92 | # } 93 | # } 94 | 95 | # script_query = { 96 | # "script_score": { 97 | # "query": {"match_all": {}}, 98 | # "script": { 
99 | # "source": """ 100 | # double value = dotProduct(params.query_vector, doc['image_vector']); 101 | # return sigmoid(1, Math.E, -value); 102 | # """, 103 | # "params": {"query_vector": query_vector} 104 | # } 105 | # } 106 | # } 107 | 108 | # script_query = { 109 | # "script_score": { 110 | # "query": {"match_all": {}}, 111 | # "script": { 112 | # "source": "1 / (1 + l1norm(params.queryVector, doc['image_vector']))", 113 | # "params": { 114 | # "queryVector": query_vector 115 | # } 116 | # } 117 | # } 118 | # } 119 | 120 | # script_query = { 121 | # "script_score": { 122 | # "query": {"match_all": {}}, 123 | # "script": { 124 | # "source": "1 / (1 + l2norm(params.queryVector, doc['image_vector']))", 125 | # "params": { 126 | # "queryVector": query_vector 127 | # } 128 | # } 129 | # } 130 | # } 131 | 132 | script_query = { 133 | "script_score": { 134 | "query": {"match_all": {}}, 135 | "script": { 136 | "source": """ 137 | double value = doc['image_vector'].size() == 0 ? 0 : dotProduct(params.query_vector, doc['image_vector']); 138 | return value; 139 | """, 140 | "params": {"query_vector": query_vector} 141 | } 142 | } 143 | } 144 | response = self.client.search( 145 | index=self.index_name, 146 | body={ 147 | "size": search_size, 148 | "query": script_query, 149 | "_source": {"includes": ["id", "name", "face_vector"]} 150 | } 151 | ) 152 | r_list = [] 153 | for hit in response["hits"]["hits"]: 154 | score = float(hit['_score']) * 0.5 + 0.5 155 | name = hit['_source']["name"] 156 | if name.encode("utf-8") and score > THRESHOLD: 157 | temp = { 158 | "id": hit['_source']["id"], 159 | "name": name, 160 | "score": round(score, 6) 161 | } 162 | r_list.append(temp) 163 | 164 | return r_list 165 | -------------------------------------------------------------------------------- /service/faiss_retrieval.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import h5py 5 | import numpy as np 
import faiss

THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score threshold


class FaissRetrieval(object):
    """Faiss flat inner-product retrieval over a precomputed HDF5 feature index."""

    def __init__(self, index_dir, emb_size=512):
        # emb_size must match the dimensionality of the stored feature vectors.
        self.emb_size = emb_size
        self.load(index_dir)

    def load(self, index_dir):
        """Read features/names from the HDF5 file and build an IndexFlatIP."""
        # 1. Read the feature index; the context manager closes the file even
        #    if a read fails.
        with h5py.File(index_dir, 'r') as h5f:
            self.retrieval_db = h5f['dataset_1'][:]
            self.retrieval_name = h5f['dataset_2'][:]
        # 2. Load into faiss. Flat indexes need no training; faiss requires
        #    contiguous float32 input.
        self.retrieval_db = np.ascontiguousarray(self.retrieval_db, dtype=np.float32)
        self.index = faiss.IndexFlatIP(self.emb_size)
        self.index.add(self.retrieval_db)
        print("************* Done faiss indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* matches scoring above THRESHOLD, best first."""
        score_list, index_list = self.index.search(
            np.array([query_vector], dtype=np.float32), search_size)
        r_list = []
        for i, val in enumerate(index_list[0]):
            # Bug fix: faiss pads the result with id -1 when fewer than
            # search_size vectors are indexed; indexing retrieval_name with -1
            # silently returned the wrong (last) document.
            if val < 0:
                continue
            # Map the inner product of unit vectors from [-1, 1] to [0, 1].
            score = float(score_list[0][i]) * 0.5 + 0.5
            if score > THRESHOLD:
                r_list.append({
                    "name": self.retrieval_name[int(val)],
                    "score": round(score, 6)
                })

        return r_list
20 | # 1. 读取索引 21 | h5f = h5py.File(index_dir, 'r') 22 | self.retrieval_db = h5f['dataset_1'][:] 23 | self.retrieval_name = h5f['dataset_2'][:] 24 | h5f.close() 25 | # 2. 入库Milvus 26 | if self.index_name in self.client.list_collections()[1]: 27 | self.client.drop_collection(collection_name=self.index_name) 28 | self.client.create_collection({'collection_name': self.index_name, 'dimension': 512, 'index_file_size': 1024, 'metric_type': MetricType.IP}) 29 | self.id_dict = {} 30 | status, ids = self.client.insert(collection_name=self.index_name, records=[i.tolist() for i in self.retrieval_db]) 31 | for i, val in enumerate(self.retrieval_name): 32 | self.id_dict[ids[i]] = str(val) 33 | self.client.create_index(self.index_name, IndexType.FLAT, {'nlist': 16384}) 34 | # pprint(self.client.get_collection_info(self.index_name)) 35 | print("************* Done milvus indexing, Indexed {} documents *************".format(len(self.retrieval_db))) 36 | 37 | def retrieve(self, query_vector, search_size=3): 38 | r_list = [] 39 | _, vectors = self.client.search(collection_name=self.index_name, query_records=[query_vector], top_k=search_size, params={'nprobe': 16}) 40 | for v in vectors[0]: 41 | score = float(v.distance) * 0.5 + 0.5 42 | if score > THRESHOLD: 43 | temp = { 44 | "id": v.id, 45 | "name": self.id_dict[v.id], 46 | "score": round(score, 6) 47 | } 48 | r_list.append(temp) 49 | 50 | return r_list 51 | -------------------------------------------------------------------------------- /service/numpy_retrieval.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import h5py 5 | import numpy as np 6 | 7 | THRESHOLD = float(os.environ.get('THRESHOLD', '0.85')) # 检索阈值 8 | 9 | 10 | class NumpyRetrieval(object): 11 | def __init__(self, index_dir, emb_size=512): 12 | self.emb_size = emb_size 13 | self.load(index_dir) 14 | 15 | def load(self, index_dir): 16 | h5f = h5py.File(index_dir, 'r') 17 | 
class VGGNet(object):
    """VGG16-based image feature extractor.

    Produces 512-dimensional, L2-normalised, max-pooled convolutional
    features suitable for similarity search.
    """

    def __init__(self):
        self.input_shape = (224, 224, 3)
        self.weight = 'imagenet'
        self.pooling = 'max'
        self.model_vgg = VGG16(
            weights=self.weight,
            input_shape=(self.input_shape[0], self.input_shape[1], self.input_shape[2]),
            pooling=self.pooling,
            include_top=False,
        )
        # Warm-up prediction so graph construction cost is not paid on the
        # first real request.
        self.model_vgg.predict(np.zeros((1, 224, 224, 3)))

    def vgg_extract_feat(self, img_path):
        """Load the image at *img_path* and return its unit-norm feature list."""
        pil_img = image.load_img(
            img_path, target_size=(self.input_shape[0], self.input_shape[1]))
        batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
        batch = preprocess_input_vgg(batch)
        raw_feat = self.model_vgg.predict(batch)[0]
        unit_feat = raw_feat / LA.norm(raw_feat)
        # Convert numpy scalars to plain Python floats for JSON/DB friendliness.
        return [component.item() for component in unit_feat]
--------------------------------------------------------------------------------