├── .devcontainer.json
├── .env
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── __init__.py
├── conf
└── server_config.yaml
├── data
├── test
│ ├── 001_accordion_image_0001.jpg
│ ├── 002_anchor_image_0001.jpg
│ ├── 003_ant_image_0001.jpg
│ └── 2012_000015.jpg
└── train
│ ├── 001_accordion_image_0002.jpg
│ ├── 001_accordion_image_0003.jpg
│ ├── 002_anchor_image_0002.jpg
│ ├── 002_anchor_image_0003.jpg
│ ├── 002_anchor_image_0004.jpg
│ ├── 003_ant_image_0002.jpg
│ ├── 003_ant_image_0003.jpg
│ ├── 2012_000003.jpg
│ ├── 2012_000004.jpg
│ ├── 2012_000007.jpg
│ ├── 2012_000010.jpg
│ ├── 2012_000014.jpg
│ ├── 2012_000162.jpg
│ ├── 2012_000166.jpg
│ ├── 2012_000168.jpg
│ ├── 2012_000169.jpg
│ ├── 2012_001337.jpg
│ ├── 2012_001339.jpg
│ ├── 2012_001341.jpg
│ ├── 2012_001344.jpg
│ ├── 2012_001346.jpg
│ ├── 2012_001347.jpg
│ ├── 2012_002049.jpg
│ ├── 2012_002050.jpg
│ ├── 2012_002051.jpg
│ └── 2012_002056.jpg
├── docker-compose-devcontainer.yml
├── docs
└── build.md
├── index.py
├── index
└── train.h5
├── pic
└── system_arch.png
├── requirements.txt
├── retrieval.py
├── scripts
├── clean.sh
└── devcontainer.sh
└── service
├── __init__.py
├── es_retrieval.py
├── faiss_retrieval.py
├── milvus_retrieval.py
├── numpy_retrieval.py
└── vggnet.py
/.devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Image Retrieval Dev Container Definition",
3 | "dockerComposeFile": ["./docker-compose-devcontainer.yml"],
4 | "service": "image-retrieval",
5 | "initializeCommand": "scripts/devcontainer.sh up",
6 | "workspaceFolder": "/www/server",
7 | "remoteEnv": { },
8 | "extensions": [
9 | "ms-python.python",
10 | "ms-python.vscode-pylance"
11 | ]
12 | }
13 |
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | ES_HOST=es
2 | ES_PORT=9200
3 | MILVUS_HOST=milvus
4 | MILVUS_PORT=19530
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .idea/
3 | .DS_Store
4 | __pycache__/
5 |
6 | volumes
7 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM liyaodev/base-cpu-u18-py3.8:v1.0.0
2 | LABEL maintainer=liyaodev
3 |
4 | RUN rm -rf /usr/local/bin/python && ln -s /usr/local/bin/python3.8 /usr/local/bin/python
5 | RUN rm -rf /usr/local/bin/pip && ln -s /usr/local/bin/pip3 /usr/local/bin/pip
6 |
7 | RUN echo 'root:root' | chpasswd
8 |
9 | # 构建Tini的多服务容器
10 | RUN wget -O /tini https://github.com/krallin/tini/releases/download/v0.19.0/tini && \
11 | chmod +x /tini
12 | ENTRYPOINT ["/tini", "--"]
13 |
14 | WORKDIR /www/server
15 |
16 | COPY ./requirements.txt ./requirements.txt
17 | RUN pip3 install --no-cache-dir -r ./requirements.txt \
18 | -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
19 |
20 | ENV PYTHONUNBUFFERED 1
21 |
22 | CMD ["tail", "-f", "/dev/null"]
23 | EXPOSE 8888
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
2 | up:
3 | sh scripts/devcontainer.sh up
4 |
5 | down:
6 | sh scripts/devcontainer.sh down
7 |
8 | dev:
9 | docker exec -it image-retrieval /bin/bash
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## 图片向量检索服务构建
2 |
3 | 该系统使用VGG(图像特征提取模型)和Numpy、Faiss、ES、Milvus构建了图像搜索流程。 系统架构如下:
4 |
5 |
6 |
7 | ## 构建环境
8 |
9 | ### Docker-Compose
10 |
11 | ```shell
12 | # 启动
13 | make up
14 |
15 | # 开发运行
16 | make dev
17 |
18 | # 关闭
19 | make down
20 | ```
21 |
22 | ### Docker 环境
23 |
24 | 详见[环境安装](./docs/build.md)
25 |
26 | ### 操作简介
27 |
28 | 操作一:构建基础索引
29 |
30 | ```shell
31 | python index.py
32 | --train_data:自定义训练图片文件夹路径,默认为`/data/train`
33 | --index_file:自定义索引文件存储路径,默认为`/index/train.h5`
34 |
35 | # 示例:
36 | python index.py --train_data /www/server/data/train --index_file /www/server/index/train.h5
37 | ```
38 |
39 | 操作二:使用相似检索
40 |
41 | ```shell
42 | python retrieval.py --engine=numpy
43 | --test_data:自定义测试图片详细地址,默认为`/data/test/001_accordion_image_0001.jpg`
44 | --index_file:自定义索引文件存储路径,默认为`/index/train.h5`
45 | --db_name:自定义ES或者Milvus索引库名,默认为`image_retrieval`
46 | --engine:自定义检索引擎类型,默认为`numpy`,可选包括:numpy、faiss、es、milvus
47 |
48 | # 示例:
49 | python retrieval.py --engine=numpy --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg
50 |
51 | python retrieval.py --engine=faiss --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg
52 |
53 | python retrieval.py --engine=es --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg
54 |
55 | python retrieval.py --engine=milvus --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg
56 | ```
57 |
58 | ### 附录
59 |
60 | 参考1:https://github.com/willard-yuan/flask-keras-cnn-image-retrieval
61 | 参考2:https://github.com/zilliz-bootcamp/image_search
62 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/conf/server_config.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2019-2020 Zilliz. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
4 | # with the License. You may obtain a copy of the License at
5 | #
6 | # http://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software distributed under the License
9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10 | # or implied. See the License for the specific language governing permissions and limitations under the License.
11 |
12 | version: 0.5
13 |
14 | #----------------------+------------------------------------------------------------+------------+-----------------+
15 | # Cluster Config | Description | Type | Default |
16 | #----------------------+------------------------------------------------------------+------------+-----------------+
 17 | # enable               | If running with Mishards, set true, otherwise false.       | Boolean    | false           |
18 | #----------------------+------------------------------------------------------------+------------+-----------------+
19 | # role | Milvus deployment role: rw / ro | role | rw |
20 | #----------------------+------------------------------------------------------------+------------+-----------------+
21 | cluster:
22 | enable: false
23 | role: rw
24 |
25 | #----------------------+------------------------------------------------------------+------------+-----------------+
26 | # General Config | Description | Type | Default |
27 | #----------------------+------------------------------------------------------------+------------+-----------------+
 28 | # timezone             | Use UTC-x or UTC+x to specify a time zone.                 | Timezone   | UTC+8           |
29 | #----------------------+------------------------------------------------------------+------------+-----------------+
30 | # meta_uri | URI for metadata storage, using SQLite (for single server | URL | sqlite://:@:/ |
31 | # | Milvus) or MySQL (for distributed cluster Milvus). | | |
32 | # | Format: dialect://username:password@host:port/database | | |
33 | # | Keep 'dialect://:@:/', 'dialect' can be either 'sqlite' or | | |
34 | # | 'mysql', replace other texts with real values. | | |
35 | #----------------------+------------------------------------------------------------+------------+-----------------+
36 | general:
37 | timezone: UTC+8
38 | meta_uri: sqlite://:@:/
39 |
40 | #----------------------+------------------------------------------------------------+------------+-----------------+
41 | # Network Config | Description | Type | Default |
42 | #----------------------+------------------------------------------------------------+------------+-----------------+
43 | # bind.address | IP address that Milvus server monitors. | IP | 0.0.0.0 |
44 | #----------------------+------------------------------------------------------------+------------+-----------------+
45 | # bind.port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 |
46 | #----------------------+------------------------------------------------------------+------------+-----------------+
47 | # http.enable | Enable web server or not. | Boolean | true |
48 | #----------------------+------------------------------------------------------------+------------+-----------------+
49 | # http.port | Port that Milvus web server monitors. | Integer | 19121 |
50 | # | Port range (1024, 65535) | | |
51 | #----------------------+------------------------------------------------------------+------------+-----------------+
52 | network:
53 | bind.address: 0.0.0.0
54 | bind.port: 19530
55 | http.enable: true
56 | http.port: 19121
57 |
58 | #----------------------+------------------------------------------------------------+------------+-----------------+
59 | # Storage Config | Description | Type | Default |
60 | #----------------------+------------------------------------------------------------+------------+-----------------+
61 | # path | Path used to save meta data, vector data and index data. | Path | /var/lib/milvus |
62 | #----------------------+------------------------------------------------------------+------------+-----------------+
63 | # auto_flush_interval | The interval, in seconds, at which Milvus automatically | Integer | 1 (s) |
64 | # | flushes data to disk. | | |
65 | # | 0 means disable the regular flush. | | |
66 | #----------------------+------------------------------------------------------------+------------+-----------------+
67 | # s3_enabled | If using s3 storage backend. | Boolean | false |
68 | #----------------------+------------------------------------------------------------+------------+-----------------+
69 | # s3_address | The s3 server address, support domain/hostname/ipaddress | String | 127.0.0.1 |
70 | #----------------------+------------------------------------------------------------+------------+-----------------+
71 | # s3_port | The s3 server port. | Integer | 80 |
72 | #----------------------+------------------------------------------------------------+------------+-----------------+
73 | # s3_access_key | The access key for accessing s3 service. | String | s3_access_key |
74 | #----------------------+------------------------------------------------------------+------------+-----------------+
 75 | # s3_secret_key        | The secret key for accessing s3 service.                   | String     | s3_secret_key   |
76 | #----------------------+------------------------------------------------------------+------------+-----------------+
 77 | # s3_bucket            | The s3 bucket name for storing Milvus data.                | String     | s3_bucket       |
 78 |#                      | Note: please use a different bucket for each Milvus        |            |                 |
 79 |#                      | cluster.                                                   |            |                 |
80 | #----------------------+------------------------------------------------------------+------------+-----------------+
81 | storage:
82 | path: /var/lib/milvus
83 | auto_flush_interval: 1
84 |
85 |
86 | #----------------------+------------------------------------------------------------+------------+-----------------+
87 | # WAL Config | Description | Type | Default |
88 | #----------------------+------------------------------------------------------------+------------+-----------------+
89 | # enable | Whether to enable write-ahead logging (WAL) in Milvus. | Boolean | true |
90 | # | If WAL is enabled, Milvus writes all data changes to log | | |
91 | # | files in advance before implementing data changes. WAL | | |
92 | # | ensures the atomicity and durability for Milvus operations.| | |
93 | #----------------------+------------------------------------------------------------+------------+-----------------+
94 | # recovery_error_ignore| Whether to ignore logs with errors that happens during WAL | Boolean | false |
95 | # | recovery. If true, when Milvus restarts for recovery and | | |
96 | # | there are errors in WAL log files, log files with errors | | |
97 | # | are ignored. If false, Milvus does not restart when there | | |
98 | # | are errors in WAL log files. | | |
99 | #----------------------+------------------------------------------------------------+------------+-----------------+
100 | # buffer_size | Sum total of the read buffer and the write buffer in MBs. | Integer | 256 (MB) |
101 | # | buffer_size must be in range [64, 4096] (MB). | | |
102 | # | If the value you specified is out of range, Milvus | | |
103 | # | automatically uses the boundary value closest to the | | |
104 | # | specified value. It is recommended you set buffer_size to | | |
105 | # | a value greater than the inserted data size of a single | | |
106 | # | insert operation for better performance. | | |
107 | #----------------------+------------------------------------------------------------+------------+-----------------+
108 | # path | Location of WAL log files. | String | |
109 | #----------------------+------------------------------------------------------------+------------+-----------------+
110 | wal:
111 | enable: true
112 | recovery_error_ignore: false
113 | buffer_size: 256MB
114 | path: /var/lib/milvus/wal
115 |
116 | #----------------------+------------------------------------------------------------+------------+-----------------+
117 | # Cache Config | Description | Type | Default |
118 | #----------------------+------------------------------------------------------------+------------+-----------------+
119 | # cache_size | The size of CPU memory used for caching data for faster | Integer | 4 (GB) |
120 | # | query. The sum of 'cpu_cache_capacity' and | | |
121 | # | 'insert_buffer_size' must be less than system memory size. | | |
122 | #----------------------+------------------------------------------------------------+------------+-----------------+
123 | # insert_buffer_size | Buffer size used for data insertion. | Integer | 1 (GB) |
124 | # | The sum of 'insert_buffer_size' and 'cpu_cache_capacity' | | |
125 | # | must be less than system memory size. | | |
126 | #----------------------+------------------------------------------------------------+------------+-----------------+
127 | # preload_collection | A comma-separated list of collection names that need to | StringList | |
128 | # | be pre-loaded when Milvus server starts up. | | |
129 | # | '*' means preload all existing tables (single-quote or | | |
130 | # | double-quote required). | | |
131 | #----------------------+------------------------------------------------------------+------------+-----------------+
132 | cache:
133 | cache_size: 4GB
134 | insert_buffer_size: 1GB
135 | preload_collection:
136 |
137 | #----------------------+------------------------------------------------------------+------------+-----------------+
138 | # GPU Config | Description | Type | Default |
139 | #----------------------+------------------------------------------------------------+------------+-----------------+
140 | # enable | Enable GPU resources or not. | Boolean | false |
141 | #----------------------+------------------------------------------------------------+------------+-----------------+
142 | # cache_size | The size of GPU memory per card used for cache. | Integer | 1 (GB) |
143 | #----------------------+------------------------------------------------------------+------------+-----------------+
144 | # gpu_search_threshold | A Milvus performance tuning parameter. This value will be | Integer | 1000 |
145 | # | compared with 'nq' to decide if the search computation will| | |
146 | # | be executed on GPUs only. | | |
147 | # | If nq >= gpu_search_threshold, the search computation will | | |
148 | # | be executed on GPUs only; | | |
149 | # | if nq < gpu_search_threshold, the search computation will | | |
150 | # | be executed on CPUs only. | | |
151 | # | The SQ8H index is special, if nq < gpu_search_threshold, | | |
152 | # | the search will be executed on both CPUs and GPUs. | | |
153 | #----------------------+------------------------------------------------------------+------------+-----------------+
154 | # search_devices       | The list of GPU devices used for search computation.       | DeviceList | gpu0            |
155 | # | Must be in format gpux. | | |
156 | #----------------------+------------------------------------------------------------+------------+-----------------+
157 | # build_index_devices  | The list of GPU devices used for index building.           | DeviceList | gpu0            |
158 | # | Must be in format gpux. | | |
159 | #----------------------+------------------------------------------------------------+------------+-----------------+
160 | gpu:
161 | enable: false
162 | cache_size: 1GB
163 | gpu_search_threshold: 1000
164 | search_devices:
165 | - gpu0
166 | build_index_devices:
167 | - gpu0
168 |
169 | #----------------------+------------------------------------------------------------+------------+-----------------+
170 | # FPGA Config | Description | Type | Default |
171 | #----------------------+------------------------------------------------------------+------------+-----------------+
172 | # enable | Use FPGA devices or not. | Boolean | false |
173 | #----------------------+------------------------------------------------------------+------------+-----------------+
174 | # search_devices | The list of FPGA devices used for search computation. | DeviceList | fpga0 |
175 | # | Must be in format fpgax. | | |
176 | #----------------------+------------------------------------------------------------+------------+-----------------+
177 | fpga:
178 | enable: false
179 | search_devices:
180 | - fpga0
181 |
182 | #----------------------+------------------------------------------------------------+------------+-----------------+
183 | # Logs Config | Description | Type | Default |
184 | #----------------------+------------------------------------------------------------+------------+-----------------+
185 | # level | Log level in Milvus. Must be one of debug, info, warning, | String | debug |
186 | # | error, fatal | | |
187 | #----------------------+------------------------------------------------------------+------------+-----------------+
188 | # trace.enable | Whether to enable trace level logging in Milvus. | Boolean | true |
189 | #----------------------+------------------------------------------------------------+------------+-----------------+
190 | # path | Absolute path to the folder holding the log files. | String | |
191 | #----------------------+------------------------------------------------------------+------------+-----------------+
192 | # max_log_file_size | The maximum size of each log file, size range [512, 4096] | Integer | 1024 (MB) |
193 | #----------------------+------------------------------------------------------------+------------+-----------------+
194 | # log_rotate_num | The maximum number of log files that Milvus keeps for each | Integer | 0 |
195 | # | logging level, num range [0, 1024], 0 means unlimited. | | |
196 | #----------------------+------------------------------------------------------------+------------+-----------------+
197 | # log_to_stdout | Whether to write logs to standard output in Milvus. | Boolean | false |
198 | #----------------------+------------------------------------------------------------+------------+-----------------+
199 | # log_to_file | Whether to write logs to files in Milvus | Boolean | true |
200 | #----------------------+------------------------------------------------------------+------------+-----------------+
201 | logs:
202 | level: debug
203 | trace.enable: true
204 | path: /var/lib/milvus/logs
205 | max_log_file_size: 1024MB
206 | log_rotate_num: 0
207 | log_to_stdout: false
208 | log_to_file: true
209 |
210 | #----------------------+------------------------------------------------------------+------------+-----------------+
211 | # Metric Config | Description | Type | Default |
212 | #----------------------+------------------------------------------------------------+------------+-----------------+
213 | # enable | Enable monitoring function or not. | Boolean | false |
214 | #----------------------+------------------------------------------------------------+------------+-----------------+
215 | # address              | Pushgateway address                                        | IP         | 127.0.0.1       |
216 | #----------------------+------------------------------------------------------------+------------+-----------------+
217 | # port | Pushgateway port, port range (1024, 65535) | Integer | 9091 |
218 | #----------------------+------------------------------------------------------------+------------+-----------------+
219 | metric:
220 | enable: false
221 | address: 127.0.0.1
222 | port: 9091
223 |
224 |
--------------------------------------------------------------------------------
/data/test/001_accordion_image_0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/001_accordion_image_0001.jpg
--------------------------------------------------------------------------------
/data/test/002_anchor_image_0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/002_anchor_image_0001.jpg
--------------------------------------------------------------------------------
/data/test/003_ant_image_0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/003_ant_image_0001.jpg
--------------------------------------------------------------------------------
/data/test/2012_000015.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/2012_000015.jpg
--------------------------------------------------------------------------------
/data/train/001_accordion_image_0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/001_accordion_image_0002.jpg
--------------------------------------------------------------------------------
/data/train/001_accordion_image_0003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/001_accordion_image_0003.jpg
--------------------------------------------------------------------------------
/data/train/002_anchor_image_0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0002.jpg
--------------------------------------------------------------------------------
/data/train/002_anchor_image_0003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0003.jpg
--------------------------------------------------------------------------------
/data/train/002_anchor_image_0004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0004.jpg
--------------------------------------------------------------------------------
/data/train/003_ant_image_0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/003_ant_image_0002.jpg
--------------------------------------------------------------------------------
/data/train/003_ant_image_0003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/003_ant_image_0003.jpg
--------------------------------------------------------------------------------
/data/train/2012_000003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000003.jpg
--------------------------------------------------------------------------------
/data/train/2012_000004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000004.jpg
--------------------------------------------------------------------------------
/data/train/2012_000007.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000007.jpg
--------------------------------------------------------------------------------
/data/train/2012_000010.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000010.jpg
--------------------------------------------------------------------------------
/data/train/2012_000014.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000014.jpg
--------------------------------------------------------------------------------
/data/train/2012_000162.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000162.jpg
--------------------------------------------------------------------------------
/data/train/2012_000166.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000166.jpg
--------------------------------------------------------------------------------
/data/train/2012_000168.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000168.jpg
--------------------------------------------------------------------------------
/data/train/2012_000169.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000169.jpg
--------------------------------------------------------------------------------
/data/train/2012_001337.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001337.jpg
--------------------------------------------------------------------------------
/data/train/2012_001339.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001339.jpg
--------------------------------------------------------------------------------
/data/train/2012_001341.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001341.jpg
--------------------------------------------------------------------------------
/data/train/2012_001344.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001344.jpg
--------------------------------------------------------------------------------
/data/train/2012_001346.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001346.jpg
--------------------------------------------------------------------------------
/data/train/2012_001347.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001347.jpg
--------------------------------------------------------------------------------
/data/train/2012_002049.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002049.jpg
--------------------------------------------------------------------------------
/data/train/2012_002050.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002050.jpg
--------------------------------------------------------------------------------
/data/train/2012_002051.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002051.jpg
--------------------------------------------------------------------------------
/data/train/2012_002056.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002056.jpg
--------------------------------------------------------------------------------
/docker-compose-devcontainer.yml:
--------------------------------------------------------------------------------
1 | version: '3.5'
2 |
3 | services:
4 | image-retrieval:
5 | container_name: image-retrieval
6 | image: liyaodev/image-retrieval-env:v1.0.0
7 | # 本地构建
8 | build:
9 | context: .
10 | dockerfile: ./Dockerfile
11 | ports:
12 | - 8888:8888
13 | environment:
14 | ES_HOST: ${ES_HOST}
15 | ES_PORT: ${ES_PORT}
16 | MILVUS_HOST: ${MILVUS_HOST}
17 | MILVUS_PORT: ${MILVUS_PORT}
18 | volumes:
19 | - .:/www/server:delegated
20 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/vscode-extensions:/www/.vscode-server:delegated
21 | depends_on:
22 | - es
23 | - milvus
24 | - milvus-em
25 |
26 | es:
27 | container_name: image-retrieval-es
28 | image: docker.elastic.co/elasticsearch/elasticsearch:7.5.0
29 | environment:
30 | - discovery.type=single-node
31 | ports:
32 | - 9200:9200
33 | - 9300:9300
34 | volumes:
35 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/es:/usr/share/elasticsearch/data
36 |
37 | milvus:
38 | container_name: image-retrieval-milvus
39 | image: milvusdb/milvus:1.1.1-cpu-d061621-330cc6
40 | ports:
41 | - 19530:19530
42 | - 19121:19121
43 | volumes:
44 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/conf:/var/lib/milvus/conf
45 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/db:/var/lib/milvus/db
46 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/logs:/var/lib/milvus/logs
47 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/wal:/var/lib/milvus/wal
48 |
49 | milvus-em:
50 | container_name: image-retrieval-milvus-em
51 | image: milvusdb/milvus-em:v0.4.2
52 | environment:
53 | - API_URL=http://${MILVUS_HOST}:19121
54 | ports:
55 | - 3000:80
56 |
57 | networks:
58 | default:
59 | name: image_retrieval_dev
60 |
--------------------------------------------------------------------------------
/docs/build.md:
--------------------------------------------------------------------------------
1 |
2 | ## 手动构建环境
3 |
4 | ### 基础环境安装
5 |
6 | Python版本:3.8.12
7 |
8 | ```shell
9 | pip install -r requirements.txt
10 | ```
11 |
12 | ### ES服务端安装
13 |
14 | ```shell
15 | docker run -it -d -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.5.0
16 | ```
17 |
18 | ### Milvus服务端安装
19 |
20 | 安装指南:https://milvus.io/cn/docs/v1.1.1/milvus_docker-cpu.md
21 | 下载配置
22 |
23 | ```shell
24 | mkdir -p milvus/conf && cd milvus/conf
25 | wget https://raw.githubusercontent.com/milvus-io/milvus/v1.1.1/core/conf/demo/server_config.yaml
26 | ```
27 |
28 | 服务启动
29 |
30 | ```shell
31 | docker run -d --name milvus_cpu_1.1.1 \
32 | -p 19530:19530 \
33 | -p 19121:19121 \
34 | -v /milvus/db:/var/lib/milvus/db \
35 | -v /milvus/conf:/var/lib/milvus/conf \
36 | -v /milvus/logs:/var/lib/milvus/logs \
37 | -v /milvus/wal:/var/lib/milvus/wal \
38 | milvusdb/milvus:1.1.1-cpu-d061621-330cc6
39 | ```
40 |
--------------------------------------------------------------------------------
/index.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import h5py
4 | import argparse
5 | import numpy as np
6 | from service.vggnet import VGGNet
7 | import os
8 | import sys
9 | from os.path import dirname
10 | BASE_DIR = dirname(os.path.abspath(__file__))
11 | sys.path.append(BASE_DIR)
12 |
def get_imlist(path):
    """Return paths of all ``.jpg`` files directly inside *path* (listdir order)."""
    jpg_names = (entry for entry in os.listdir(path) if entry.endswith('.jpg'))
    return [os.path.join(path, name) for name in jpg_names]
15 |
16 |
if __name__ == "__main__":
    # Build the retrieval index: extract a VGG16 feature for every training
    # image and persist features + names to an HDF5 file.
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data", type=str, default=os.path.join(BASE_DIR, 'data', 'train'), help="train data path.")
    parser.add_argument("--index_file", type=str, default=os.path.join(BASE_DIR, 'index', 'train.h5'), help="index file path.")
    args = vars(parser.parse_args())
    img_list = get_imlist(args["train_data"])
    print("--------------------------------------------------")
    print("         feature extraction starts")
    print("--------------------------------------------------")
    feats = []
    names = []
    model = VGGNet()  # load VGG16 weights once; reused for every image
    for i, img_path in enumerate(img_list):
        norm_feat = model.vgg_extract_feat(img_path)
        img_name = os.path.split(img_path)[1]
        feats.append(norm_feat)
        names.append(img_name)
        print("extracting feature from image No. %d , %d images in total" % ((i + 1), len(img_list)))
    feats = np.array(feats)
    print("--------------------------------------------------")
    print("      writing feature extraction results")
    print("--------------------------------------------------")
    # Context manager guarantees the HDF5 file is closed even if a write fails
    # (the original left the file open on error).
    with h5py.File(args["index_file"], 'w') as h5f:
        h5f.create_dataset('dataset_1', data=feats)
        h5f.create_dataset('dataset_2', data=np.string_(names))
43 |
--------------------------------------------------------------------------------
/index/train.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/index/train.h5
--------------------------------------------------------------------------------
/pic/system_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/pic/system_arch.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.19.5
2 | keras==2.4.3
3 | tensorflow==2.4.1
4 | pillow==8.1.0
5 | elasticsearch==7.11.0
6 | pymilvus==1.1.2
7 | faiss-cpu==1.7.0
8 |
--------------------------------------------------------------------------------
/retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | from service.vggnet import VGGNet
5 | from service.numpy_retrieval import NumpyRetrieval
6 | from service.faiss_retrieval import FaissRetrieval
7 | from service.es_retrieval import ESRetrieval
8 | from service.milvus_retrieval import MilvusRetrieval
9 | import os
10 | import sys
11 | from os.path import dirname
12 | BASE_DIR = dirname(os.path.abspath(__file__))
13 | sys.path.append(BASE_DIR)
14 |
15 |
class RetrievalEngine(object):
    """Dispatch image-retrieval queries to one of several backend engines.

    Backends are constructed lazily on first use and cached on the instance,
    so unused engines never pay their (potentially expensive) setup cost.
    """

    def __init__(self, index_file, db_name):
        self.index_file = index_file
        self.db_name = db_name
        self.numpy_r = None
        self.faiss_r = None
        self.es_r = None
        self.milvus_r = None

    def get_method(self, m_name):
        """Return the bound handler named ``<m_name>_handler``, else the default."""
        return getattr(self, "%s_handler" % str(m_name), self.default_handler)

    def numpy_handler(self, query_vector, req_id=None):
        # Brute-force dot-product search with numpy.
        if self.numpy_r is None:
            self.numpy_r = NumpyRetrieval(self.index_file)
        return self.numpy_r.retrieve(query_vector)

    def faiss_handler(self, query_vector, req_id=None):
        # Exact inner-product search via a faiss flat index.
        if self.faiss_r is None:
            self.faiss_r = FaissRetrieval(self.index_file)
        return self.faiss_r.retrieve(query_vector)

    def es_handler(self, query_vector, req_id=None):
        # Script-score search in Elasticsearch.
        if self.es_r is None:
            self.es_r = ESRetrieval(self.db_name, self.index_file)
        return self.es_r.retrieve(query_vector)

    def milvus_handler(self, query_vector, req_id=None):
        # Vector search against a Milvus collection.
        if self.milvus_r is None:
            self.milvus_r = MilvusRetrieval(self.db_name, self.index_file)
        return self.milvus_r.retrieve(query_vector)

    def default_handler(self, query_vector, req_id=None):
        # Unknown engine name: no matches.
        return []
54 |
55 |
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_data", type=str, default=os.path.join(BASE_DIR, 'data', 'test', '001_accordion_image_0001.jpg'), help="test data path.")
    parser.add_argument("--index_file", type=str, default=os.path.join(BASE_DIR, 'index', 'train.h5'), help="index file path.")
    parser.add_argument("--db_name", type=str, default='image_retrieval', help="database name.")
    parser.add_argument("--engine", type=str, default='numpy', help="retrieval engine.")
    args = vars(parser.parse_args())
    # 1. Image inference: extract a normalized VGG16 feature vector.
    model = VGGNet()
    query_vector = model.vgg_extract_feat(args["test_data"])
    # 2. Image retrieval: dispatch to the selected backend.
    # Named "engine" because the original "re" shadowed the stdlib regex module.
    engine = RetrievalEngine(args["index_file"], args["db_name"])
    result = engine.get_method(args["engine"])(query_vector, None)
    print(result)
70 |
71 |
--------------------------------------------------------------------------------
/scripts/clean.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Placeholder for project clean-up tasks (e.g. removing generated volumes,
# caches, or index files). Intentionally empty for now.

--------------------------------------------------------------------------------
/scripts/devcontainer.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Bring the dev-container stack up or tear it down.
# Usage: devcontainer.sh up|down
# Honors IMAGE_RETRIEVAL_ROOT_DIR (defaults to the current directory).

if [ "${1-}" = "up" ]; then
    mkdir -p "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/vscode-extensions"
    chmod -R 777 "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes"

    # Quote the compose-file path so root dirs containing spaces work.
    docker-compose -f "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/docker-compose-devcontainer.yml" up -d
fi

if [ "${1-}" = "down" ]; then
    docker-compose -f "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/docker-compose-devcontainer.yml" down
    rm -rf "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes"
fi
14 |
--------------------------------------------------------------------------------
/service/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/service/es_retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import h5py
5 | import numpy as np
6 | from elasticsearch import Elasticsearch
7 | from elasticsearch.helpers import bulk
8 |
THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score cut-off
# Index template: one 512-d dense vector per image plus its id and name.
INDEX_TABLE = {
    "settings": {
        "number_of_shards": 2,
        "number_of_replicas": 1
    },
    "mappings": {
        "dynamic": "true",
        "_source": {
            "enabled": "true"
        },
        "properties": {
            "image_vector": {
                "type": "dense_vector",
                "dims": 512
            },
            "id": {
                "type": "keyword"
            },
            "name": {
                "type": "keyword"
            }
        }
    }
}


class ESRetrieval(object):
    """Image retrieval backed by an Elasticsearch ``dense_vector`` index."""

    def __init__(self, index_name, index_dir,
                 host=os.environ.get("ES_HOST", "127.0.0.1"),
                 port=os.environ.get("ES_PORT", 9200)):
        self.index_name = index_name
        # Bug fix: `port` was previously ignored (client built from host only),
        # so the ES_PORT environment variable had no effect.
        self.client = Elasticsearch([{"host": host, "port": int(port)}])
        self.load(index_dir)

    def load(self, index_dir):
        """Read vectors from the HDF5 index file and (re)build the ES index."""
        def index_batch(docs):
            # Tag each doc with the bulk op-type and target index, then flush.
            requests = []
            for doc in docs:
                request = doc
                request["_op_type"] = "index"
                request["_index"] = self.index_name
                requests.append(request)
            bulk(self.client, requests)
        # 1. Read the HDF5 index (feature matrix + image names).
        h5f = h5py.File(index_dir, 'r')
        self.retrieval_db = h5f['dataset_1'][:]
        self.retrieval_name = h5f['dataset_2'][:]
        h5f.close()
        # 2. Recreate the ES index from scratch and bulk-load the vectors.
        r_list = []
        for i, val in enumerate(self.retrieval_name):
            temp = {
                'id': i,
                'name': str(val),
                'image_vector': self.retrieval_db[i].tolist()
            }
            r_list.append(temp)
        self.client.indices.delete(index=self.index_name, ignore=[404])
        self.client.indices.create(index=self.index_name, body=INDEX_TABLE)
        docs = []
        count = 0
        batch_size = 1000
        for doc in r_list:
            docs.append(doc)
            count += 1
            if count % batch_size == 0:
                index_batch(docs)
                docs = []
        if docs:
            index_batch(docs)
        self.client.indices.refresh(index=self.index_name)
        print("************* Done es indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* hits whose rescaled score exceeds THRESHOLD.

        Scores by raw dot product in a painless script. Alternative scoring
        functions supported by ES dense vectors (cosineSimilarity, l1norm,
        l2norm, sigmoid-of-dot-product) could be substituted here.
        """
        script_query = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": """
                        double value = doc['image_vector'].size() == 0 ? 0 : dotProduct(params.query_vector, doc['image_vector']);
                        return value;
                    """,
                    "params": {"query_vector": query_vector}
                }
            }
        }
        response = self.client.search(
            index=self.index_name,
            body={
                "size": search_size,
                "query": script_query,
                # Bug fix: the mapping stores "image_vector"; "face_vector"
                # never existed in this index.
                "_source": {"includes": ["id", "name", "image_vector"]}
            }
        )
        r_list = []
        for hit in response["hits"]["hits"]:
            # Dot product of unit vectors lies in [-1, 1]; map it to [0, 1].
            score = float(hit['_score']) * 0.5 + 0.5
            name = hit['_source']["name"]
            if name.encode("utf-8") and score > THRESHOLD:
                temp = {
                    "id": hit['_source']["id"],
                    "name": name,
                    "score": round(score, 6)
                }
                r_list.append(temp)

        return r_list
165 |
--------------------------------------------------------------------------------
/service/faiss_retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import h5py
5 | import numpy as np
6 | import faiss
7 |
THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score cut-off


class FaissRetrieval(object):
    """Image retrieval backed by an in-memory faiss inner-product flat index."""

    def __init__(self, index_dir, emb_size=512):
        self.emb_size = emb_size
        self.load(index_dir)

    def load(self, index_dir):
        """Read vectors from the HDF5 index file and build a flat IP index."""
        # 1. Read the HDF5 index (feature matrix + image names).
        h5f = h5py.File(index_dir, 'r')
        self.retrieval_db = h5f['dataset_1'][:]
        self.retrieval_name = h5f['dataset_2'][:]
        h5f.close()
        # 2. Load faiss. A flat index performs exact search and needs no training.
        self.retrieval_db = np.asarray(self.retrieval_db).astype(np.float32)
        self.index = faiss.IndexFlatIP(self.emb_size)
        self.index.add(self.retrieval_db)
        print("************* Done faiss indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* hits whose rescaled score exceeds THRESHOLD."""
        query = np.array([query_vector]).astype(np.float32)
        score_list, index_list = self.index.search(query, search_size)
        results = []
        for rank, db_idx in enumerate(index_list[0]):
            # Inner product of unit vectors lies in [-1, 1]; map it to [0, 1].
            score = float(score_list[0][rank]) * 0.5 + 0.5
            if score > THRESHOLD:
                results.append({
                    "name": self.retrieval_name[int(db_idx)],
                    "score": round(score, 6),
                })

        return results
43 |
--------------------------------------------------------------------------------
/service/milvus_retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import h5py
5 | import numpy as np
6 | from pprint import pprint
7 | from milvus import Milvus, IndexType, MetricType
8 |
THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score cut-off

class MilvusRetrieval(object):
    """Image retrieval backed by a Milvus collection using the IP metric."""

    def __init__(self, index_name, index_dir,
                 host=os.environ.get("MILVUS_HOST", "127.0.0.1"),
                 port=os.environ.get("MILVUS_PORT", 19530)):
        self.client = Milvus(host, port)
        self.index_name = index_name
        self.load(index_dir)

    def load(self, index_dir):
        """Read vectors from the HDF5 index file and (re)create the collection."""
        # 1. Read the HDF5 index (feature matrix + image names).
        h5f = h5py.File(index_dir, 'r')
        self.retrieval_db = h5f['dataset_1'][:]
        self.retrieval_name = h5f['dataset_2'][:]
        h5f.close()
        # 2. Rebuild the Milvus collection from scratch and insert all vectors.
        if self.index_name in self.client.list_collections()[1]:
            self.client.drop_collection(collection_name=self.index_name)
        self.client.create_collection({
            'collection_name': self.index_name,
            'dimension': 512,
            'index_file_size': 1024,
            'metric_type': MetricType.IP,
        })
        records = [vec.tolist() for vec in self.retrieval_db]
        status, ids = self.client.insert(collection_name=self.index_name, records=records)
        # Map the Milvus-assigned ids back to image names for result formatting.
        self.id_dict = {}
        for pos, val in enumerate(self.retrieval_name):
            self.id_dict[ids[pos]] = str(val)
        self.client.create_index(self.index_name, IndexType.FLAT, {'nlist': 16384})
        print("************* Done milvus indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* hits whose rescaled score exceeds THRESHOLD."""
        _, vectors = self.client.search(collection_name=self.index_name,
                                        query_records=[query_vector],
                                        top_k=search_size,
                                        params={'nprobe': 16})
        results = []
        for hit in vectors[0]:
            # IP distance of unit vectors lies in [-1, 1]; map it to [0, 1].
            score = float(hit.distance) * 0.5 + 0.5
            if score > THRESHOLD:
                results.append({
                    "id": hit.id,
                    "name": self.id_dict[hit.id],
                    "score": round(score, 6),
                })

        return results
51 |
--------------------------------------------------------------------------------
/service/numpy_retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import h5py
5 | import numpy as np
6 |
THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score cut-off


class NumpyRetrieval(object):
    """Brute-force image retrieval via dot products over an in-memory matrix."""

    def __init__(self, index_dir, emb_size=512):
        self.emb_size = emb_size
        self.load(index_dir)

    def load(self, index_dir):
        """Read the feature matrix and image names from the HDF5 index file."""
        h5f = h5py.File(index_dir, 'r')
        self.retrieval_db = h5f['dataset_1'][:]
        self.retrieval_name = h5f['dataset_2'][:]
        h5f.close()
        print("************* Done numpy indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* hits whose rescaled score exceeds THRESHOLD."""
        similarities = np.dot(query_vector, self.retrieval_db.T)
        ranked = np.argsort(-similarities.T)

        results = []
        for db_idx in ranked[:search_size]:
            # Dot product of unit vectors lies in [-1, 1]; map it to [0, 1].
            score = float(similarities[db_idx]) * 0.5 + 0.5
            if score > THRESHOLD:
                results.append({
                    "name": self.retrieval_name[db_idx],
                    "score": round(score, 6),
                })

        return results
38 |
--------------------------------------------------------------------------------
/service/vggnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | from keras.applications.vgg16 import VGG16
5 | from keras.applications.vgg16 import preprocess_input as preprocess_input_vgg
6 | from keras.preprocessing import image
7 | from numpy import linalg as LA
8 |
9 |
class VGGNet(object):
    """VGG16 feature extractor producing L2-normalized image embeddings."""

    def __init__(self):
        self.input_shape = (224, 224, 3)
        self.weight = 'imagenet'
        self.pooling = 'max'
        # Headless VGG16 with global max pooling: one vector per image.
        self.model_vgg = VGG16(weights=self.weight,
                               input_shape=(self.input_shape[0], self.input_shape[1], self.input_shape[2]),
                               pooling=self.pooling,
                               include_top=False)
        # Warm-up predict so the first real request is not slowed by graph setup.
        self.model_vgg.predict(np.zeros((1, 224, 224, 3)))

    def vgg_extract_feat(self, img_path):
        """Load the image at *img_path* and return its normalized feature list."""
        img = image.load_img(img_path, target_size=(self.input_shape[0], self.input_shape[1]))
        tensor = image.img_to_array(img)
        tensor = np.expand_dims(tensor, axis=0)
        tensor = preprocess_input_vgg(tensor)
        feat = self.model_vgg.predict(tensor)
        # L2-normalize, then convert numpy scalars to plain Python floats.
        normed = feat[0] / LA.norm(feat[0])
        return [value.item() for value in normed]
30 |
--------------------------------------------------------------------------------