├── .devcontainer.json
├── .env
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── __init__.py
├── conf
└── server_config.yaml
├── data
├── test
│ ├── 001_accordion_image_0001.jpg
│ ├── 002_anchor_image_0001.jpg
│ ├── 003_ant_image_0001.jpg
│ └── 2012_000015.jpg
└── train
│ ├── 001_accordion_image_0002.jpg
│ ├── 001_accordion_image_0003.jpg
│ ├── 002_anchor_image_0002.jpg
│ ├── 002_anchor_image_0003.jpg
│ ├── 002_anchor_image_0004.jpg
│ ├── 003_ant_image_0002.jpg
│ ├── 003_ant_image_0003.jpg
│ ├── 2012_000003.jpg
│ ├── 2012_000004.jpg
│ ├── 2012_000007.jpg
│ ├── 2012_000010.jpg
│ ├── 2012_000014.jpg
│ ├── 2012_000162.jpg
│ ├── 2012_000166.jpg
│ ├── 2012_000168.jpg
│ ├── 2012_000169.jpg
│ ├── 2012_001337.jpg
│ ├── 2012_001339.jpg
│ ├── 2012_001341.jpg
│ ├── 2012_001344.jpg
│ ├── 2012_001346.jpg
│ ├── 2012_001347.jpg
│ ├── 2012_002049.jpg
│ ├── 2012_002050.jpg
│ ├── 2012_002051.jpg
│ └── 2012_002056.jpg
├── docker-compose-devcontainer.yml
├── docs
└── build.md
├── index.py
├── index
└── train.h5
├── pic
└── system_arch.png
├── requirements.txt
├── retrieval.py
├── scripts
├── clean.sh
└── devcontainer.sh
└── service
├── __init__.py
├── es_retrieval.py
├── faiss_retrieval.py
├── milvus_retrieval.py
├── numpy_retrieval.py
└── vggnet.py
/.devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Image Retrieval Dev Container Definition",
3 | "dockerComposeFile": ["./docker-compose-devcontainer.yml"],
4 | "service": "image-retrieval",
5 | "initializeCommand": "scripts/devcontainer.sh up",
6 | "workspaceFolder": "/www/server",
7 | "remoteEnv": { },
8 | "extensions": [
9 | "ms-python.python",
10 | "ms-python.vscode-pylance"
11 | ]
12 | }
13 |
--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------
1 | ES_HOST=es
2 | ES_PORT=9200
3 | MILVUS_HOST=milvus
4 | MILVUS_PORT=19530
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .idea/
3 | .DS_Store
4 | __pycache__/
5 |
6 | volumes
7 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM liyaodev/base-cpu-u18-py3.8:v1.0.0
2 | LABEL maintainer=liyaodev
3 |
4 | RUN rm -rf /usr/local/bin/python && ln -s /usr/local/bin/python3.8 /usr/local/bin/python
5 | RUN rm -rf /usr/local/bin/pip && ln -s /usr/local/bin/pip3 /usr/local/bin/pip
6 |
7 | RUN echo 'root:root' | chpasswd
8 |
9 | # 构建Tini的多服务容器
10 | RUN wget -O /tini https://github.com/krallin/tini/releases/download/v0.19.0/tini && \
11 | chmod +x /tini
12 | ENTRYPOINT ["/tini", "--"]
13 |
14 | WORKDIR /www/server
15 |
16 | COPY ./requirements.txt ./requirements.txt
17 | RUN pip3 install --no-cache-dir -r ./requirements.txt \
18 | -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
19 |
20 | ENV PYTHONUNBUFFERED 1
21 |
22 | CMD ["tail", "-f", "/dev/null"]
23 | EXPOSE 8888
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
2 | up:
3 | sh scripts/devcontainer.sh up
4 |
5 | down:
6 | sh scripts/devcontainer.sh down
7 |
8 | dev:
9 | docker exec -it image-retrieval /bin/bash
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## 图片向量检索服务构建
2 |
3 | 该系统使用VGG(图像特征提取模型)和Numpy、Faiss、ES、Milvus构建了图像搜索流程。 系统架构如下:
4 |
5 |
6 |
7 | ## 构建环境
8 |
9 | ### Docker-Compose
10 |
11 | ```shell
12 | # 启动
13 | make up
14 |
15 | # 开发运行
16 | make dev
17 |
18 | # 关闭
19 | make down
20 | ```
21 |
22 | ### Docker 环境
23 |
24 | 详见[环境安装](./docs/build.md)
25 |
26 | ### 操作简介
27 |
28 | 操作一:构建基础索引
29 |
30 | ```shell
31 | python index.py
32 | --train_data:自定义训练图片文件夹路径,默认为`/data/train`
33 | --index_file:自定义索引文件存储路径,默认为`/index/train.h5`
34 |
35 | # 示例:
36 | python index.py --train_data /www/server/data/train --index_file /www/server/index/train.h5
37 | ```
38 |
39 | 操作二:使用相似检索
40 |
41 | ```shell
42 | python retrieval.py --engine=numpy
43 | --test_data:自定义测试图片详细地址,默认为`/data/test/001_accordion_image_0001.jpg`
44 | --index_file:自定义索引文件存储路径,默认为`/index/train.h5`
45 | --db_name:自定义ES或者Milvus索引库名,默认为`image_retrieval`
46 | --engine:自定义检索引擎类型,默认为`numpy`,可选包括:numpy、faiss、es、milvus
47 |
48 | # 示例:
49 | python retrieval.py --engine=numpy --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg
50 |
51 | python retrieval.py --engine=faiss --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg
52 |
53 | python retrieval.py --engine=es --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg
54 |
55 | python retrieval.py --engine=milvus --index_file /www/server/index/train.h5 --test_data /www/server/data/test/001_accordion_image_0001.jpg
56 | ```
57 |
58 | ### 附录
59 |
60 | 参考1:https://github.com/willard-yuan/flask-keras-cnn-image-retrieval
61 | 参考2:https://github.com/zilliz-bootcamp/image_search
62 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/conf/server_config.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2019-2020 Zilliz. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
4 | # with the License. You may obtain a copy of the License at
5 | #
6 | # http://www.apache.org/licenses/LICENSE-2.0
7 | #
8 | # Unless required by applicable law or agreed to in writing, software distributed under the License
9 | # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10 | # or implied. See the License for the specific language governing permissions and limitations under the License.
11 |
12 | version: 0.5
13 |
14 | #----------------------+------------------------------------------------------------+------------+-----------------+
15 | # Cluster Config | Description | Type | Default |
16 | #----------------------+------------------------------------------------------------+------------+-----------------+
 17 | # enable               | If running with Mishards, set true, otherwise false.       | Boolean    | false           |
18 | #----------------------+------------------------------------------------------------+------------+-----------------+
19 | # role | Milvus deployment role: rw / ro | role | rw |
20 | #----------------------+------------------------------------------------------------+------------+-----------------+
21 | cluster:
22 | enable: false
23 | role: rw
24 |
25 | #----------------------+------------------------------------------------------------+------------+-----------------+
26 | # General Config | Description | Type | Default |
27 | #----------------------+------------------------------------------------------------+------------+-----------------+
 28 | # timezone             | Use UTC-x or UTC+x to specify a time zone.                 | Timezone   | UTC+8           |
29 | #----------------------+------------------------------------------------------------+------------+-----------------+
30 | # meta_uri | URI for metadata storage, using SQLite (for single server | URL | sqlite://:@:/ |
31 | # | Milvus) or MySQL (for distributed cluster Milvus). | | |
32 | # | Format: dialect://username:password@host:port/database | | |
33 | # | Keep 'dialect://:@:/', 'dialect' can be either 'sqlite' or | | |
34 | # | 'mysql', replace other texts with real values. | | |
35 | #----------------------+------------------------------------------------------------+------------+-----------------+
36 | general:
37 | timezone: UTC+8
38 | meta_uri: sqlite://:@:/
39 |
40 | #----------------------+------------------------------------------------------------+------------+-----------------+
41 | # Network Config | Description | Type | Default |
42 | #----------------------+------------------------------------------------------------+------------+-----------------+
43 | # bind.address | IP address that Milvus server monitors. | IP | 0.0.0.0 |
44 | #----------------------+------------------------------------------------------------+------------+-----------------+
45 | # bind.port | Port that Milvus server monitors. Port range (1024, 65535) | Integer | 19530 |
46 | #----------------------+------------------------------------------------------------+------------+-----------------+
47 | # http.enable | Enable web server or not. | Boolean | true |
48 | #----------------------+------------------------------------------------------------+------------+-----------------+
49 | # http.port | Port that Milvus web server monitors. | Integer | 19121 |
50 | # | Port range (1024, 65535) | | |
51 | #----------------------+------------------------------------------------------------+------------+-----------------+
52 | network:
53 | bind.address: 0.0.0.0
54 | bind.port: 19530
55 | http.enable: true
56 | http.port: 19121
57 |
58 | #----------------------+------------------------------------------------------------+------------+-----------------+
59 | # Storage Config | Description | Type | Default |
60 | #----------------------+------------------------------------------------------------+------------+-----------------+
61 | # path | Path used to save meta data, vector data and index data. | Path | /var/lib/milvus |
62 | #----------------------+------------------------------------------------------------+------------+-----------------+
63 | # auto_flush_interval | The interval, in seconds, at which Milvus automatically | Integer | 1 (s) |
64 | # | flushes data to disk. | | |
65 | # | 0 means disable the regular flush. | | |
66 | #----------------------+------------------------------------------------------------+------------+-----------------+
67 | # s3_enabled | If using s3 storage backend. | Boolean | false |
68 | #----------------------+------------------------------------------------------------+------------+-----------------+
69 | # s3_address | The s3 server address, support domain/hostname/ipaddress | String | 127.0.0.1 |
70 | #----------------------+------------------------------------------------------------+------------+-----------------+
71 | # s3_port | The s3 server port. | Integer | 80 |
72 | #----------------------+------------------------------------------------------------+------------+-----------------+
73 | # s3_access_key | The access key for accessing s3 service. | String | s3_access_key |
74 | #----------------------+------------------------------------------------------------+------------+-----------------+
 75 | # s3_secret_key        | The secret key for accessing s3 service.                   | String     | s3_secret_key   |
76 | #----------------------+------------------------------------------------------------+------------+-----------------+
 77 | # s3_bucket            | The s3 bucket name for storing Milvus data.                | String     | s3_bucket       |
 78 |#                      | Note: please use a different bucket for each Milvus        |            |                 |
 79 |#                      | cluster.                                                   |            |                 |
80 | #----------------------+------------------------------------------------------------+------------+-----------------+
81 | storage:
82 | path: /var/lib/milvus
83 | auto_flush_interval: 1
84 |
85 |
86 | #----------------------+------------------------------------------------------------+------------+-----------------+
87 | # WAL Config | Description | Type | Default |
88 | #----------------------+------------------------------------------------------------+------------+-----------------+
89 | # enable | Whether to enable write-ahead logging (WAL) in Milvus. | Boolean | true |
90 | # | If WAL is enabled, Milvus writes all data changes to log | | |
91 | # | files in advance before implementing data changes. WAL | | |
92 | # | ensures the atomicity and durability for Milvus operations.| | |
93 | #----------------------+------------------------------------------------------------+------------+-----------------+
94 | # recovery_error_ignore| Whether to ignore logs with errors that happens during WAL | Boolean | false |
95 | # | recovery. If true, when Milvus restarts for recovery and | | |
96 | # | there are errors in WAL log files, log files with errors | | |
97 | # | are ignored. If false, Milvus does not restart when there | | |
98 | # | are errors in WAL log files. | | |
99 | #----------------------+------------------------------------------------------------+------------+-----------------+
100 | # buffer_size | Sum total of the read buffer and the write buffer in MBs. | Integer | 256 (MB) |
101 | # | buffer_size must be in range [64, 4096] (MB). | | |
102 | # | If the value you specified is out of range, Milvus | | |
103 | # | automatically uses the boundary value closest to the | | |
104 | # | specified value. It is recommended you set buffer_size to | | |
105 | # | a value greater than the inserted data size of a single | | |
106 | # | insert operation for better performance. | | |
107 | #----------------------+------------------------------------------------------------+------------+-----------------+
108 | # path | Location of WAL log files. | String | |
109 | #----------------------+------------------------------------------------------------+------------+-----------------+
110 | wal:
111 | enable: true
112 | recovery_error_ignore: false
113 | buffer_size: 256MB
114 | path: /var/lib/milvus/wal
115 |
116 | #----------------------+------------------------------------------------------------+------------+-----------------+
117 | # Cache Config | Description | Type | Default |
118 | #----------------------+------------------------------------------------------------+------------+-----------------+
119 | # cache_size | The size of CPU memory used for caching data for faster | Integer | 4 (GB) |
120 | # | query. The sum of 'cpu_cache_capacity' and | | |
121 | # | 'insert_buffer_size' must be less than system memory size. | | |
122 | #----------------------+------------------------------------------------------------+------------+-----------------+
123 | # insert_buffer_size | Buffer size used for data insertion. | Integer | 1 (GB) |
124 | # | The sum of 'insert_buffer_size' and 'cpu_cache_capacity' | | |
125 | # | must be less than system memory size. | | |
126 | #----------------------+------------------------------------------------------------+------------+-----------------+
127 | # preload_collection | A comma-separated list of collection names that need to | StringList | |
128 | # | be pre-loaded when Milvus server starts up. | | |
129 | # | '*' means preload all existing tables (single-quote or | | |
130 | # | double-quote required). | | |
131 | #----------------------+------------------------------------------------------------+------------+-----------------+
132 | cache:
133 | cache_size: 4GB
134 | insert_buffer_size: 1GB
135 | preload_collection:
136 |
137 | #----------------------+------------------------------------------------------------+------------+-----------------+
138 | # GPU Config | Description | Type | Default |
139 | #----------------------+------------------------------------------------------------+------------+-----------------+
140 | # enable | Enable GPU resources or not. | Boolean | false |
141 | #----------------------+------------------------------------------------------------+------------+-----------------+
142 | # cache_size | The size of GPU memory per card used for cache. | Integer | 1 (GB) |
143 | #----------------------+------------------------------------------------------------+------------+-----------------+
144 | # gpu_search_threshold | A Milvus performance tuning parameter. This value will be | Integer | 1000 |
145 | # | compared with 'nq' to decide if the search computation will| | |
146 | # | be executed on GPUs only. | | |
147 | # | If nq >= gpu_search_threshold, the search computation will | | |
148 | # | be executed on GPUs only; | | |
149 | # | if nq < gpu_search_threshold, the search computation will | | |
150 | # | be executed on CPUs only. | | |
151 | # | The SQ8H index is special, if nq < gpu_search_threshold, | | |
152 | # | the search will be executed on both CPUs and GPUs. | | |
153 | #----------------------+------------------------------------------------------------+------------+-----------------+
154 | # search_devices       | The list of GPU devices used for search computation.       | DeviceList | gpu0            |
155 | # | Must be in format gpux. | | |
156 | #----------------------+------------------------------------------------------------+------------+-----------------+
157 | # build_index_devices  | The list of GPU devices used for index building.           | DeviceList | gpu0            |
158 | # | Must be in format gpux. | | |
159 | #----------------------+------------------------------------------------------------+------------+-----------------+
160 | gpu:
161 | enable: false
162 | cache_size: 1GB
163 | gpu_search_threshold: 1000
164 | search_devices:
165 | - gpu0
166 | build_index_devices:
167 | - gpu0
168 |
169 | #----------------------+------------------------------------------------------------+------------+-----------------+
170 | # FPGA Config | Description | Type | Default |
171 | #----------------------+------------------------------------------------------------+------------+-----------------+
172 | # enable | Use FPGA devices or not. | Boolean | false |
173 | #----------------------+------------------------------------------------------------+------------+-----------------+
174 | # search_devices | The list of FPGA devices used for search computation. | DeviceList | fpga0 |
175 | # | Must be in format fpgax. | | |
176 | #----------------------+------------------------------------------------------------+------------+-----------------+
177 | fpga:
178 | enable: false
179 | search_devices:
180 | - fpga0
181 |
182 | #----------------------+------------------------------------------------------------+------------+-----------------+
183 | # Logs Config | Description | Type | Default |
184 | #----------------------+------------------------------------------------------------+------------+-----------------+
185 | # level | Log level in Milvus. Must be one of debug, info, warning, | String | debug |
186 | # | error, fatal | | |
187 | #----------------------+------------------------------------------------------------+------------+-----------------+
188 | # trace.enable | Whether to enable trace level logging in Milvus. | Boolean | true |
189 | #----------------------+------------------------------------------------------------+------------+-----------------+
190 | # path | Absolute path to the folder holding the log files. | String | |
191 | #----------------------+------------------------------------------------------------+------------+-----------------+
192 | # max_log_file_size | The maximum size of each log file, size range [512, 4096] | Integer | 1024 (MB) |
193 | #----------------------+------------------------------------------------------------+------------+-----------------+
194 | # log_rotate_num | The maximum number of log files that Milvus keeps for each | Integer | 0 |
195 | # | logging level, num range [0, 1024], 0 means unlimited. | | |
196 | #----------------------+------------------------------------------------------------+------------+-----------------+
197 | # log_to_stdout | Whether to write logs to standard output in Milvus. | Boolean | false |
198 | #----------------------+------------------------------------------------------------+------------+-----------------+
199 | # log_to_file | Whether to write logs to files in Milvus | Boolean | true |
200 | #----------------------+------------------------------------------------------------+------------+-----------------+
201 | logs:
202 | level: debug
203 | trace.enable: true
204 | path: /var/lib/milvus/logs
205 | max_log_file_size: 1024MB
206 | log_rotate_num: 0
207 | log_to_stdout: false
208 | log_to_file: true
209 |
210 | #----------------------+------------------------------------------------------------+------------+-----------------+
211 | # Metric Config | Description | Type | Default |
212 | #----------------------+------------------------------------------------------------+------------+-----------------+
213 | # enable | Enable monitoring function or not. | Boolean | false |
214 | #----------------------+------------------------------------------------------------+------------+-----------------+
215 | # address              | Pushgateway address                                        | IP         | 127.0.0.1       |
216 | #----------------------+------------------------------------------------------------+------------+-----------------+
217 | # port | Pushgateway port, port range (1024, 65535) | Integer | 9091 |
218 | #----------------------+------------------------------------------------------------+------------+-----------------+
219 | metric:
220 | enable: false
221 | address: 127.0.0.1
222 | port: 9091
223 |
224 |
--------------------------------------------------------------------------------
/data/test/001_accordion_image_0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/001_accordion_image_0001.jpg
--------------------------------------------------------------------------------
/data/test/002_anchor_image_0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/002_anchor_image_0001.jpg
--------------------------------------------------------------------------------
/data/test/003_ant_image_0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/003_ant_image_0001.jpg
--------------------------------------------------------------------------------
/data/test/2012_000015.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/test/2012_000015.jpg
--------------------------------------------------------------------------------
/data/train/001_accordion_image_0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/001_accordion_image_0002.jpg
--------------------------------------------------------------------------------
/data/train/001_accordion_image_0003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/001_accordion_image_0003.jpg
--------------------------------------------------------------------------------
/data/train/002_anchor_image_0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0002.jpg
--------------------------------------------------------------------------------
/data/train/002_anchor_image_0003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0003.jpg
--------------------------------------------------------------------------------
/data/train/002_anchor_image_0004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/002_anchor_image_0004.jpg
--------------------------------------------------------------------------------
/data/train/003_ant_image_0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/003_ant_image_0002.jpg
--------------------------------------------------------------------------------
/data/train/003_ant_image_0003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/003_ant_image_0003.jpg
--------------------------------------------------------------------------------
/data/train/2012_000003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000003.jpg
--------------------------------------------------------------------------------
/data/train/2012_000004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000004.jpg
--------------------------------------------------------------------------------
/data/train/2012_000007.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000007.jpg
--------------------------------------------------------------------------------
/data/train/2012_000010.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000010.jpg
--------------------------------------------------------------------------------
/data/train/2012_000014.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000014.jpg
--------------------------------------------------------------------------------
/data/train/2012_000162.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000162.jpg
--------------------------------------------------------------------------------
/data/train/2012_000166.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000166.jpg
--------------------------------------------------------------------------------
/data/train/2012_000168.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000168.jpg
--------------------------------------------------------------------------------
/data/train/2012_000169.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_000169.jpg
--------------------------------------------------------------------------------
/data/train/2012_001337.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001337.jpg
--------------------------------------------------------------------------------
/data/train/2012_001339.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001339.jpg
--------------------------------------------------------------------------------
/data/train/2012_001341.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001341.jpg
--------------------------------------------------------------------------------
/data/train/2012_001344.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001344.jpg
--------------------------------------------------------------------------------
/data/train/2012_001346.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001346.jpg
--------------------------------------------------------------------------------
/data/train/2012_001347.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_001347.jpg
--------------------------------------------------------------------------------
/data/train/2012_002049.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002049.jpg
--------------------------------------------------------------------------------
/data/train/2012_002050.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002050.jpg
--------------------------------------------------------------------------------
/data/train/2012_002051.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002051.jpg
--------------------------------------------------------------------------------
/data/train/2012_002056.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/data/train/2012_002056.jpg
--------------------------------------------------------------------------------
/docker-compose-devcontainer.yml:
--------------------------------------------------------------------------------
1 | version: '3.5'
2 |
3 | services:
4 | image-retrieval:
5 | container_name: image-retrieval
6 | image: liyaodev/image-retrieval-env:v1.0.0
7 | # 本地构建
8 | build:
9 | context: .
10 | dockerfile: ./Dockerfile
11 | ports:
12 | - 8888:8888
13 | environment:
14 | ES_HOST: ${ES_HOST}
15 | ES_PORT: ${ES_PORT}
16 | MILVUS_HOST: ${MILVUS_HOST}
17 | MILVUS_PORT: ${MILVUS_PORT}
18 | volumes:
19 | - .:/www/server:delegated
20 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/vscode-extensions:/www/.vscode-server:delegated
21 | depends_on:
22 | - es
23 | - milvus
24 | - milvus-em
25 |
26 | es:
27 | container_name: image-retrieval-es
28 | image: docker.elastic.co/elasticsearch/elasticsearch:7.5.0
29 | environment:
30 | - discovery.type=single-node
31 | ports:
32 | - 9200:9200
33 | - 9300:9300
34 | volumes:
35 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/es:/usr/share/elasticsearch/data
36 |
37 | milvus:
38 | container_name: image-retrieval-milvus
39 | image: milvusdb/milvus:1.1.1-cpu-d061621-330cc6
40 | ports:
41 | - 19530:19530
42 | - 19121:19121
43 | volumes:
44 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/conf:/var/lib/milvus/conf
45 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/db:/var/lib/milvus/db
46 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/logs:/var/lib/milvus/logs
47 | - ${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/milvus/wal:/var/lib/milvus/wal
48 |
49 | milvus-em:
50 | container_name: image-retrieval-milvus-em
51 | image: milvusdb/milvus-em:v0.4.2
52 | environment:
53 | - API_URL=http://${MILVUS_HOST}:19121
54 | ports:
55 | - 3000:80
56 |
57 | networks:
58 | default:
59 | name: image_retrieval_dev
60 |
--------------------------------------------------------------------------------
/docs/build.md:
--------------------------------------------------------------------------------
1 |
2 | ## 手动构建环境
3 |
4 | ### 基础环境安装
5 |
6 | Python版本:3.8.12
7 |
8 | ```shell
9 | pip install -r requirements.txt
10 | ```
11 |
12 | ### ES服务端安装
13 |
14 | ```shell
15 | docker run -it -d -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:7.5.0
16 | ```
17 |
18 | ### Milvus服务端安装
19 |
20 | 安装指南:https://milvus.io/cn/docs/v1.1.1/milvus_docker-cpu.md
21 | 下载配置
22 |
23 | ```shell
24 | mkdir -p milvus/conf && cd milvus/conf
25 | wget https://raw.githubusercontent.com/milvus-io/milvus/v1.1.1/core/conf/demo/server_config.yaml
26 | ```
27 |
28 | 服务启动
29 |
30 | ```shell
31 | docker run -d --name milvus_cpu_1.1.1 \
32 | -p 19530:19530 \
33 | -p 19121:19121 \
34 | -v /milvus/db:/var/lib/milvus/db \
35 | -v /milvus/conf:/var/lib/milvus/conf \
36 | -v /milvus/logs:/var/lib/milvus/logs \
37 | -v /milvus/wal:/var/lib/milvus/wal \
38 | milvusdb/milvus:1.1.1-cpu-d061621-330cc6
39 | ```
40 |
--------------------------------------------------------------------------------
/index.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import h5py
4 | import argparse
5 | import numpy as np
6 | from service.vggnet import VGGNet
7 | import os
8 | import sys
9 | from os.path import dirname
10 | BASE_DIR = dirname(os.path.abspath(__file__))
11 | sys.path.append(BASE_DIR)
12 |
def get_imlist(path):
    """Return paths of all ``.jpg`` files directly inside *path* (listdir order)."""
    jpg_names = (entry for entry in os.listdir(path) if entry.endswith('.jpg'))
    return [os.path.join(path, name) for name in jpg_names]
15 |
16 |
if __name__ == "__main__":
    # Build the retrieval index: extract a VGG16 feature for every training
    # image and persist features + names to an HDF5 file.
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data", type=str, default=os.path.join(BASE_DIR, 'data', 'train'), help="train data path.")
    parser.add_argument("--index_file", type=str, default=os.path.join(BASE_DIR, 'index', 'train.h5'), help="index file path.")
    args = vars(parser.parse_args())
    img_list = get_imlist(args["train_data"])
    print("--------------------------------------------------")
    print("         feature extraction starts")
    print("--------------------------------------------------")
    feats = []
    names = []
    model = VGGNet()  # load VGG16 weights once; reused for every image
    for i, img_path in enumerate(img_list):
        norm_feat = model.vgg_extract_feat(img_path)
        img_name = os.path.split(img_path)[1]
        feats.append(norm_feat)
        names.append(img_name)
        print("extracting feature from image No. %d , %d images in total" % ((i + 1), len(img_list)))
    feats = np.array(feats)
    print("--------------------------------------------------")
    print("      writing feature extraction results")
    print("--------------------------------------------------")
    # Context manager guarantees the HDF5 file is closed even if a write fails
    # (the original left the file open on error).
    with h5py.File(args["index_file"], 'w') as h5f:
        h5f.create_dataset('dataset_1', data=feats)
        h5f.create_dataset('dataset_2', data=np.string_(names))
43 |
--------------------------------------------------------------------------------
/index/train.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/index/train.h5
--------------------------------------------------------------------------------
/pic/system_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liyaodev/image-retrieval/c8bcaf2e4c9bbe47618af08ccf83fba24d7b95fb/pic/system_arch.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.19.5
2 | keras==2.4.3
3 | tensorflow==2.4.1
4 | pillow==8.1.0
5 | elasticsearch==7.11.0
6 | pymilvus==1.1.2
7 | faiss-cpu==1.7.0
8 |
--------------------------------------------------------------------------------
/retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import argparse
4 | from service.vggnet import VGGNet
5 | from service.numpy_retrieval import NumpyRetrieval
6 | from service.faiss_retrieval import FaissRetrieval
7 | from service.es_retrieval import ESRetrieval
8 | from service.milvus_retrieval import MilvusRetrieval
9 | import os
10 | import sys
11 | from os.path import dirname
12 | BASE_DIR = dirname(os.path.abspath(__file__))
13 | sys.path.append(BASE_DIR)
14 |
15 |
class RetrievalEngine(object):
    """Dispatch image-retrieval queries to one of several backend engines.

    Backends are constructed lazily on first use and cached on the instance,
    so unused engines never pay their (potentially expensive) setup cost.
    """

    def __init__(self, index_file, db_name):
        self.index_file = index_file
        self.db_name = db_name
        self.numpy_r = None
        self.faiss_r = None
        self.es_r = None
        self.milvus_r = None

    def get_method(self, m_name):
        """Return the bound handler named ``<m_name>_handler``, else the default."""
        return getattr(self, "%s_handler" % str(m_name), self.default_handler)

    def numpy_handler(self, query_vector, req_id=None):
        # Brute-force dot-product search with numpy.
        if self.numpy_r is None:
            self.numpy_r = NumpyRetrieval(self.index_file)
        return self.numpy_r.retrieve(query_vector)

    def faiss_handler(self, query_vector, req_id=None):
        # Exact inner-product search via a faiss flat index.
        if self.faiss_r is None:
            self.faiss_r = FaissRetrieval(self.index_file)
        return self.faiss_r.retrieve(query_vector)

    def es_handler(self, query_vector, req_id=None):
        # Script-score search in Elasticsearch.
        if self.es_r is None:
            self.es_r = ESRetrieval(self.db_name, self.index_file)
        return self.es_r.retrieve(query_vector)

    def milvus_handler(self, query_vector, req_id=None):
        # Vector search against a Milvus collection.
        if self.milvus_r is None:
            self.milvus_r = MilvusRetrieval(self.db_name, self.index_file)
        return self.milvus_r.retrieve(query_vector)

    def default_handler(self, query_vector, req_id=None):
        # Unknown engine name: no matches.
        return []
54 |
55 |
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_data", type=str, default=os.path.join(BASE_DIR, 'data', 'test', '001_accordion_image_0001.jpg'), help="test data path.")
    parser.add_argument("--index_file", type=str, default=os.path.join(BASE_DIR, 'index', 'train.h5'), help="index file path.")
    parser.add_argument("--db_name", type=str, default='image_retrieval', help="database name.")
    parser.add_argument("--engine", type=str, default='numpy', help="retrieval engine.")
    args = vars(parser.parse_args())
    # 1. Image inference: extract a normalized VGG16 feature vector.
    model = VGGNet()
    query_vector = model.vgg_extract_feat(args["test_data"])
    # 2. Image retrieval: dispatch to the selected backend.
    # Named "engine" because the original "re" shadowed the stdlib regex module.
    engine = RetrievalEngine(args["index_file"], args["db_name"])
    result = engine.get_method(args["engine"])(query_vector, None)
    print(result)
70 |
71 |
--------------------------------------------------------------------------------
/scripts/clean.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Placeholder for project clean-up tasks (e.g. removing generated volumes,
# caches, or index files). Intentionally empty for now.

--------------------------------------------------------------------------------
/scripts/devcontainer.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Bring the dev-container stack up or tear it down.
# Usage: devcontainer.sh up|down
# Honors IMAGE_RETRIEVAL_ROOT_DIR (defaults to the current directory).

if [ "${1-}" = "up" ]; then
    mkdir -p "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes/vscode-extensions"
    chmod -R 777 "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes"

    # Quote the compose-file path so root dirs containing spaces work.
    docker-compose -f "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/docker-compose-devcontainer.yml" up -d
fi

if [ "${1-}" = "down" ]; then
    docker-compose -f "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/docker-compose-devcontainer.yml" down
    rm -rf "${IMAGE_RETRIEVAL_ROOT_DIR:-.}/volumes"
fi
14 |
--------------------------------------------------------------------------------
/service/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/service/es_retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import h5py
5 | import numpy as np
6 | from elasticsearch import Elasticsearch
7 | from elasticsearch.helpers import bulk
8 |
THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score cut-off
# Index template: one 512-d dense vector per image plus its id and name.
INDEX_TABLE = {
    "settings": {
        "number_of_shards": 2,
        "number_of_replicas": 1
    },
    "mappings": {
        "dynamic": "true",
        "_source": {
            "enabled": "true"
        },
        "properties": {
            "image_vector": {
                "type": "dense_vector",
                "dims": 512
            },
            "id": {
                "type": "keyword"
            },
            "name": {
                "type": "keyword"
            }
        }
    }
}


class ESRetrieval(object):
    """Image retrieval backed by an Elasticsearch ``dense_vector`` index."""

    def __init__(self, index_name, index_dir,
                 host=os.environ.get("ES_HOST", "127.0.0.1"),
                 port=os.environ.get("ES_PORT", 9200)):
        self.index_name = index_name
        # Bug fix: `port` was previously ignored (client built from host only),
        # so the ES_PORT environment variable had no effect.
        self.client = Elasticsearch([{"host": host, "port": int(port)}])
        self.load(index_dir)

    def load(self, index_dir):
        """Read vectors from the HDF5 index file and (re)build the ES index."""
        def index_batch(docs):
            # Tag each doc with the bulk op-type and target index, then flush.
            requests = []
            for doc in docs:
                request = doc
                request["_op_type"] = "index"
                request["_index"] = self.index_name
                requests.append(request)
            bulk(self.client, requests)
        # 1. Read the HDF5 index (feature matrix + image names).
        h5f = h5py.File(index_dir, 'r')
        self.retrieval_db = h5f['dataset_1'][:]
        self.retrieval_name = h5f['dataset_2'][:]
        h5f.close()
        # 2. Recreate the ES index from scratch and bulk-load the vectors.
        r_list = []
        for i, val in enumerate(self.retrieval_name):
            temp = {
                'id': i,
                'name': str(val),
                'image_vector': self.retrieval_db[i].tolist()
            }
            r_list.append(temp)
        self.client.indices.delete(index=self.index_name, ignore=[404])
        self.client.indices.create(index=self.index_name, body=INDEX_TABLE)
        docs = []
        count = 0
        batch_size = 1000
        for doc in r_list:
            docs.append(doc)
            count += 1
            if count % batch_size == 0:
                index_batch(docs)
                docs = []
        if docs:
            index_batch(docs)
        self.client.indices.refresh(index=self.index_name)
        print("************* Done es indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* hits whose rescaled score exceeds THRESHOLD.

        Scores by raw dot product in a painless script. Alternative scoring
        functions supported by ES dense vectors (cosineSimilarity, l1norm,
        l2norm, sigmoid-of-dot-product) could be substituted here.
        """
        script_query = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": """
                        double value = doc['image_vector'].size() == 0 ? 0 : dotProduct(params.query_vector, doc['image_vector']);
                        return value;
                    """,
                    "params": {"query_vector": query_vector}
                }
            }
        }
        response = self.client.search(
            index=self.index_name,
            body={
                "size": search_size,
                "query": script_query,
                # Bug fix: the mapping stores "image_vector"; "face_vector"
                # never existed in this index.
                "_source": {"includes": ["id", "name", "image_vector"]}
            }
        )
        r_list = []
        for hit in response["hits"]["hits"]:
            # Dot product of unit vectors lies in [-1, 1]; map it to [0, 1].
            score = float(hit['_score']) * 0.5 + 0.5
            name = hit['_source']["name"]
            if name.encode("utf-8") and score > THRESHOLD:
                temp = {
                    "id": hit['_source']["id"],
                    "name": name,
                    "score": round(score, 6)
                }
                r_list.append(temp)

        return r_list
165 |
--------------------------------------------------------------------------------
/service/faiss_retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import h5py
5 | import numpy as np
6 | import faiss
7 |
THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score cut-off


class FaissRetrieval(object):
    """Image retrieval backed by an in-memory faiss inner-product flat index."""

    def __init__(self, index_dir, emb_size=512):
        self.emb_size = emb_size
        self.load(index_dir)

    def load(self, index_dir):
        """Read vectors from the HDF5 index file and build a flat IP index."""
        # 1. Read the HDF5 index (feature matrix + image names).
        h5f = h5py.File(index_dir, 'r')
        self.retrieval_db = h5f['dataset_1'][:]
        self.retrieval_name = h5f['dataset_2'][:]
        h5f.close()
        # 2. Load faiss. A flat index performs exact search and needs no training.
        self.retrieval_db = np.asarray(self.retrieval_db).astype(np.float32)
        self.index = faiss.IndexFlatIP(self.emb_size)
        self.index.add(self.retrieval_db)
        print("************* Done faiss indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* hits whose rescaled score exceeds THRESHOLD."""
        query = np.array([query_vector]).astype(np.float32)
        score_list, index_list = self.index.search(query, search_size)
        results = []
        for rank, db_idx in enumerate(index_list[0]):
            # Inner product of unit vectors lies in [-1, 1]; map it to [0, 1].
            score = float(score_list[0][rank]) * 0.5 + 0.5
            if score > THRESHOLD:
                results.append({
                    "name": self.retrieval_name[int(db_idx)],
                    "score": round(score, 6),
                })

        return results
43 |
--------------------------------------------------------------------------------
/service/milvus_retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import h5py
5 | import numpy as np
6 | from pprint import pprint
7 | from milvus import Milvus, IndexType, MetricType
8 |
THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score cut-off

class MilvusRetrieval(object):
    """Image retrieval backed by a Milvus collection using the IP metric."""

    def __init__(self, index_name, index_dir,
                 host=os.environ.get("MILVUS_HOST", "127.0.0.1"),
                 port=os.environ.get("MILVUS_PORT", 19530)):
        self.client = Milvus(host, port)
        self.index_name = index_name
        self.load(index_dir)

    def load(self, index_dir):
        """Read vectors from the HDF5 index file and (re)create the collection."""
        # 1. Read the HDF5 index (feature matrix + image names).
        h5f = h5py.File(index_dir, 'r')
        self.retrieval_db = h5f['dataset_1'][:]
        self.retrieval_name = h5f['dataset_2'][:]
        h5f.close()
        # 2. Rebuild the Milvus collection from scratch and insert all vectors.
        if self.index_name in self.client.list_collections()[1]:
            self.client.drop_collection(collection_name=self.index_name)
        self.client.create_collection({
            'collection_name': self.index_name,
            'dimension': 512,
            'index_file_size': 1024,
            'metric_type': MetricType.IP,
        })
        records = [vec.tolist() for vec in self.retrieval_db]
        status, ids = self.client.insert(collection_name=self.index_name, records=records)
        # Map the Milvus-assigned ids back to image names for result formatting.
        self.id_dict = {}
        for pos, val in enumerate(self.retrieval_name):
            self.id_dict[ids[pos]] = str(val)
        self.client.create_index(self.index_name, IndexType.FLAT, {'nlist': 16384})
        print("************* Done milvus indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* hits whose rescaled score exceeds THRESHOLD."""
        _, vectors = self.client.search(collection_name=self.index_name,
                                        query_records=[query_vector],
                                        top_k=search_size,
                                        params={'nprobe': 16})
        results = []
        for hit in vectors[0]:
            # IP distance of unit vectors lies in [-1, 1]; map it to [0, 1].
            score = float(hit.distance) * 0.5 + 0.5
            if score > THRESHOLD:
                results.append({
                    "id": hit.id,
                    "name": self.id_dict[hit.id],
                    "score": round(score, 6),
                })

        return results
51 |
--------------------------------------------------------------------------------
/service/numpy_retrieval.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import h5py
5 | import numpy as np
6 |
THRESHOLD = float(os.environ.get('THRESHOLD', '0.85'))  # retrieval score cut-off


class NumpyRetrieval(object):
    """Brute-force image retrieval via dot products over an in-memory matrix."""

    def __init__(self, index_dir, emb_size=512):
        self.emb_size = emb_size
        self.load(index_dir)

    def load(self, index_dir):
        """Read the feature matrix and image names from the HDF5 index file."""
        h5f = h5py.File(index_dir, 'r')
        self.retrieval_db = h5f['dataset_1'][:]
        self.retrieval_name = h5f['dataset_2'][:]
        h5f.close()
        print("************* Done numpy indexing, Indexed {} documents *************".format(len(self.retrieval_db)))

    def retrieve(self, query_vector, search_size=3):
        """Return up to *search_size* hits whose rescaled score exceeds THRESHOLD."""
        similarities = np.dot(query_vector, self.retrieval_db.T)
        ranked = np.argsort(-similarities.T)

        results = []
        for db_idx in ranked[:search_size]:
            # Dot product of unit vectors lies in [-1, 1]; map it to [0, 1].
            score = float(similarities[db_idx]) * 0.5 + 0.5
            if score > THRESHOLD:
                results.append({
                    "name": self.retrieval_name[db_idx],
                    "score": round(score, 6),
                })

        return results
38 |
--------------------------------------------------------------------------------
/service/vggnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import numpy as np
4 | from keras.applications.vgg16 import VGG16
5 | from keras.applications.vgg16 import preprocess_input as preprocess_input_vgg
6 | from keras.preprocessing import image
7 | from numpy import linalg as LA
8 |
9 |
class VGGNet(object):
    """VGG16 feature extractor producing L2-normalized image embeddings."""

    def __init__(self):
        self.input_shape = (224, 224, 3)
        self.weight = 'imagenet'
        self.pooling = 'max'
        # Headless VGG16 with global max pooling: one vector per image.
        self.model_vgg = VGG16(weights=self.weight,
                               input_shape=(self.input_shape[0], self.input_shape[1], self.input_shape[2]),
                               pooling=self.pooling,
                               include_top=False)
        # Warm-up predict so the first real request is not slowed by graph setup.
        self.model_vgg.predict(np.zeros((1, 224, 224, 3)))

    def vgg_extract_feat(self, img_path):
        """Load the image at *img_path* and return its normalized feature list."""
        img = image.load_img(img_path, target_size=(self.input_shape[0], self.input_shape[1]))
        tensor = image.img_to_array(img)
        tensor = np.expand_dims(tensor, axis=0)
        tensor = preprocess_input_vgg(tensor)
        feat = self.model_vgg.predict(tensor)
        # L2-normalize, then convert numpy scalars to plain Python floats.
        normed = feat[0] / LA.norm(feat[0])
        return [value.item() for value in normed]
30 |
--------------------------------------------------------------------------------