├── .github
└── workflows
│ └── go.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── config.yaml
├── core
├── initialize.go
└── parser.go
├── docs
├── TODO.md
├── api.md
├── compile.md
├── config.md
├── example.md
├── images
│ ├── img1.png
│ ├── img2.png
│ └── index.png
├── index.md
├── release.md
├── storage.md
└── test.md
├── global
├── config.go
└── global.go
├── go.mod
├── go.sum
├── gofound.d
├── gofound.sh
├── main.go
├── sdk
├── SDK 设计指南.md
├── base.go
├── client.go
├── database.go
├── index.go
└── word.go
├── searcher
├── arrays
│ └── arrays.go
├── container.go
├── container_test.go
├── engine.go
├── model
│ ├── doc.go
│ ├── item.go
│ └── search.go
├── pagination
│ ├── page_test.go
│ └── pagination.go
├── sorts
│ ├── fast.go
│ └── sort.go
├── storage
│ ├── leveldb_storage.go
│ └── leveldb_test.go
├── system
│ ├── cpu.go
│ ├── cpu_test.go
│ ├── disk.go
│ ├── disk_test.go
│ ├── mem.go
│ ├── mem_test.go
│ └── utils.go
├── utils
│ └── utils.go
└── words
│ ├── data
│ └── dictionary.txt
│ └── tokenizer.go
├── tests
├── array_test.go
├── benchmark
│ ├── array_test.go
│ ├── skiplist_test.go
│ └── utils.go
├── chan_test.go
├── func_test.go
├── http
│ ├── cut.http
│ ├── dump.http
│ ├── index.http
│ ├── index2.http
│ ├── query.http
│ └── status.http
├── index_test.go
├── merge_test.go
├── sort.md
├── sort_test.go
├── thread_test.go
├── time_test.go
└── word_test.go
└── web
├── admin
├── admin.go
└── assets
│ ├── assets.go
│ └── web
│ ├── .gitignore
│ ├── README.md
│ ├── dist
│ ├── assets
│ │ ├── dashboard.05738d5c.js
│ │ ├── dashboard.3b82ec2b.css
│ │ ├── document.03ee9c1a.js
│ │ ├── document.ad0de346.css
│ │ ├── index.47e321f4.js
│ │ ├── index.c3f99036.css
│ │ ├── status.2e4cf3b3.css
│ │ └── status.3af7fc63.js
│ ├── favicon.ico
│ └── index.html
│ ├── index.html
│ ├── package-lock.json
│ ├── package.json
│ ├── public
│ └── favicon.ico
│ ├── src
│ ├── App.vue
│ ├── api.js
│ ├── assets
│ │ └── logo.png
│ ├── components
│ │ ├── CPU.vue
│ │ ├── Disk.vue
│ │ ├── GC.vue
│ │ ├── Icon.vue
│ │ ├── IndexDialog.vue
│ │ ├── Memory.vue
│ │ ├── Menu.vue
│ │ ├── ProgressChat.vue
│ │ └── Runtime.vue
│ ├── main.js
│ ├── menus.js
│ ├── router.js
│ └── views
│ │ ├── dashboard.vue
│ │ ├── document.vue
│ │ └── status.vue
│ ├── vite.config.js
│ └── yarn.lock
├── controller
├── base.go
├── database.go
├── index.go
├── response.go
├── services.go
└── word.go
├── middleware
├── cors.go
└── exception.go
├── result.go
├── router
├── base.go
├── database.go
├── index.go
├── router.go
└── word.go
└── service
├── base.go
├── database.go
├── index.go
├── service.go
└── word.go
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
1 | name: Go
2 |
3 | on:
4 | push:
5 | tags:
6 | - "v*"
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v3
13 | - uses: actions/setup-node@v3
14 | with:
15 | node-version: 14
16 | - run: npm install --prefix=./web/admin/assets/web --force
17 | - run: npm run build --prefix=./web/admin/assets/web --force
18 |
19 | - name: Set up Go
20 | uses: actions/setup-go@v3
21 | with:
22 | go-version: 1.18
23 |
24 | - name: Install dependencies
25 | run: go get -v -t -d ./...
26 |
27 | - name: Build-Macos
28 | run: GOOS=darwin GOARCH=amd64 go build -o ./dist/gofound_macos_amd64
29 |
30 | - name: Build-Macos-Arm64
31 | run: GOOS=darwin GOARCH=arm64 go build -o ./dist/gofound_macos_apple_silicon
32 |
33 | - name: Build-Windows-x64
34 | run: GOOS=windows GOARCH=amd64 go build -o ./dist/gofound_windows_amd64.exe
35 |
35 | - name: Build-Windows-ARM64
37 | run: GOOS=windows GOARCH=arm64 go build -o ./dist/gofound_windows_arm64.exe
38 |
39 | - name: Build-Linux-x64
40 | run: GOOS=linux GOARCH=amd64 go build -o ./dist/gofound_linux_amd64
41 |
41 | - name: Build-Linux-ARM64
43 | run: GOOS=linux GOARCH=arm64 go build -o ./dist/gofound_linux_arm64
44 |
45 | - uses: "marvinpinto/action-automatic-releases@latest"
46 | with:
47 | repo_token: "${{ secrets.GITHUB_TOKEN }}"
48 | prerelease: false
49 | files: |
50 | ./dist/*
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | gofound
3 | /gofound
4 | /tmp/
5 | /index/
6 | /.idea/
7 | /*/*.bin
8 | /cache
9 | /tests/index
10 | /data
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM golang:1.18 as builder
2 |
3 | ENV GO111MODULE=on \
4 | GOPROXY=https://goproxy.io
5 |
6 | COPY . /app
7 | WORKDIR /app
8 |
9 | RUN go get && go build -ldflags="-s -w" -installsuffix cgo
10 |
11 | FROM debian:buster-slim
12 |
13 | ENV TZ=Asia/Shanghai \
14 | LANG=C.UTF-8 \
15 | APP_DIR=/usr/local/go_found
16 |
17 | COPY --from=builder /app/gofound ${APP_DIR}/gofound
18 |
19 | WORKDIR ${APP_DIR}
20 |
21 | RUN ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime \
22 | && echo ${TZ} > /etc/timezone \
23 | && chmod +x gofound
24 |
25 | EXPOSE 5678
26 |
27 | CMD ["./gofound"]
28 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GoFound
2 |
3 | `GoFound` 一个golang实现的全文检索引擎,支持持久化和单机亿级数据毫秒级查找。
4 |
5 | 接口可以通过http调用。
6 |
7 | 详见 [API文档](./docs/api.md)
8 |
9 | ## 文档
10 |
11 | + [示例](./docs/example.md)
12 | + [API文档](./docs/api.md)
13 | + [索引原理](./docs/index.md)
14 | + [配置文档](./docs/config.md)
15 | + [持久化](./docs/storage.md)
16 | + [编译部署](./docs/compile.md)
17 |
18 | ## 在线体验
19 |
20 | > Simple社区使用的GoFound,可以直接模糊搜索相关帖子
21 |
22 | [在线体验](https://simpleui.72wo.com/search/simpleui)
23 |
24 | ## GoFound在线管理后台Demo
25 | [http://119.29.69.50:5678/admin](http://119.29.69.50:5678/admin)
26 |
27 | 
28 |
29 | 
30 |
31 | ## QQ交流群
32 |
33 | [556102631](https://qm.qq.com/cgi-bin/qm/qr?k=4OvO7bgRAhSLX0J2WXVbCWbY7hL7gMYd&jump_from=webapi)
34 |
35 | ## 二进制文件下载
36 |
37 | > 支持Windows、Linux、macOS(amd64和arm64)和苹果M1处理器
38 |
39 | [点击下载](https://github.com/newpanjing/gofound/releases)
40 |
41 | ## 技术栈
42 |
43 | + 二分法查找
44 | + 快速排序法
45 | + 倒排索引
46 | + 正排索引
47 | + 文件分片
48 | + golang-jieba分词
49 | + leveldb
50 |
51 | ### 为何要用golang实现一个全文检索引擎?
52 |
53 | + 正如其名,`GoFound`去探索全文检索的世界,一个小巧精悍的全文检索引擎,支持持久化和单机亿级数据毫秒级查找。
54 |
55 | + 传统的项目大多数会采用`ElasticSearch`来做全文检索,因为`ElasticSearch`够成熟,社区活跃、资料完善。缺点就是配置繁琐、基于JVM对内存消耗比较大。
56 |
57 | + 所以我们需要一个更高效的搜索引擎,而又不会消耗太多的内存。 以最低的内存达到全文检索的目的,相比`ElasticSearch`,`gofound`是原生编译,会减少系统资源的消耗。而且对外无任何依赖。
58 |
59 | ## 安装和启动
60 |
61 | > 下载好源码之后,进入到源码目录,执行下列两个命令
62 | >
63 |
64 | + 编译
65 |
66 | > 直接下载 [可执行文件](https://github.com/newpanjing/gofound/releases) 可以不用编译,省去这一步。
67 |
68 | ```shell
69 | go get && go build
70 | ```
71 |
72 | + 启动
73 |
74 | ```shell
75 | ./gofound --addr=:8080 --data=./data
76 | ```
77 |
78 | + docker部署
79 |
80 | ```shell
81 | docker build -t gofound .
82 | docker run -d --name gofound -p 5678:5678 -v /mnt/data/gofound:/usr/local/go_found/data gofound:latest
83 | ```
84 |
85 | + 其他命令
86 | 参考 [配置文档](./docs/config.md)
87 |
88 | ## 多语言SDK
89 |
90 | > 使用gofound的多语言SDK,可以在不同语言中使用gofound。但是请注意,版本号与gofound需要一致。主版本和子版本号,修订版不一致不影响。
91 |
92 | [Java](https://github.com/newpanjing/gofound-java)
93 |
94 | [Python](https://github.com/newpanjing/gofound-python)
95 |
96 | [Node.js](https://github.com/newpanjing/gofound-nodejs)
97 |
98 | 其他语言的SDK,正在陆续完善中。也可以直接通过[API文档](./docs/api.md)用HTTP请求实现。
99 |
100 | ## 和ES比较
101 |
102 | | ES | GoFound |
103 | |-------------|-----------------------|
104 | | 支持持久化 | 支持持久化 |
105 | | 基于内存索引 | 基于磁盘+内存缓存 |
106 | | 需要安装JDK | 原生二进制,无外部依赖 |
107 | | 需要安装第三方分词插件 | 自带中文分词和词库 |
108 | | 默认没有可视化管理界面 | 自带可视化管理界面 |
109 | | 内存占用大 | 基于Golang原生可执行文件,内存非常小 |
110 | | 配置复杂 | 默认可以不加任何参数启动,并且提供少量配置 |
111 |
112 |
113 | ## 待办
114 |
115 | [TODO](docs/TODO.md)
116 |
117 | ## 使用GoFound的用户
118 |
119 | [Simple社区](https://simpleui.72wo.com)| [贝塔博客](https://www.88cto.com) | [Book360](https://www.book360.cn)
120 |
121 | [深圳市十二点科技有限公司](https://www.72wo.com)|[深圳市恒一博科技有限公司](http://www.hooebo.com)
122 |
123 | [西安易神网络信息系统服务有限公司](http://www.hansonvip.com/)
124 |
125 | [影视资源搜索](https://movie.ipip.icu)|[酷易物联](https://cooleiot.tech)|[French博客](https://hoime.cn/)
126 |
127 | [好咪二次元之家](http://hoime.space)
128 |
129 | ## 发布日志
130 |
131 | [发布日志](https://github.com/newpanjing/gofound/releases)
132 |
133 | ## 开发成员
134 | |姓名|联系方式|贡献部分|
135 | |---|---|---|
136 | |[newpanjing](https://github.com/newpanjing)|newpanjing@icloud.com|负责人、引擎、UI|
137 | |[XiaoK29](https://github.com/XiaoK29)|-|引擎、接口|
138 | |[nightzjp](https://github.com/nightzjp)|-|引擎|
139 | |[xiao luobei](https://github.com/liu-cn)|-|引擎|
140 |
--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
1 | #监听地址
2 | addr: 0.0.0.0:5678
3 |
4 | #数据目录
5 | data: ./data
6 | #词典目录
7 | dictionary: ./data/dictionary.txt
8 |
9 | #是否启用admin
10 | enableAdmin: true
11 |
12 | # 最大线程数
13 | gomaxprocs: 4
14 |
15 | # admin 用户名和密码
16 | auth: admin:123456
17 |
18 | # 接口是否开启压缩
19 | enableGzip: true
20 |
21 | # 数据库关闭超时时间
22 | timeout: 600
23 |
24 | # 分片数量
25 | shard: 10
26 |
27 | # 分片缓冲数量
28 | bufferNum: 1000
--------------------------------------------------------------------------------
/core/initialize.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "github.com/sea-team/gofound/global"
7 | "github.com/sea-team/gofound/searcher"
8 | "github.com/sea-team/gofound/searcher/words"
9 | "github.com/sea-team/gofound/web/controller"
10 | "github.com/sea-team/gofound/web/router"
11 | "log"
12 | "net/http"
13 |
14 | //_ "net/http/pprof"
15 | "os"
16 | "os/signal"
17 |
18 | //"runtime"
19 | "syscall"
20 | "time"
21 | )
22 |
23 | // NewContainer 创建一个容器
24 | func NewContainer(tokenizer *words.Tokenizer) *searcher.Container {
25 | container := &searcher.Container{
26 | Dir: global.CONFIG.Data,
27 | Debug: global.CONFIG.Debug,
28 | Tokenizer: tokenizer,
29 | Shard: global.CONFIG.Shard,
30 | Timeout: global.CONFIG.Timeout,
31 | BufferNum: global.CONFIG.BufferNum,
32 | }
33 | if err := container.Init(); err != nil {
34 | panic(err)
35 | }
36 |
37 | return container
38 | }
39 |
40 | func NewTokenizer(dictionaryPath string) *words.Tokenizer {
41 | return words.NewTokenizer(dictionaryPath)
42 | }
43 |
44 | // Initialize 初始化
45 | func Initialize() {
46 |
47 | //runtime.SetMutexProfileFraction(1) // 开启对锁调用的跟踪
48 | //runtime.SetBlockProfileRate(1) // 开启对阻塞操作的跟踪
49 |
50 | //go func() { http.ListenAndServe("0.0.0.0:6060", nil) }()
51 |
52 | global.CONFIG = Parser()
53 |
54 | if !global.CONFIG.Debug {
55 | log.SetOutput(os.Stdout) //将记录器的输出设置为os.Stdout
56 | }
57 |
58 | defer func() {
59 |
60 | if r := recover(); r != nil {
61 | fmt.Printf("panic: %s\n", r)
62 | }
63 | }()
64 |
65 | //初始化分词器
66 | tokenizer := NewTokenizer(global.CONFIG.Dictionary)
67 | global.Container = NewContainer(tokenizer)
68 |
69 | // 初始化业务逻辑
70 | controller.NewServices()
71 |
72 | // 注册路由
73 | r := router.SetupRouter()
74 | // 启动服务
75 | srv := &http.Server{
76 | Addr: global.CONFIG.Addr,
77 | Handler: r,
78 | }
79 | go func() {
80 | // 开启一个goroutine启动服务
81 | if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
82 | log.Println("listen:", err)
83 | }
84 | }()
85 |
86 | // 优雅关机
87 | quit := make(chan os.Signal, 1)
88 | signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
89 | <-quit
90 | log.Println("Shutdown Server ...")
91 |
92 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
93 | defer cancel()
94 |
95 | if err := srv.Shutdown(ctx); err != nil {
96 | log.Println("Server Shutdown:", err)
97 | }
98 |
99 | log.Println("Server exiting")
100 | }
101 |
--------------------------------------------------------------------------------
/core/parser.go:
--------------------------------------------------------------------------------
1 | package core
2 |
3 | import (
4 | "flag"
5 | "fmt"
6 | "github.com/sea-team/gofound/global"
7 | "os"
8 | "runtime"
9 |
10 | "gopkg.in/yaml.v2"
11 | )
12 |
13 | // Parser 解析器
14 | func Parser() *global.Config {
15 |
16 | var addr = flag.String("addr", "127.0.0.1:5678", "设置监听地址和端口")
17 | //兼容windows
18 | dir := fmt.Sprintf(".%sdata", string(os.PathSeparator))
19 |
20 | var dataDir = flag.String("data", dir, "设置数据存储目录")
21 |
22 | var debug = flag.Bool("debug", true, "设置是否开启调试模式")
23 |
24 | var dictionaryPath = flag.String("dictionary", "./data/dictionary.txt", "设置词典路径")
25 |
26 | var enableAdmin = flag.Bool("enableAdmin", true, "设置是否开启后台管理")
27 |
28 | var gomaxprocs = flag.Int("gomaxprocs", runtime.NumCPU()*2, "设置GOMAXPROCS")
29 |
30 | var auth = flag.String("auth", "", "开启认证,例如: admin:123456")
31 |
32 | var enableGzip = flag.Bool("enableGzip", true, "是否开启gzip压缩")
33 | var timeout = flag.Int64("timeout", 10*60, "数据库超时关闭时间(秒)")
34 | var bufferNum = flag.Int("bufferNum", 1000, "分片缓冲数量")
35 |
36 | var configPath = flag.String("config", "", "配置文件路径,配置此项其他参数忽略")
37 |
38 | flag.Parse()
39 |
40 | config := &global.Config{}
41 |
42 | if *configPath != "" {
43 | //解析配置文件
44 | //file, err := ioutil.ReadFile(*configPath)
45 | file, err := os.ReadFile(*configPath) //详情:https://github.com/golang/go/issues/42026
46 | if err != nil {
47 | panic(err)
48 | }
49 | err = yaml.Unmarshal(file, config)
50 | if err != nil {
51 | panic(err)
52 | }
53 | return config
54 | }
55 | config = &global.Config{
56 | Addr: *addr,
57 | Data: *dataDir,
58 | Debug: *debug,
59 | Dictionary: *dictionaryPath,
60 | EnableAdmin: *enableAdmin,
61 | Gomaxprocs: *gomaxprocs,
62 | Auth: *auth,
63 | EnableGzip: *enableGzip,
64 | Timeout: *timeout,
65 | BufferNum: *bufferNum,
66 | }
67 |
68 | return config
69 | }
70 |
--------------------------------------------------------------------------------
/docs/TODO.md:
--------------------------------------------------------------------------------
1 | # 待办
2 |
3 | + 增加多库
4 | + 增加配置
5 | + 增加Web控制台
6 |
7 | ta shuo d
--------------------------------------------------------------------------------
/docs/api.md:
--------------------------------------------------------------------------------
1 | # API
2 |
3 | `gofound`启动之后,会监听一个TCP端口,接收来自客户端的搜索请求。处理http请求部分使用`gin`框架。
4 |
5 | ## 多数据库支持
6 |
7 | 从1.1版本开始,我们支持了多数据库,API接口中通过get参数来指定数据库。
8 |
9 | 如果不指定,默认数据库为`default`。
10 |
11 | 如:`api/index?database=db1` 其他post参数不变
12 |
13 | 如果指定的数据库名不存在,将会自动创建一个新的数据库。如果需要删除,直接删除该数据库目录,然后重启gofound即可。
14 |
15 | ## 增加/修改索引
16 |
17 | 需要在query参数中指定数据库名`database=default`
18 |
19 | | 接口地址 | /api/index |
20 | |------|------------------|
21 | | 请求方式 | POST |
22 | | 请求类型 | application/json |
23 |
24 | ### 请求
25 |
26 | | 字段 | 类型 | 必选 | 描述 |
27 | |----------|--------|-----|-----------------------------------|
28 | | id | uint32 | 是 | 文档的主键id,需要保持唯一性,如果id重复,将会覆盖之前的文档。 |
29 | | text | string | 是 | 需要索引的文本块 |
30 | | document | object | 是 | 附带的文档数据,json格式,搜索的时候原样返回 |
31 |
32 | query参数(params-data)
33 |
34 | | 字段 | 类型 | 必选 | 描述 |
35 | |----------|--------|-----|--------|
36 | | database | string | 是 | 指定数据库名 |
37 |
38 | + POST /api/index
39 |
40 | ```json
41 | {
42 | "id": 88888,
43 | "text": "深圳北站",
44 | "document": {
45 | "title": "阿森松岛所445",
46 | "number": 223
47 | }
48 | }
49 | ```
50 |
51 | + 命令行
52 |
53 | ```bash
54 | curl -H "Content-Type:application/json" -X POST --data '{"id":88888,"text":"深圳北站","document":{"title":"阿森松岛所445","number":223}}' http://127.0.0.1:5678/api/index?database=default
55 | ```
56 |
57 | ### 响应
58 |
59 | ```json
60 | {
61 | "state": true,
62 | "message": "success"
63 | }
64 | ```
65 |
66 | ## 批量增加/修改索引
67 | 与添加单个索引一样,也需要在query参数中指定数据库名`database=default`
68 |
69 | | 接口地址 | /api/index/batch |
70 | |------|------------------|
71 | | 请求方式 | POST |
72 | | 请求类型 | application/json |
73 |
74 | 参数与单个一致,只是需要用数组包裹多个json对象,例如:
75 |
76 | ```json
77 | [
78 | {
79 | "id": 88888,
80 | "text": "深圳北站",
81 | "document": {
82 | "title": "阿森松岛所445",
83 | "number": 223
84 | }
85 | },
86 | {
87 | "id": 22222,
88 | "text": "北京东站",
89 | "document": {
90 | "title": "123123123",
91 | "number": 123123
92 | }
93 | }
94 | ]
95 | ```
96 |
97 | ## 删除索引
98 | 与以上接口一样,也需要在query参数中指定数据库名`database=default`
99 |
100 | | 接口地址 | /api/index/remove |
101 | |------|-------------------|
102 | | 请求方式 | POST |
103 | | 请求类型 | application/json |
104 |
105 | ### 请求
106 |
107 | | 字段 | 类型 | 必选 | 描述 |
108 | |-----|--------|-----|---------|
109 | | id | uint32 | 是 | 文档的主键id |
110 |
111 | + POST /api/index/remove
112 |
113 | ```json
114 | {
115 | "id": 88888
116 | }
117 | ```
118 |
119 | + 命令行
120 |
121 | ```bash
122 | curl -H "Content-Type:application/json" -X POST --data '{"id":88888}' http://127.0.0.1:5678/api/index/remove?database=default
123 | ```
124 |
125 | ### 响应
126 |
127 | ```json
128 | {
129 | "state": true,
130 | "message": "success"
131 | }
132 | ```
133 |
134 | ## 查询索引
135 |
135 | `GoFound`只提供一种查询方式:按照文本查询。与其他NoSQL数据库不同,`GoFound`不支持按照文档的其他字段查询。
137 |
138 | | 接口地址 | /api/query |
139 | |------|------------------|
140 | | 请求方式 | POST |
141 | | 请求类型 | application/json |
142 |
143 | ### 请求
144 |
145 | | 字段 | 类型 | 必选 | 描述 |
146 | |-----------|--------|-----|----------------------------------------------------------------------------------------------|
147 | | query | string | 是 | 查询的关键词,都是or匹配 |
148 | | page | int | 否 | 页码,默认为1 |
149 | | limit | int | 否 | 返回的文档数量,默认为100,没有最大限制,最好不要超过1000,超过之后速度会比较慢,内存占用会比较多 |
150 | | order | string | 否 | 排序方式,取值`asc`和`desc`,默认为`desc`,按id排序,然后根据结果得分排序 |
151 | | highlight | object | 否 | 关键字高亮,相对text字段中的文本 |
152 | | scoreExp | string | 否 | 根据文档的字段计算分数,然后再进行排序,例如:score+[document.hot]*10,表达式中score为关键字的分数,document.hot为document中的hot字段 |
153 |
154 |
155 | query参数(params-data)
156 |
157 | | 字段 | 类型 | 必选 | 描述 |
158 | |----------|--------|-----|---------------------|
159 | | database | string | 否 | 指定数据库名,不填默认为default |
160 |
161 |
162 | ### highlight
163 |
164 | > 配置以后,符合条件的关键词将会被preTag和postTag包裹
165 |
166 | | 字段 | 描述 |
167 | |---------|-------|
168 | | preTag | 关键词前缀 |
169 | | postTag | 关键词后缀 |
170 |
171 | + 示例
172 |
173 | ```json
174 | {
175 | "query": "上海哪里好玩",
176 | "page": 1,
177 | "limit": 10,
178 | "order": "desc",
179 | "highlight": {
180 | "preTag": "",
181 | "postTag": ""
182 | }
183 | }
184 | ```
185 |
186 | + POST /api/query
187 |
188 | ```json
189 | {
190 | "query": "深圳北站",
191 | "page": 1,
192 | "limit": 10,
193 | "order": "desc"
194 | }
195 | ```
196 |
197 | + 命令行
198 |
199 | ```bash
200 | curl -H "Content-Type:application/json" -X POST --data '{"query":"深圳北站","page":1,"limit":10,"order":"desc"}' http://127.0.0.1:5678/api/query
201 | ```
202 |
203 | ### 响应
204 |
205 | | 字段 | 类型 | 描述 |
206 | |-----------|---------|-------------------------|
207 | | time | float32 | 搜索文档用时 |
208 | | total | int | 符合条件的数量 |
209 | | pageCount | int | 页总数 |
210 | | page | int | 当前页码 |
211 | | limit | int | 每页数量 |
212 | | documents | array | 文档列表,[参考索引文档](#增加/修改索引) |
213 |
214 | ```json
215 | {
216 | "state": true,
217 | "message": "success",
218 | "data": {
219 | "time": 2.75375,
220 | "total": 13487,
221 | "pageCount": 1340,
222 | "page": 1,
223 | "limit": 10,
224 | "documents": [
225 | {
226 | "id": 1675269553,
227 | "text": "【深圳消费卡/购物券转让/求购信息】- 深圳赶集网",
228 | "document": {
229 | "id": "8c68e948de7c7eb4362de15434a3ace7",
230 | "title": "【深圳消费卡/购物券转让/求购信息】- 深圳赶集网"
231 | },
232 | "score": 3
233 | },
234 | {
235 | "id": 88888,
236 | "text": "深圳北站",
237 | "document": {
238 | "number": 223,
239 | "title": "阿森松岛所445"
240 | },
241 | "score": 2
242 | },
243 | {
244 | "id": 212645608,
245 | "text": "【深圳美容美发卡转让/深圳美容美发卡求购信息】- 深圳赶集网",
246 | "document": {
247 | "id": "d3ce16b68a90833cbc20b8a49e93b9cd",
248 | "title": "【深圳美容美发卡转让/深圳美容美发卡求购信息】- 深圳赶集网"
249 | },
250 | "score": 1.5
251 | },
252 | {
253 | "id": 1191140208,
254 | "text": "【深圳赶集网】-免费发布信息-深圳分类信息门户",
255 | "document": {
256 | "id": "44be60a1d8b54c431e5511804062ae62",
257 | "title": "【深圳赶集网】-免费发布信息-深圳分类信息门户"
258 | },
259 | "score": 1.5
260 | },
261 | {
262 | "id": 4133884907,
263 | "text": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网",
264 | "document": {
265 | "id": "f25bb8136e8c2b02e3fcd65627a9ddbc",
266 | "title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网"
267 | },
268 | "score": 1
269 | },
270 | {
271 | "id": 206909132,
272 | "text": "【沙嘴门票/电影票转让/求购信息】- 深圳赶集网",
273 | "document": {
274 | "id": "63ca3ea4ffd254454e738a0957efedc2",
275 | "title": "【沙嘴门票/电影票转让/求购信息】- 深圳赶集网"
276 | },
277 | "score": 1
278 | },
279 | {
280 | "id": 220071473,
281 | "text": "【深圳健身卡转让/深圳健身卡求购信息】- 深圳赶集网",
282 | "document": {
283 | "id": "72d3d650c8a8a4e73b89b406f6dc76ef",
284 | "title": "【深圳健身卡转让/深圳健身卡求购信息】- 深圳赶集网"
285 | },
286 | "score": 1
287 | },
288 | {
289 | "id": 461974720,
290 | "text": "铁路_论坛_深圳热线",
291 | "document": {
292 | "id": "73c96ac2c23bc0cb4fb12ce7660c8b35",
293 | "title": "铁路_论坛_深圳热线"
294 | },
295 | "score": 1
296 | },
297 | {
298 | "id": 490922879,
299 | "text": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网",
300 | "document": {
301 | "id": "93be0f35c484ddcd8c83602e27535d96",
302 | "title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网"
303 | },
304 | "score": 1
305 | },
306 | {
307 | "id": 525810194,
308 | "text": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网",
309 | "document": {
310 | "id": "e489dd19dce0de2c9f4e59c969ec9ec0",
311 | "title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网"
312 | },
313 | "score": 1
314 | }
315 | ],
316 | "words": [
317 | "深圳",
318 | "北站"
319 | ]
320 | }
321 | }
322 | ```
323 |
324 | ## 查询状态
325 |
326 | | 接口地址 | /api/status |
327 | |------|------------------|
328 | | 请求方式 | GET |
329 |
330 | ### 请求
331 |
332 | ```bash
333 | curl http://127.0.0.1:5678/api/status
334 | ```
335 |
336 | ### 响应
337 |
338 | ```json
339 | {
340 | "state": true,
341 | "message": "success",
342 | "data": {
343 | "index": {
344 | "queue": 0,
345 | "shard": 10,
346 | "size": 531971
347 | },
348 | "memory": {
349 | "alloc": 1824664656,
350 | "heap": 1824664656,
351 | "heap_idle": 10008625152,
352 | "heap_inuse": 2100068352,
353 | "heap_objects": 3188213,
354 | "heap_released": 9252003840,
355 | "heap_sys": 12108693504,
356 | "sys": 12700504512,
357 | "total": 11225144273040
358 | },
359 | "status": "ok",
360 | "system": {
361 | "arch": "arm64",
362 | "cores": 10,
363 | "os": "darwin",
364 | "version": "go1.18"
365 | }
366 | }
367 | }
368 | ```
369 |
370 | ## 删除数据库
371 |
372 | | 接口地址 | /api/db/drop |
373 | |------|--------------|
374 | | 请求方式 | GET |
375 |
376 | ### 请求
377 |
378 | ```bash
379 | curl http://127.0.0.1:5678/api/db/drop?database=db_name
380 | ```
381 |
382 | ### 响应
383 |
384 | ```json
385 | {
386 | "state": true,
387 | "message": "success"
388 | }
389 | ```
390 |
391 | ## 在线分词
392 |
393 | | 接口地址 | /api/word/cut |
394 | |------|-----------------|
395 | | 请求方式 | GET |
396 |
397 | ### 请求参数
398 |
399 | | 字段 | 类型 | 必选 | 描述 |
400 | |-----|--------|-----|-----|
401 | | q | string | 是 | 关键词 |
402 |
403 | ### 请求
404 |
405 | ```bash
406 | curl http://127.0.0.1:5678/api/word/cut?q=上海和深圳哪个城市幸福指数高
407 | ```
408 |
409 | ### 响应
410 |
411 | ```json
412 | {
413 | "state": true,
414 | "message": "success",
415 | "data": [
416 | "上海",
417 | "深圳",
418 | "哪个",
419 | "城市",
420 | "幸福",
421 | "指数"
422 | ]
423 | }
424 | ```
--------------------------------------------------------------------------------
/docs/compile.md:
--------------------------------------------------------------------------------
1 | # 编译
2 |
3 | `gofound` 基于`golang-1.18`,编译之前需要安装对应的golang版本。
4 |
5 | 推荐使用编译好的[二进制文件](https://github.com/sea-team/gofound/releases)
6 |
7 | ## Admin
8 | > 如果需要Admin部分,请先构建admin,admin基于vue+element-ui+vite,而这些也需要安装nodejs
9 |
10 | 构建命令:
11 |
12 | ```shell
13 | cd ./web/admin/assets/web/
14 |
15 | npm install
16 |
17 | npm run build
18 | ```
19 |
20 | 完成以上步骤之后,才能使用admin
21 |
22 | ## 编译
23 |
24 | ```shell
25 | go get
26 | go build -o gofound
27 | ```
28 |
29 | ## 依赖
30 |
31 | ```shell
32 | go 1.18
33 |
34 | require (
35 | github.com/emirpasic/gods v1.12.0
36 | github.com/gin-gonic/gin v1.7.7
37 | github.com/yanyiwu/gojieba v1.1.2
38 | )
39 |
40 | require (
41 | github.com/gin-contrib/sse v0.1.0 // indirect
42 | github.com/go-playground/locales v0.13.0 // indirect
43 | github.com/go-playground/universal-translator v0.17.0 // indirect
44 | github.com/go-playground/validator/v10 v10.4.1 // indirect
45 | github.com/golang/protobuf v1.3.3 // indirect
46 | github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db // indirect
47 | github.com/json-iterator/go v1.1.9 // indirect
48 | github.com/leodido/go-urn v1.2.0 // indirect
49 | github.com/mattn/go-isatty v0.0.12 // indirect
50 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
51 | github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 // indirect
52 | github.com/syndtr/goleveldb v1.0.0 // indirect
53 | github.com/ugorji/go/codec v1.1.7 // indirect
54 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 // indirect
55 | golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf // indirect
56 | gopkg.in/yaml.v2 v2.2.8 // indirect
57 | )
58 |
59 | ```
--------------------------------------------------------------------------------
/docs/config.md:
--------------------------------------------------------------------------------
1 | # 配置
2 |
3 | 在编译好[gofound](./compile.md)之后,就可以启动了。
4 |
5 | ```shell
6 | ./gofound
7 | ```
8 |
9 | ## 参数
10 |
11 | ```shell
12 | ./gofound -h
13 |
14 | -addr string
15 | 设置监听地址和端口 (default "0.0.0.0:5678")
16 | -auth string
17 | 开启认证,例如: admin:123456
18 | -config string
19 | 配置文件路径,配置此项其他参数忽略
20 | -data string
21 | 设置数据存储目录 (default "./data")
22 | -debug
23 | 设置是否开启调试模式 (default true)
24 | -dictionary string
25 | 设置词典路径 (default "./data/dictionary.txt")
26 | -enableAdmin
27 | 设置是否开启后台管理 (default true)
28 | -enableGzip
29 | 是否开启gzip压缩 (default true)
30 | -gomaxprocs int
31 | 设置GOMAXPROCS (default 20)
32 | -timeout int
33 | 数据库超时关闭时间(秒) (default 600)
34 |
35 |
36 | ```
37 |
38 | ### addr
39 |
40 | 指定要监听的地址和端口。默认为`0.0.0.0:5678`,监听所有地址。
41 |
42 | ```shell
43 | ./gofound --addr=127.0.0.1:5678
44 | ./gofound --addr=:5678
45 | ./gofound --addr=0.0.0.0:5678
46 | ./gofound --addr=192.168.1.1:5678
47 | ```
48 |
49 | ### auth
50 |
51 | 设置admin和api接口的用户名密码,采用basic auth
52 |
53 | ```shell
54 | ./gofound --auth=admin:123456
55 | ```
56 |
57 | ### data
58 |
59 | 指定索引数据存储的目录,可以是相对路径,也可以是绝对路径。
60 |
61 | 相对路径是存在`gofound`所在目录下的。
62 |
63 |
64 |
65 | ```shell
66 | ./gofound --data=./data
67 | ./gofound --data=/www/data
68 | ```
69 |
70 | ### debug
71 |
72 | 设置是否开启调试模式。默认为`true`。
73 |
74 | ```shell
75 | ./gofound --debug=false
76 | ```
77 |
78 | ### dictionary
79 |
80 | 设置自定义词典路径。默认为`./data/dictionary.txt`。
81 |
82 | ```shell
83 | ./gofound --dictionary=./data/dictionary.txt
84 | ```
85 |
86 | ### enableAdmin
87 |
88 | 设置是否开启后台管理。默认为`true`。
89 |
90 | ```shell
91 | ./gofound --enableAdmin=false
92 | ```
93 |
94 | ### enableGzip
95 |
96 | 设置是否开启gzip压缩。默认为`true`。
97 |
98 | ```shell
99 | ./gofound --enableGzip=false
100 | ```
101 |
102 | ### gomaxprocs
103 |
104 | 设置GOMAXPROCS。默认为CPU数量X2。
105 |
106 | ```shell
107 | ./gofound --gomaxprocs=10
108 | ```
109 |
110 | ### shard
111 |
112 | 设置文件分片数量。默认为`10`。分片越多查询会越快,相反的磁盘IO和CPU会越多。
113 |
114 | ```shell
115 | ./gofound --shard=10
116 | ```
117 |
118 | ### timeout
119 |
120 | 单位为秒。默认为600秒。
121 |
122 | 数据库超时关闭时间,如果设置为-1,表示永不关闭,适合频繁查询的。如果时间过久会造成内存占用过多
123 |
124 | ```shell
125 | ./gofound --timeout=600
126 | ```
--------------------------------------------------------------------------------
/docs/example.md:
--------------------------------------------------------------------------------
1 | # 示例
2 |
3 | 编译好的下载地址:
4 | [https://github.com/sea-team/gofound/releases](https://github.com/sea-team/gofound/releases)
5 |
6 | 将编译后的`gofound`文件复制到`/usr/local/bin`目录下,然后在命令行中运行`gofound`命令即可。
7 |
8 | ```shell
9 | gofound --addr=:5678 --data=./data
10 | ```
11 |
12 | 启动成功后,就可以调用[API](./api.md)来进行索引和查询了。
13 |
14 |
--------------------------------------------------------------------------------
/docs/images/img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sea-team/gofound/eec89a008c64a278978db27ef40e9e248f2c6aac/docs/images/img1.png
--------------------------------------------------------------------------------
/docs/images/img2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sea-team/gofound/eec89a008c64a278978db27ef40e9e248f2c6aac/docs/images/img2.png
--------------------------------------------------------------------------------
/docs/images/index.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sea-team/gofound/eec89a008c64a278978db27ef40e9e248f2c6aac/docs/images/index.png
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # 索引原理和流程
2 |
3 | `gofound` 采用平衡二叉树对文本关键词进行索引,然后利用`leveldb`存储id值,以及对应的文档。
4 |
5 | ## 原理图
6 |
7 | 
8 |
9 | ## 二叉平衡查找树
10 |
11 | 二叉平衡查找树是一个高效的查找树,它的查找速度是`O(log n)`,并且每个节点的子树都是平衡的。
12 | `gofound`默认是分10个文件块,也就是10棵平衡查找树,每棵平衡查找树的深度约为`log2(n/10)`(n为索引总数)。
13 |
14 | 1亿条索引在一棵树中最多查找约27次(log2(10^8)≈26.6);如果是10亿条数据,会先根据key的hash值对shard数量取模找到对应的索引树,每棵树仍只有约1亿条索引,因此最大查找次数同样约为27次。
--------------------------------------------------------------------------------
/docs/release.md:
--------------------------------------------------------------------------------
1 | # GoFound发布日志
2 |
3 | ## 1.1
4 | + 优化内存占用
5 | + 提升查询速度
6 | + 增加自定义词库配置
7 | + 增加Admin界面
8 | + 增加认证功能
9 |
10 | ## 1.0.2
11 | + 完成基础功能
--------------------------------------------------------------------------------
/docs/storage.md:
--------------------------------------------------------------------------------
1 | # 持久化
2 |
3 | 持久化采用golang版本的leveldb
4 |
5 | + 关键词与ID映射
6 |
7 | 二叉树的每个关键词都与ID相关联,这样在搜索的时候,可以先找到索引的key,然后再通过key找到对应的id数组。
8 |
9 | 映射文件采用的是`leveldb`存储,编码格式为`gob`
10 |
11 | [查看源码](../searcher/storage/leveldb_storage.go)
12 |
13 |
14 | + 文档
15 |
16 | 文档是指在索引时传入的数据,在搜索的时候会原样返回。
17 |
18 | 存储文件采用的是leveldb存储,编码格式为gob
19 |
20 | [查看源码](../searcher/storage/leveldb_storage.go)
--------------------------------------------------------------------------------
/docs/test.md:
--------------------------------------------------------------------------------
1 | # 内存
2 |
3 | 我们的目标是以最小的内存使用和最大的性能,带来非凡的体验。
4 |
5 | 测试以1000万数据为基数。
6 |
7 | ## 内存理论
8 |
9 | 索引100亿条数据,只需要27.3MB的内存(经过bitmap的压缩),磁盘空间与1.0x一致。
10 |
11 | 查询100亿条搜索结果,只需要27.3MB的内存(经过bitmap的压缩)。
12 |
13 | 相比1.0x版本,内存可以减少 2794.43倍。同时不兼容1.0x版本的索引,需要重新索引一次。
14 |
15 | ## 查询理论
16 |
17 | 索引时间相比1.0x减少33.8倍,查询时间比1.0x快23%。如果数据超过亿级,查询速度比1.0x版本慢3倍。
--------------------------------------------------------------------------------
/global/config.go:
--------------------------------------------------------------------------------
1 | package global
2 |
// Config holds the server settings.
//
// Every field carries an explicit yaml key so that decoding a YAML
// configuration does not rely on the decoder's implicit lower-casing of
// field names. The pre-existing json tags are kept exactly as they were, and
// no json tags are added, so JSON encoding of this struct is unchanged.
type Config struct {
	Addr        string `yaml:"addr"`                           // listen address
	Data        string `yaml:"data" json:"data"`               // data directory
	Debug       bool   `yaml:"debug"`                          // debug mode
	Dictionary  string `yaml:"dictionary" json:"dictionary"`   // dictionary path
	EnableAdmin bool   `yaml:"enableAdmin"`                    // enable the admin UI
	Gomaxprocs  int    `yaml:"gomaxprocs" json:"gomaxprocs"`   // GOMAXPROCS
	Shard       int    `yaml:"shard"`                          // number of shards
	Auth        string `yaml:"auth" json:"auth"`               // authentication
	EnableGzip  bool   `yaml:"enableGzip"`                     // whether gzip compression is enabled
	Timeout     int64  `yaml:"timeout" json:"timeout"`         // timeout
	BufferNum   int    `yaml:"bufferNum"`                      // shard buffer count
}
17 |
--------------------------------------------------------------------------------
/global/global.go:
--------------------------------------------------------------------------------
1 | package global
2 |
3 | import (
4 | "github.com/sea-team/gofound/searcher"
5 | )
6 |
var (
	CONFIG *Config // server settings
	// NOTE(review): presumably the process-wide searcher container — confirm against core.Initialize.
	Container *searcher.Container
)
11 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/sea-team/gofound
2 |
3 | go 1.18
4 |
5 | require (
6 | github.com/Knetic/govaluate v3.0.0+incompatible
7 | github.com/emirpasic/gods v1.12.0
8 | github.com/gin-contrib/gzip v0.0.5
9 | github.com/gin-gonic/gin v1.7.7
10 | github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46
11 | github.com/shirou/gopsutil/v3 v3.22.4
12 | github.com/syndtr/goleveldb v1.0.0
13 | github.com/wangbin/jiebago v0.3.2
14 | gopkg.in/yaml.v2 v2.4.0
15 | )
16 |
17 | require (
18 | github.com/gin-contrib/sse v0.1.0 // indirect
19 | github.com/go-ole/go-ole v1.2.6 // indirect
20 | github.com/go-playground/locales v0.13.0 // indirect
21 | github.com/go-playground/universal-translator v0.17.0 // indirect
22 | github.com/go-playground/validator/v10 v10.4.1 // indirect
23 | github.com/golang/protobuf v1.5.2 // indirect
24 | github.com/golang/snappy v0.0.3 // indirect
25 | github.com/json-iterator/go v1.1.12 // indirect
26 | github.com/kr/pretty v0.2.0 // indirect
27 | github.com/leodido/go-urn v1.2.0 // indirect
28 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
29 | github.com/mattn/go-isatty v0.0.14 // indirect
30 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
31 | github.com/modern-go/reflect2 v1.0.2 // indirect
32 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
33 | github.com/tklauser/go-sysconf v0.3.10 // indirect
34 | github.com/tklauser/numcpus v0.4.0 // indirect
35 | github.com/ugorji/go/codec v1.1.7 // indirect
36 | github.com/yusufpapurcu/wmi v1.2.2 // indirect
37 | golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect
38 | golang.org/x/net v0.0.0-20220412020605-290c469a71a5 // indirect
39 | golang.org/x/sys v0.0.0-20220412211240-33da011f77ad // indirect
40 | golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect
41 | google.golang.org/protobuf v1.28.0 // indirect
42 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
43 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
44 | )
45 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/Knetic/govaluate v3.0.0+incompatible h1:7o6+MAPhYTCF0+fdvoz1xDedhRb4f6s9Tn1Tt7/WTEg=
2 | github.com/Knetic/govaluate v3.0.0+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
6 | github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg=
7 | github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o=
8 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
9 | github.com/gin-contrib/gzip v0.0.5 h1:mhnVU32YnnBh2LPH2iqRqsA/eR7SAqRaD388jL2s/j0=
10 | github.com/gin-contrib/gzip v0.0.5/go.mod h1:OPIK6HR0Um2vNmBUTlayD7qle4yVVRZT0PyhdUigrKk=
11 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
12 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
13 | github.com/gin-gonic/gin v1.7.4/go.mod h1:jD2toBW3GZUr5UMcdrwQA10I7RuaFOl/SGeDjXkfUtY=
14 | github.com/gin-gonic/gin v1.7.7 h1:3DoBmSbJbZAWqXJC3SLjAPfutPJJRN1U5pALB7EeTTs=
15 | github.com/gin-gonic/gin v1.7.7/go.mod h1:axIBovoeJpVj8S3BwE0uPMTeReE4+AfFtqpqaZ1qq1U=
16 | github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
17 | github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
18 | github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A=
19 | github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
20 | github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q=
21 | github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
22 | github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no=
23 | github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
24 | github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE=
25 | github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4=
26 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
27 | github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
28 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
29 | github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
30 | github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
31 | github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
32 | github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
33 | github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
34 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
35 | github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
36 | github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o=
37 | github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
38 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
39 | github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
40 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
41 | github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
42 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
43 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
44 | github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs=
45 | github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
46 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
47 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
48 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
49 | github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y=
50 | github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
51 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
52 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
53 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
54 | github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
55 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
56 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
57 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
58 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
59 | github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
60 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
61 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
62 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
63 | github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs=
64 | github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
65 | github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
66 | github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
67 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
68 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
69 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
70 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
71 | github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8=
72 | github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8=
73 | github.com/shirou/gopsutil/v3 v3.22.4 h1:srAQaiX6jX/cYL6q29aE0m8lOskT9CurZ9N61YR3yoI=
74 | github.com/shirou/gopsutil/v3 v3.22.4/go.mod h1:D01hZJ4pVHPpCTZ3m3T2+wDF2YAGfd+H4ifUguaQzHM=
75 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
76 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
77 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
78 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
79 | github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
80 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
81 | github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
82 | github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
83 | github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw=
84 | github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk=
85 | github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o=
86 | github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ=
87 | github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
88 | github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs=
89 | github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
90 | github.com/wangbin/jiebago v0.3.2 h1:reQKp0xTXWFK7eQ19L6Ofq5xODSR2hcam55qcdCCNpw=
91 | github.com/wangbin/jiebago v0.3.2/go.mod h1:PAqQLauF0qAzy/63jBvO7Goh0oYBq1ocr0OXHLlujwQ=
92 | github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg=
93 | github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
94 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
95 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
96 | golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 h1:kUhD7nTDoI3fVd9G4ORWrbV5NY0liEs/Jg2pv5f+bBA=
97 | golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
98 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
99 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
100 | golang.org/x/net v0.0.0-20220412020605-290c469a71a5 h1:bRb386wvrE+oBNdF1d/Xh9mQrfQ4ecYhW5qJ5GvTGT4=
101 | golang.org/x/net v0.0.0-20220412020605-290c469a71a5/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
102 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
103 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
104 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
105 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
106 | golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
107 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
108 | golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
109 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
110 | golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
111 | golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0=
112 | golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
113 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
114 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
115 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
116 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
117 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
118 | golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f h1:GGU+dLjvlC3qDwqYgL6UgRmHXhOOgns0bZu2Ty5mm6U=
119 | golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
120 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
121 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
122 | google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
123 | google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
124 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
125 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
126 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
127 | gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
128 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
129 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
130 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
131 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
132 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
133 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
134 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
135 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
136 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
137 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
138 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
139 |
--------------------------------------------------------------------------------
/gofound.d:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # chkconfig: 2345 90 10
3 | # Description: Startup script for gofound on Debian. Place in /etc/init.d and
4 | # run 'update-rc.d -f gofound defaults', or use the appropriate command on your
5 | # distro. For CentOS/Redhat run: 'chkconfig --add gofound'
6 |
7 | ### BEGIN INIT INFO
8 | #
9 | # Provides: gofound.d
10 | # Required-Start: $local_fs $remote_fs
11 | # Required-Stop: $local_fs $remote_fs
12 | # Default-Start: 2 3 4 5
13 | # Default-Stop: 0 1 6
14 | # Short-Description: starts gofound
15 | # Description: This file should be used to gofound scripts to be placed in /etc/init.d.
16 | #
17 | ### END INIT INFO
18 |
19 |
20 | ## 2345是默认启动级别,级别有0-6共7个级别。 90是启动优先级,10是停止优先级,优先级范围是0-100,数字越大,优先级越低。
21 |
22 | ## Fill in name of program here.
23 | PROG="gofound"
24 | PROG_PATH="/usr/local/bin" ## Not need, but sometimes helpful (if $PROG resides in /opt for example).
25 | PROG_ARGS="--config=/gofound_path/config.yaml"
26 | PID_PATH="/var/run/"
27 |
# start launches $PROG in the background and records its PID in
# $PID_PATH/$PROG.pid. It exits with status 1 if a PID file already exists.
start() {
    if [ -e "$PID_PATH/$PROG.pid" ]; then
        ## A PID file exists, so the program is treated as running; exit with error.
        echo "Error! $PROG_PATH/$PROG is currently running!" 1>&2
        exit 1
    else
        ## Output is appended to /var/log/$PROG; change the target to /dev/null to discard it.
        ## stdout must be redirected BEFORE "2>&1": with the opposite order stderr is
        ## duplicated from the original stdout and never reaches the log file.
        ## $PROG_ARGS is intentionally unquoted so it splits into separate arguments.
        $PROG_PATH/$PROG $PROG_ARGS >>"/var/log/$PROG" 2>&1 &
        ## $! is the PID of the background job just started; this avoids scraping
        ## `ps`, which is racy and can match more than one process.
        pid=$!
        echo "$PROG_PATH/$PROG($pid) started"
        echo "$pid" > "$PID_PATH/$PROG.pid"
    fi
}
43 |
# stop kills every process whose command line matches $PROG_PATH/$PROG and
# removes the PID file. If no PID file exists it only prints an error.
stop() {
    echo "begin stop"
    if [ -e "$PID_PATH/$PROG.pid" ]; then
        ## A PID file exists; look up the live process id(s) by command line.
        pid=$(ps -ef | grep $PROG_PATH/$PROG | grep -v grep | awk '{print $2}')
        ## With a stale PID file no process matches; skip kill instead of
        ## invoking it with no arguments (which only prints a usage error).
        if [ -n "$pid" ]; then
            ## Unquoted on purpose: $pid may hold several whitespace-separated PIDs.
            kill $pid
        fi

        rm -f "$PID_PATH/$PROG.pid"
        echo "$PROG_PATH/$PROG($pid) stopped"
    else
        ## No PID file: the program is treated as not running.
        echo "Error! $PROG_PATH/$PROG not started!" 1>&2
    fi
}
59 |
# status reports whether $PROG is running, based on the PID file and a
# lookup of matching processes in the `ps` output.
status() {
    if [ -e "$PID_PATH/$PROG.pid" ]; then
        ## A PID file exists; check whether a matching process is actually alive.
        pid=$(ps -ef | grep $PROG_PATH/$PROG | grep -v grep | awk '{print $2}')

        ## Quoted -n test: the unquoted form `[ $pid ]` is a syntax error for
        ## `test` when more than one PID is returned.
        if [ -n "$pid" ]; then
            echo "$PROG_PATH/$PROG($pid) is running..."
        else
            echo "$PROG_PATH/$PROG dead but pid file exists" 1>&2
        fi
    else
        ## No PID file: the program is treated as not running.
        echo "Error! $PROG_PATH/$PROG not started!" 1>&2
    fi
}
76 |
77 |
## Refuse to continue unless the script runs as root.
## Found at http://www.cyberciti.biz/tips/shell-root-user-check-script.html
if [[ "$(id -u)" != "0" ]]; then
    echo "This script must be run as root" 1>&2
    exit 1
fi

## Dispatch on the first argument. Every recognised action ends with exit
## status 0 (see the final line); anything else prints usage and exits 1.
case "$1" in
    start)
        start
        ;;
    stop)
        ## The log file is reset before the process is stopped.
        echo '' > /var/log/$PROG
        stop
        ;;
    reload|restart|force-reload)
        stop
        start
        ;;
    status)
        status
        ;;
    *)
        echo "Usage: $0 {start|stop|restart|status}" 1>&2
        exit 1
        ;;
esac
exit 0
109 |
--------------------------------------------------------------------------------
/gofound.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #每分钟检测gofound运行
4 | #*/1 * * * * /data/gofound/gofound.sh > /dev/null 2>&1
5 |
6 | #每天3点重启gofound
7 | #0 3 * * * /etc/init.d/gofound.d restart
8 |
# Count the processes whose command line mentions both "gofound" and "config.yaml".
count=$(ps -fe | grep "gofound" | grep -c "config.yaml")

echo $count
if [ $count -lt 1 ]; then
    # Nothing matched: record the restart time (the file is overwritten each
    # time) and restart the service through the init script.
    echo "restart"
    echo "$(date +%Y-%m-%d_%H:%M:%S)" >/data/gofound/restart.log
    /etc/init.d/gofound.d restart
else
    echo "is running"
fi
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "github.com/sea-team/gofound/core"
5 | )
6 |
7 | func main() {
8 | //初始化容器和参数解析
9 | core.Initialize()
10 | }
11 |
--------------------------------------------------------------------------------
/sdk/SDK 设计指南.md:
--------------------------------------------------------------------------------
1 | # GoFound SDK设计指南
2 |
3 | ## 支持自定义配置
4 | 在支持自定义配置的时候,同时提供默认配置项
## 支持 `gofound` 提供的所有操作
包括增删改查等
7 |
8 |
--------------------------------------------------------------------------------
/sdk/base.go:
--------------------------------------------------------------------------------
1 | package gofound
2 |
3 | import (
4 | "github.com/sea-team/gofound/searcher/model"
5 | "github.com/sea-team/gofound/searcher/system"
6 | "runtime"
7 | )
8 |
9 | // Query 查询
10 | func (c *Client) Query(req *model.SearchRequest) (*model.SearchResult, error) {
11 | r, err := c.container.GetDataBase(req.Database).MultiSearch(req)
12 | if err != nil {
13 | return nil, err
14 | }
15 |
16 | return r, nil
17 | }
18 |
// GC triggers a garbage collection by calling runtime.GC.
func (*Client) GC() {
	runtime.GC()
}
22 | func (c *Client) Status() (map[string]interface{}, error) {
23 | var m runtime.MemStats
24 | runtime.ReadMemStats(&m)
25 |
26 | // TODO 其他系统信息
27 | r := map[string]interface{}{
28 | "memory": system.GetMemStat(),
29 | "cpu": system.GetCPUStatus(),
30 | "disk": system.GetDiskStat(),
31 | }
32 | return r, nil
33 | }
34 |
--------------------------------------------------------------------------------
/sdk/client.go:
--------------------------------------------------------------------------------
1 | package gofound
2 |
3 | import (
4 | "fmt"
5 | "github.com/sea-team/gofound/core"
6 | "github.com/sea-team/gofound/global"
7 | "github.com/sea-team/gofound/searcher"
8 | "os"
9 | "runtime"
10 | "sync"
11 | )
12 |
// once is a package-level sync.Once.
// NOTE(review): no function in the sdk sources shown alongside this file
// references it — confirm whether it is still needed.
var once sync.Once
14 |
// Client should hide implementation details from its callers: it exposes as
// few methods as possible while still covering the required functionality.
type Client struct {
	config    *global.Config      // service configuration
	container *searcher.Container // the running container
}
21 |
22 | func newDefaultConfig() *global.Config {
23 | return &global.Config{
24 | Addr: "127.0.0.1:5678",
25 | Data: fmt.Sprintf(".%sdata", string(os.PathSeparator)),
26 | Debug: true,
27 | Dictionary: "./data/dictionary.txt",
28 | EnableAdmin: true,
29 | Gomaxprocs: runtime.NumCPU() * 2,
30 | Shard: 0,
31 | Auth: "",
32 | EnableGzip: true,
33 | Timeout: 10 * 60,
34 | }
35 | }
36 | func newTokenizerAndContainer(config *global.Config) *searcher.Container {
37 | tokenizer := core.NewTokenizer(global.CONFIG.Dictionary)
38 | return core.NewContainer(tokenizer)
39 | }
40 |
41 | // NewClient 通过参数进行配置,必须指定全部参数
42 | func NewClient(config *global.Config) *Client {
43 | global.CONFIG = config
44 | //初始化分词器
45 | container := newTokenizerAndContainer(config)
46 | global.Container = container
47 | return &Client{
48 | config: config,
49 | container: container,
50 | }
51 | }
52 |
53 | // Default 使用默认参数创建服务
54 | func Default() *Client {
55 | global.CONFIG = newDefaultConfig()
56 | container := newTokenizerAndContainer(global.CONFIG)
57 | global.Container = container
58 | return &Client{
59 | config: global.CONFIG,
60 | container: container,
61 | }
62 | }
63 |
64 | // SetAddr 设置Web服务地址
65 | func (c *Client) SetAddr(addr string) *Client {
66 | if addr == "" {
67 | return c
68 | }
69 | c.config.Addr = addr
70 | return c
71 | }
72 |
73 | // SetData 设置数据存放地址
74 | func (c *Client) SetData(dir string) *Client {
75 | if dir == "" {
76 | return c
77 | }
78 | c.config.Data = dir
79 | return c
80 | }
81 |
82 | //TODO 其他配置项
83 |
--------------------------------------------------------------------------------
/sdk/database.go:
--------------------------------------------------------------------------------
1 | package gofound
2 |
3 | import (
4 | "github.com/sea-team/gofound/searcher"
5 |
6 | "github.com/syndtr/goleveldb/leveldb/errors"
7 | )
8 |
// Show lists the databases known to the container, keyed by name.
// The returned error is always nil.
func (c *Client) Show() (map[string]*searcher.Engine, error) {
	// The error return exists only to keep the method style consistent.
	return c.container.GetDataBases(), nil
}
14 |
15 | // Drop 删除数据库
16 | func (c *Client) Drop(dbName string) error {
17 | if dbName == "" {
18 | return errors.New("database not exist")
19 | }
20 | if err := c.container.DropDataBase(dbName); err != nil {
21 | return err
22 | }
23 | return nil
24 | }
25 |
26 | // Create 创建数据库
27 | func (c *Client) Create(dbName string) (*searcher.Engine, error) {
28 | if dbName == "" {
29 | return nil, errors.New("database name is empty")
30 | }
31 | return c.container.GetDataBase(dbName), nil
32 | }
33 |
--------------------------------------------------------------------------------
/sdk/index.go:
--------------------------------------------------------------------------------
1 | package gofound
2 |
3 | import (
4 | "errors"
5 | "github.com/sea-team/gofound/searcher/model"
6 | )
7 |
8 | // AddIndex 添加索引
9 | func (c *Client) AddIndex(dbName string, request *model.IndexDoc) error {
10 | if request.Text == "" {
11 | return errors.New("text is empty")
12 | }
13 | c.container.GetDataBase(dbName).IndexDocument(request)
14 |
15 | return nil
16 | }
17 |
18 | // BatchAddIndex 批次添加索引
19 | func (c *Client) BatchAddIndex(dbName string, documents []*model.IndexDoc) error {
20 | db := c.container.GetDataBase(dbName)
21 | // 数据预处理
22 | for _, doc := range documents {
23 | if doc.Text == "" {
24 | return errors.New("text is empty")
25 | }
26 | if doc.Document == nil {
27 | return errors.New("document is empty")
28 | }
29 | }
30 | for _, doc := range documents {
31 | go db.IndexDocument(doc)
32 | }
33 | return nil
34 | }
35 |
36 | // RemoveIndex 删除索引
37 | func (c *Client) RemoveIndex(dbName string, data *model.RemoveIndexModel) error {
38 | db := c.container.GetDataBase(dbName)
39 | if err := db.RemoveIndex(data.Id); err != nil {
40 | return err
41 | }
42 | return nil
43 | }
44 |
--------------------------------------------------------------------------------
/sdk/word.go:
--------------------------------------------------------------------------------
1 | package gofound
2 |
// WordCut splits keyword into words using the container's tokenizer.
func (c *Client) WordCut(keyword string) []string {
	return c.container.Tokenizer.Cut(keyword)
}
7 |
8 | // BatchWordCut 批量分词
9 | func (c *Client) BatchWordCut(keywords []string) *[][]string {
10 | res := make([][]string, len(keywords))
11 | for _, w := range keywords {
12 | res = append(res, c.container.Tokenizer.Cut(w))
13 | }
14 | return &res
15 | }
16 |
--------------------------------------------------------------------------------
/searcher/arrays/arrays.go:
--------------------------------------------------------------------------------
1 | package arrays
2 |
// LOW and HIGH are integer constants exported by this package.
// NOTE(review): nothing in this file uses them — presumably they select the
// lower/upper end in some caller; confirm.
const (
	LOW  = 0
	HIGH = 1
)
7 |
// BinarySearch reports whether target occurs in arr, which must be sorted in
// ascending order.
//
// It narrows [low, high] to the first position whose value is >= target and
// then compares that element with target. A nil or empty slice yields false;
// the earlier version only checked for nil and panicked on an empty non-nil
// slice.
func BinarySearch(arr []uint32, target uint32) bool {
	if len(arr) == 0 {
		return false
	}
	low, high := 0, len(arr)-1
	for low < high {
		mid := low + (high-low)/2
		if arr[mid] >= target {
			high = mid
		} else {
			low = mid + 1
		}
	}
	return arr[low] == target
}
22 |
// ArrayUint32Exists reports whether target occurs anywhere in arr, scanning
// the slice from front to back.
func ArrayUint32Exists(arr []uint32, target uint32) bool {
	for i := 0; i < len(arr); i++ {
		if arr[i] == target {
			return true
		}
	}
	return false
}
31 |
// ArrayStringExists reports whether str occurs anywhere in arr, scanning the
// slice from front to back.
func ArrayStringExists(arr []string, str string) bool {
	for i := 0; i < len(arr); i++ {
		if arr[i] == str {
			return true
		}
	}
	return false
}
40 |
// MergeArrayUint32 merges two slices: every value of source that is not
// already present is appended to target, and the resulting slice is returned.
//
// Values already in target keep their positions; new values follow in the
// order they first appear in source.
//
// Membership is tracked in a set. The earlier version binary-searched target
// while appending to it, which stops working as soon as an appended value
// breaks the sort order and could let duplicates through.
func MergeArrayUint32(target []uint32, source []uint32) []uint32 {
	seen := make(map[uint32]struct{}, len(target)+len(source))
	for _, val := range target {
		seen[val] = struct{}{}
	}
	for _, val := range source {
		if _, ok := seen[val]; ok {
			continue
		}
		seen[val] = struct{}{}
		target = append(target, val)
	}
	return target
}
51 |
// Find returns the index of the first element of arr equal to target, or -1
// when there is none.
func Find(arr []uint32, target uint32) int {
	for pos := 0; pos < len(arr); pos++ {
		if arr[pos] == target {
			return pos
		}
	}
	return -1
}
60 |
--------------------------------------------------------------------------------
/searcher/container.go:
--------------------------------------------------------------------------------
1 | package searcher
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "github.com/sea-team/gofound/searcher/words"
7 | "log"
8 | "os"
9 | "runtime"
10 | "unsafe"
11 | )
12 |
// Container holds the engines (one per database) that live under a common
// data directory, together with the settings handed to every new engine.
type Container struct {
	Dir       string             // data directory; each sub-directory is one database
	engines   map[string]*Engine // engines keyed by database name
	Debug     bool               // debug mode
	Tokenizer *words.Tokenizer   // tokenizer shared by all engines
	Shard     int                // number of shards
	Timeout   int64              // timeout after which a database is closed
	BufferNum int                // buffer size per shard
}
22 |
23 | func (c *Container) Init() error {
24 |
25 | c.engines = make(map[string]*Engine)
26 |
27 | //读取当前路径下的所有目录,就是数据库名称
28 | dirs, err := os.ReadDir(c.Dir)
29 | if err != nil {
30 | if os.IsNotExist(err) {
31 | //创建
32 | err := os.MkdirAll(c.Dir, os.ModePerm)
33 | if err != nil {
34 | return err
35 | }
36 | } else {
37 | return err
38 | }
39 | }
40 | //初始化数据库
41 | for _, dir := range dirs {
42 | if dir.IsDir() {
43 | c.engines[dir.Name()] = c.GetDataBase(dir.Name())
44 | log.Println("db:", dir.Name())
45 | }
46 | }
47 |
48 | return nil
49 | }
50 |
51 | // NewEngine 创建一个引擎
52 | func (c *Container) NewEngine(name string) *Engine {
53 | var engine = &Engine{
54 | IndexPath: fmt.Sprintf("%s%c%s", c.Dir, os.PathSeparator, name),
55 | DatabaseName: name,
56 | Tokenizer: c.Tokenizer,
57 | Shard: c.Shard,
58 | Timeout: c.Timeout,
59 | BufferNum: c.BufferNum,
60 | }
61 | option := engine.GetOptions()
62 |
63 | engine.InitOption(option)
64 | engine.IsDebug = c.Debug
65 |
66 | return engine
67 | }
68 |
69 | // GetDataBase 获取或创建引擎
70 | func (c *Container) GetDataBase(name string) *Engine {
71 |
72 | //默认数据库名为default
73 | if name == "" {
74 | name = "default"
75 | }
76 |
77 | //log.Println("Get DataBase:", name)
78 | engine, ok := c.engines[name]
79 | if !ok {
80 | //创建引擎
81 | engine = c.NewEngine(name)
82 | c.engines[name] = engine
83 | //释放引擎
84 | }
85 |
86 | return engine
87 | }
88 |
// GetDataBases returns the container's engine map (the map itself, not a
// copy), keyed by database name.
//
// As a side effect it prints one "name:size" line per engine to standard
// output.
func (c *Container) GetDataBases() map[string]*Engine {
	for _, engine := range c.engines {
		// NOTE(review): this is the size of a pointer to the loop variable,
		// so the printed number is the same for every engine and says
		// nothing about the engine's memory use.
		size := unsafe.Sizeof(&engine)
		fmt.Printf("%s:%d\n", engine.DatabaseName, size)
	}
	return c.engines
}
97 |
// GetDataBaseNumber returns the number of engines currently registered in the
// container.
func (c *Container) GetDataBaseNumber() int {
	return len(c.engines)
}
101 |
102 | func (c *Container) GetIndexCount() int64 {
103 | var count int64
104 | for _, engine := range c.engines {
105 | count += engine.GetIndexCount()
106 | }
107 | return count
108 | }
109 |
110 | func (c *Container) GetDocumentCount() int64 {
111 | var count int64
112 | for _, engine := range c.engines {
113 | count += engine.GetDocumentCount()
114 | }
115 | return count
116 | }
117 |
118 | // DropDataBase 删除数据库
119 | func (c *Container) DropDataBase(name string) error {
120 | if _, ok := c.engines[name]; !ok {
121 | return errors.New("数据库不存在")
122 | }
123 | err := c.engines[name].Drop()
124 | if err != nil {
125 | return err
126 | }
127 |
128 | delete(c.engines, name)
129 | //释放资源
130 | runtime.GC()
131 |
132 | return nil
133 | }
134 |
--------------------------------------------------------------------------------
/searcher/container_test.go:
--------------------------------------------------------------------------------
1 | package searcher
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
8 | func TestContainer_Init(t *testing.T) {
9 | c := &Container{
10 | Dir: "/Users/panjing/GolandProjects/github.com/sea-team/gofound/dbs",
11 | Debug: true,
12 | }
13 | err := c.Init()
14 | if err != nil {
15 | panic(err)
16 | }
17 |
18 | test := c.GetDataBase("test")
19 |
20 | fmt.Println(test.GetIndexCount())
21 |
22 | all := c.GetDataBases()
23 | for name, engine := range all {
24 | fmt.Println(name)
25 | fmt.Println(engine)
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/searcher/engine.go:
--------------------------------------------------------------------------------
1 | package searcher
2 |
3 | import (
4 | "fmt"
5 | "github.com/sea-team/gofound/searcher/arrays"
6 | "github.com/sea-team/gofound/searcher/model"
7 | "github.com/sea-team/gofound/searcher/pagination"
8 | "github.com/sea-team/gofound/searcher/sorts"
9 | "github.com/sea-team/gofound/searcher/storage"
10 | "github.com/sea-team/gofound/searcher/utils"
11 | "github.com/sea-team/gofound/searcher/words"
12 | "log"
13 | "os"
14 | "runtime"
15 | "sort"
16 | "strings"
17 | "sync"
18 | "time"
19 |
20 | "github.com/Knetic/govaluate"
21 | "github.com/syndtr/goleveldb/leveldb/errors"
22 | )
23 |
// Engine is the search engine for one database. Its data is split over Shard
// shards, each with its own storages and its own indexing queue.
type Engine struct {
	IndexPath string  // directory holding the index files
	Option    *Option // storage names

	invertedIndexStorages []*storage.LeveldbStorage // inverted index: key = word, value = ids of the documents containing it
	positiveIndexStorages []*storage.LeveldbStorage // positive (forward) index: key = document id, value = its words; used for relevance
	docStorages           []*storage.LeveldbStorage // document store

	sync.Mutex                                  // guards index updates
	sync.WaitGroup                              // lets readers and writers wait until Init has finished
	addDocumentWorkerChan []chan *model.IndexDoc // per-shard queues of documents waiting to be indexed
	IsDebug               bool                  // debug mode
	Tokenizer             *words.Tokenizer      // tokenizer
	DatabaseName          string                // database name

	Shard     int   // number of shards
	Timeout   int64 // timeout in seconds
	BufferNum int   // queue capacity per shard

	documentCount int64 // total number of documents; -1 means not computed yet
}
45 |
// Option holds the base names of the three storages; the shard number is
// appended to each name to form the per-shard storage path.
type Option struct {
	InvertedIndexName string // inverted index
	PositiveIndexName string // positive (forward) index
	DocIndexName      string // document store
}
51 |
52 | // Init 初始化索引引擎
53 | func (e *Engine) Init() {
54 | e.Add(1)
55 | defer e.Done()
56 |
57 | if e.Option == nil {
58 | e.Option = e.GetOptions()
59 | }
60 | if e.Timeout == 0 {
61 | e.Timeout = 10 * 3 // 默认30s
62 | }
63 | //-1代表没有初始化
64 | e.documentCount = -1
65 | //log.Println("数据存储目录:", e.IndexPath)
66 | log.Println("chain num:", e.Shard*e.BufferNum)
67 |
68 | e.addDocumentWorkerChan = make([]chan *model.IndexDoc, e.Shard)
69 | //初始化文件存储
70 | for shard := 0; shard < e.Shard; shard++ {
71 |
72 | //初始化chan
73 | worker := make(chan *model.IndexDoc, e.BufferNum)
74 | e.addDocumentWorkerChan[shard] = worker
75 |
76 | //初始化chan
77 | go e.DocumentWorkerExec(worker)
78 |
79 | s, err := storage.NewStorage(e.getFilePath(fmt.Sprintf("%s_%d", e.Option.DocIndexName, shard)), e.Timeout)
80 | if err != nil {
81 | panic(err)
82 | }
83 | e.docStorages = append(e.docStorages, s)
84 |
85 | //初始化Keys存储
86 | ks, kerr := storage.NewStorage(e.getFilePath(fmt.Sprintf("%s_%d", e.Option.InvertedIndexName, shard)), e.Timeout)
87 | if kerr != nil {
88 | panic(err)
89 | }
90 | e.invertedIndexStorages = append(e.invertedIndexStorages, ks)
91 |
92 | //id和keys映射
93 | iks, ikerr := storage.NewStorage(e.getFilePath(fmt.Sprintf("%s_%d", e.Option.PositiveIndexName, shard)), e.Timeout)
94 | if ikerr != nil {
95 | panic(ikerr)
96 | }
97 | e.positiveIndexStorages = append(e.positiveIndexStorages, iks)
98 | }
99 | go e.automaticGC()
100 | //log.Println("初始化完成")
101 | }
102 |
103 | // 自动保存索引,10秒钟检测一次
104 | func (e *Engine) automaticGC() {
105 | ticker := time.NewTicker(time.Second * 10)
106 | for {
107 | <-ticker.C
108 | //定时GC
109 | runtime.GC()
110 | if e.IsDebug {
111 | log.Println("waiting:", e.GetQueue())
112 | }
113 | }
114 | }
115 |
116 | func (e *Engine) IndexDocument(doc *model.IndexDoc) error {
117 | //数量增加
118 | e.documentCount++
119 | e.addDocumentWorkerChan[e.getShard(doc.Id)] <- doc
120 | return nil
121 | /*
122 | select {
123 | case e.addDocumentWorkerChan[e.getShard(doc.Id)] <- doc:
124 | e.documentCount++
125 | default:
126 | return errors.New("处理缓冲已满")
127 | }
128 | return nil
129 | */
130 | }
131 |
132 | // GetQueue 获取队列剩余
133 | func (e *Engine) GetQueue() int {
134 | total := 0
135 | for _, v := range e.addDocumentWorkerChan {
136 | total += len(v)
137 | }
138 | return total
139 | }
140 |
141 | // DocumentWorkerExec 添加文档队列
142 | func (e *Engine) DocumentWorkerExec(worker chan *model.IndexDoc) {
143 | for {
144 | doc := <-worker
145 | e.AddDocument(doc)
146 | }
147 | }
148 |
// getShard returns the shard that holds the document with the given id:
// id modulo the shard count.
func (e *Engine) getShard(id uint32) int {
	return int(id % uint32(e.Shard))
}
153 |
// getShardByWord returns the shard that holds the inverted index entry of
// word: the number utils.StringToInt yields for the word, modulo the shard
// count.
func (e *Engine) getShardByWord(word string) int {

	return int(utils.StringToInt(word) % uint32(e.Shard))
}
158 |
// InitOption stores option (or the defaults from GetOptions when it is nil),
// fills in default values for Shard (10) and BufferNum (1000) when they are
// not positive, and then runs Init.
func (e *Engine) InitOption(option *Option) {

	if option == nil {
		// Fall back to the default storage names.
		option = e.GetOptions()
	}
	e.Option = option
	// Default shard count.
	if e.Shard <= 0 {
		e.Shard = 10
	}
	// Default queue capacity per shard.
	if e.BufferNum <= 0 {
		e.BufferNum = 1000
	}
	// Initialize everything else.
	e.Init()

}
177 |
// getFilePath returns the path of fileName inside the engine's index
// directory, joined with the OS path separator.
func (e *Engine) getFilePath(fileName string) string {
	return e.IndexPath + string(os.PathSeparator) + fileName
}
181 |
// GetOptions returns a new Option holding the default storage names. It does
// not read the engine's current Option.
func (e *Engine) GetOptions() *Option {
	return &Option{
		DocIndexName:      "docs",
		InvertedIndexName: "inverted_index",
		PositiveIndexName: "positive_index",
	}
}
189 |
// AddDocument tokenizes a document's text and writes the document into the
// indexes.
//
// It first waits on the embedded WaitGroup, then cuts index.Text into words,
// asks optimizeIndex which words are new for index.Id, adds index.Id to the
// inverted index of each of those words, and finally stores the document and
// its word list through addPositiveIndex.
func (e *Engine) AddDocument(index *model.IndexDoc) {
	// Wait until initialization has finished.
	e.Wait()
	text := index.Text

	splitWords := e.Tokenizer.Cut(text)

	id := index.Id
	// Check whether the inverted index needs updating: the words changed or
	// the id does not exist yet.
	inserts, needUpdateInverted := e.optimizeIndex(id, splitWords)

	// Only the newly added words are written, to reduce I/O.
	if needUpdateInverted {
		for _, word := range inserts {
			e.addInvertedIndex(word, id)
		}
	}

	// TODO: detect document changes to decide whether the positive index
	// needs updating.
	e.addPositiveIndex(index, splitWords)
}
212 |
213 | // 添加倒排索引
214 | func (e *Engine) addInvertedIndex(word string, id uint32) {
215 | e.Lock()
216 | defer e.Unlock()
217 |
218 | shard := e.getShardByWord(word)
219 |
220 | s := e.invertedIndexStorages[shard]
221 |
222 | //string作为key
223 | key := []byte(word)
224 |
225 | //存在
226 | //添加到列表
227 | buf, find := s.Get(key)
228 | ids := make([]uint32, 0)
229 | if find {
230 | utils.Decoder(buf, &ids)
231 | }
232 |
233 | if !arrays.ArrayUint32Exists(ids, id) {
234 | ids = append(ids, id)
235 | }
236 |
237 | s.Set(key, utils.Encoder(ids))
238 | }
239 |
240 | // 移除删去的词
241 | func (e *Engine) optimizeIndex(id uint32, newWords []string) ([]string, bool) {
242 | // 判断id是否存在
243 | e.Lock()
244 | defer e.Unlock()
245 |
246 | // 计算差值
247 | removes, inserts, changed := e.getDifference(id, newWords)
248 | if changed {
249 | if removes != nil && len(removes) > 0 {
250 | // 移除正排索引
251 | for _, word := range removes {
252 | e.removeIdInWordIndex(id, word)
253 | }
254 | }
255 | }
256 | return inserts, changed
257 | }
258 |
259 | func (e *Engine) removeIdInWordIndex(id uint32, word string) {
260 |
261 | shard := e.getShardByWord(word)
262 |
263 | wordStorage := e.invertedIndexStorages[shard]
264 |
265 | //string作为key
266 | key := []byte(word)
267 |
268 | buf, found := wordStorage.Get(key)
269 | if found {
270 | ids := make([]uint32, 0)
271 | utils.Decoder(buf, &ids)
272 |
273 | //移除
274 | index := arrays.Find(ids, id)
275 | if index != -1 {
276 | ids = utils.DeleteArray(ids, index)
277 | if len(ids) == 0 {
278 | err := wordStorage.Delete(key)
279 | if err != nil {
280 | panic(err)
281 | }
282 | } else {
283 | wordStorage.Set(key, utils.Encoder(ids))
284 | }
285 | }
286 | }
287 |
288 | }
289 |
290 | // 计算差值
291 | // @return []string: 需要删除的词
292 | // @return bool : words出现变更返回true,否则返回false
293 | func (e *Engine) getDifference(id uint32, newWords []string) ([]string, []string, bool) {
294 | shard := e.getShard(id)
295 | wordStorage := e.positiveIndexStorages[shard]
296 | key := utils.Uint32ToBytes(id)
297 | buf, found := wordStorage.Get(key)
298 | if found {
299 | oldWords := make([]string, 0)
300 | utils.Decoder(buf, &oldWords)
301 |
302 | // 计算需要移除的
303 | removes := make([]string, 0)
304 | for _, word := range oldWords {
305 | // 旧的在新的里面不存在,就是需要移除的
306 | if !arrays.ArrayStringExists(newWords, word) {
307 | removes = append(removes, word)
308 | }
309 | }
310 | // 计算需要新增的
311 | inserts := make([]string, 0)
312 | for _, word := range newWords {
313 | if !arrays.ArrayStringExists(oldWords, word) {
314 | inserts = append(inserts, word)
315 | }
316 | }
317 | if len(removes) != 0 || len(inserts) != 0 {
318 | return removes, inserts, true
319 | }
320 | // 没有改变
321 | return removes, inserts, false
322 | }
323 | // id不存在,相当于insert
324 | return nil, newWords, true
325 | }
326 |
// addPositiveIndex stores a document and its words under the document id:
// id => document (with words) in the document storage, and id => words in
// the positive index. Both writes go to the shard chosen by the id and
// happen while the engine lock is held.
func (e *Engine) addPositiveIndex(index *model.IndexDoc, keys []string) {
	e.Lock()
	defer e.Unlock()

	key := utils.Uint32ToBytes(index.Id)
	shard := e.getShard(index.Id)
	docStorage := e.docStorages[shard]

	// Storage mapping an id to its words.
	positiveIndexStorage := e.positiveIndexStorages[shard]

	doc := &model.StorageIndexDoc{
		IndexDoc: index,
		Keys:     keys,
	}

	// Store the document together with its id and words.
	docStorage.Set(key, utils.Encoder(doc))

	// Store the id => words mapping.
	positiveIndexStorage.Set(key, utils.Encoder(keys))
}
350 |
351 | // MultiSearch 多线程搜索
352 | func (e *Engine) MultiSearch(request *model.SearchRequest) (*model.SearchResult, error) {
353 | //等待搜索初始化完成
354 | e.Wait()
355 |
356 | //分词搜索
357 | words := e.Tokenizer.Cut(request.Query)
358 |
359 | fastSort := &sorts.FastSort{
360 | IsDebug: e.IsDebug,
361 | Order: request.Order,
362 | }
363 |
364 | _time := utils.ExecTime(func() {
365 |
366 | base := len(words)
367 | wg := &sync.WaitGroup{}
368 | wg.Add(base)
369 |
370 | for _, word := range words {
371 | go e.processKeySearch(word, fastSort, wg)
372 | }
373 | wg.Wait()
374 | })
375 | if e.IsDebug {
376 | log.Println("搜索时间:", _time, "ms")
377 | }
378 | // 处理分页
379 | request = request.GetAndSetDefault()
380 |
381 | //计算交集得分和去重
382 | fastSort.Process()
383 |
384 | wordMap := make(map[string]bool)
385 | for _, word := range words {
386 | wordMap[word] = true
387 | }
388 |
389 | //读取文档
390 | var result = &model.SearchResult{
391 | Total: fastSort.Count(),
392 | Page: request.Page,
393 | Limit: request.Limit,
394 | Words: words,
395 | }
396 |
397 | t, err := utils.ExecTimeWithError(func() error {
398 |
399 | pager := new(pagination.Pagination)
400 |
401 | pager.Init(request.Limit, fastSort.Count())
402 | //设置总页数
403 | result.PageCount = pager.PageCount
404 |
405 | //读取单页的id
406 | if pager.PageCount != 0 {
407 |
408 | start, end := pager.GetPage(request.Page)
409 | if request.ScoreExp != "" {
410 | // 分数表达式不为空,获取所有的数据
411 | start, end = 0, pager.Total
412 | }
413 |
414 | var resultItems = make([]model.SliceItem, 0)
415 | fastSort.GetAll(&resultItems, start, end)
416 |
417 | count := len(resultItems)
418 |
419 | result.Documents = make([]model.ResponseDoc, count)
420 | //只读取前面100个
421 | wg := new(sync.WaitGroup)
422 | wg.Add(count)
423 | for index, item := range resultItems {
424 | go e.getDocument(item, &result.Documents[index], request, &wordMap, wg)
425 | }
426 | wg.Wait()
427 | if request.ScoreExp != "" {
428 | // 生成计算表达式
429 | exp, err := govaluate.NewEvaluableExpression(request.ScoreExp)
430 | if err != nil {
431 | return err
432 | }
433 | parameters := make(map[string]interface{})
434 | // 根据表达式计算分数
435 | for i, doc := range result.Documents {
436 | parameters["score"] = doc.Score
437 | for k, v := range doc.Document {
438 | parameters["document."+k] = v
439 | }
440 | val, err := exp.Evaluate(parameters)
441 | if err != nil {
442 | log.Printf("表达式执行'%v'错误: %v 值内容: %v", request.ScoreExp, err, parameters)
443 | } else {
444 | result.Documents[i].Score = int(val.(float64))
445 | }
446 | }
447 | if request.Order == "desc" {
448 | sort.Sort(sort.Reverse(model.ResponseDocSort(result.Documents)))
449 | } else {
450 | sort.Sort(model.ResponseDocSort(result.Documents))
451 | }
452 | // 取出page
453 | start, end := pager.GetPage(request.Page)
454 | result.Documents = result.Documents[start:end]
455 | }
456 | }
457 | return nil
458 | })
459 | if e.IsDebug {
460 | log.Println("处理数据耗时:", _time, "ms")
461 | }
462 | if err != nil {
463 | return nil, err
464 | }
465 | result.Time = _time + t
466 |
467 | return result, nil
468 | }
469 |
470 | func (e *Engine) getDocument(item model.SliceItem, doc *model.ResponseDoc, request *model.SearchRequest, wordMap *map[string]bool, wg *sync.WaitGroup) {
471 | buf := e.GetDocById(item.Id)
472 | defer wg.Done()
473 | doc.Score = item.Score
474 |
475 | if buf != nil {
476 | //gob解析
477 | storageDoc := new(model.StorageIndexDoc)
478 | utils.Decoder(buf, &storageDoc)
479 | doc.Document = storageDoc.Document
480 | doc.Keys = storageDoc.Keys
481 | text := storageDoc.Text
482 | //处理关键词高亮
483 | highlight := request.Highlight
484 | if highlight != nil {
485 | //全部小写
486 | text = strings.ToLower(text)
487 | //还可以优化,只替换击中的词
488 | for _, key := range storageDoc.Keys {
489 | if ok := (*wordMap)[key]; ok {
490 | text = strings.ReplaceAll(text, key, fmt.Sprintf("%s%s%s", highlight.PreTag, key, highlight.PostTag))
491 | }
492 | }
493 | //放置原始文本
494 | doc.OriginalText = storageDoc.Text
495 | }
496 | doc.Text = text
497 | doc.Id = item.Id
498 |
499 | }
500 |
501 | }
502 |
503 | func (e *Engine) processKeySearch(word string, fastSort *sorts.FastSort, wg *sync.WaitGroup) {
504 | defer wg.Done()
505 |
506 | shard := e.getShardByWord(word)
507 | //读取id
508 | invertedIndexStorage := e.invertedIndexStorages[shard]
509 | key := []byte(word)
510 |
511 | buf, find := invertedIndexStorage.Get(key)
512 | if find {
513 | ids := make([]uint32, 0)
514 | //解码
515 | utils.Decoder(buf, &ids)
516 | fastSort.Add(&ids)
517 | }
518 |
519 | }
520 |
// GetIndexCount returns the number of index entries: the sum of GetCount over
// the inverted index storages of all shards.
func (e *Engine) GetIndexCount() int64 {
	var size int64
	for i := 0; i < e.Shard; i++ {
		size += e.invertedIndexStorages[i].GetCount()
	}
	return size
}
529 |
530 | // GetDocumentCount 获取文档数量
531 | func (e *Engine) GetDocumentCount() int64 {
532 | if e.documentCount == -1 {
533 | var count int64
534 | //使用多线程加速统计
535 | wg := sync.WaitGroup{}
536 | wg.Add(e.Shard)
537 | //这里的统计可能会出现数据错误,因为没加锁
538 | for i := 0; i < e.Shard; i++ {
539 | go func(i int) {
540 | count += e.docStorages[i].GetCount()
541 | wg.Done()
542 | }(i)
543 | }
544 | wg.Wait()
545 | e.documentCount = count
546 | }
547 |
548 | return e.documentCount
549 | }
550 |
551 | // GetDocById 通过id获取文档
552 | func (e *Engine) GetDocById(id uint32) []byte {
553 | shard := e.getShard(id)
554 | key := utils.Uint32ToBytes(id)
555 | buf, found := e.docStorages[shard].Get(key)
556 | if found {
557 | return buf
558 | }
559 |
560 | return nil
561 | }
562 |
563 | // RemoveIndex 根据ID移除索引
564 | func (e *Engine) RemoveIndex(id uint32) error {
565 | //移除
566 | e.Lock()
567 | defer e.Unlock()
568 |
569 | shard := e.getShard(id)
570 | key := utils.Uint32ToBytes(id)
571 |
572 | //关键字和Id映射
573 | //invertedIndexStorages []*storage.LeveldbStorage
574 | //ID和key映射,用于计算相关度,一个id 对应多个key
575 | ik := e.positiveIndexStorages[shard]
576 | keysValue, found := ik.Get(key)
577 | if !found {
578 | return errors.New(fmt.Sprintf("没有找到id=%d", id))
579 | }
580 |
581 | keys := make([]string, 0)
582 | utils.Decoder(keysValue, &keys)
583 |
584 | //符合条件的key,要移除id
585 | for _, word := range keys {
586 | e.removeIdInWordIndex(id, word)
587 | }
588 |
589 | //删除id映射
590 | err := ik.Delete(key)
591 | if err != nil {
592 | return errors.New(err.Error())
593 | }
594 |
595 | //文档仓
596 | err = e.docStorages[shard].Delete(key)
597 | if err != nil {
598 | return err
599 | }
600 | //减少数量
601 | e.documentCount--
602 |
603 | return nil
604 | }
605 |
606 | func (e *Engine) Close() {
607 | e.Lock()
608 | defer e.Unlock()
609 |
610 | for i := 0; i < e.Shard; i++ {
611 | e.invertedIndexStorages[i].Close()
612 | e.positiveIndexStorages[i].Close()
613 | }
614 | }
615 |
616 | // Drop 删除
617 | func (e *Engine) Drop() error {
618 | e.Lock()
619 | defer e.Unlock()
620 | //删除文件
621 | if err := os.RemoveAll(e.IndexPath); err != nil {
622 | return err
623 | }
624 |
625 | //清空内存
626 | for i := 0; i < e.Shard; i++ {
627 | e.docStorages = make([]*storage.LeveldbStorage, 0)
628 | e.invertedIndexStorages = make([]*storage.LeveldbStorage, 0)
629 | e.positiveIndexStorages = make([]*storage.LeveldbStorage, 0)
630 | }
631 |
632 | return nil
633 | }
634 |
--------------------------------------------------------------------------------
/searcher/model/doc.go:
--------------------------------------------------------------------------------
1 | package model
2 |
// IndexDoc is a document submitted for indexing.
type IndexDoc struct {
	Id       uint32                 `json:"id,omitempty"`       // document id
	Text     string                 `json:"text,omitempty"`     // text that gets tokenized and indexed
	Document map[string]interface{} `json:"document,omitempty"` // arbitrary document fields
}
9 |
// StorageIndexDoc is the form in which a document is stored: the submitted
// document plus the words it was split into.
type StorageIndexDoc struct {
	*IndexDoc
	Keys []string `json:"keys,omitempty"` // words of the document
}
15 |
// ResponseDoc is one document of a search result.
type ResponseDoc struct {
	IndexDoc
	OriginalText string   `json:"originalText,omitempty"` // original text
	Score        int      `json:"score,omitempty"`        // score
	Keys         []string `json:"keys,omitempty"`         // words of the document
}
22 |
// RemoveIndexModel identifies the document whose index is to be removed.
type RemoveIndexModel struct {
	Id uint32 `json:"id,omitempty"` // document id
}
26 |
// ResponseDocSort provides Len, Less and Swap over a slice of ResponseDoc,
// ordering the documents by ascending Score.
type ResponseDocSort []ResponseDoc

// Len returns the number of documents.
func (r ResponseDocSort) Len() int {
	return len(r)
}

// Less reports whether document i has a lower score than document j.
func (r ResponseDocSort) Less(i, j int) bool {
	return r[i].Score < r[j].Score
}

// Swap exchanges documents i and j.
func (r ResponseDocSort) Swap(i, j int) {
	r[i], r[j] = r[j], r[i]
}
40 |
--------------------------------------------------------------------------------
/searcher/model/item.go:
--------------------------------------------------------------------------------
1 | package model
2 |
// SliceItem pairs a document id with its score.
type SliceItem struct {
	Id    uint32 // document id
	Score int    // score of the document
}
7 |
--------------------------------------------------------------------------------
/searcher/model/search.go:
--------------------------------------------------------------------------------
1 | package model
2 |
// Highlight configures keyword highlighting: a matched word is wrapped in
// PreTag and PostTag.
type Highlight struct {
	PreTag  string `json:"preTag"`  // text placed before a highlighted word
	PostTag string `json:"postTag"` // text placed after a highlighted word
}
8 |
9 | // SearchRequest 搜索请求
10 | type SearchRequest struct {
11 | Query string `json:"query,omitempty" form:"database"` // 搜索关键词
12 | Order string `json:"order,omitempty" form:"database"` // 排序类型
13 | ScoreExp string `json:"scoreExp,omitempty" form:"scoreExp"` // 分数计算表达式
14 | Page int `json:"page,omitempty" form:"database"` // 页码
15 | Limit int `json:"limit,omitempty" form:"database"` // 每页大小,最大1000,超过报错
16 | Highlight *Highlight `json:"highlight,omitempty" form:"database"` // 关键词高亮
17 | Database string `json:"database" form:"database"` // 数据库名字
18 | }
19 |
20 | func (s *SearchRequest) GetAndSetDefault() *SearchRequest {
21 |
22 | if s.Limit == 0 {
23 | s.Limit = 100
24 | }
25 | if s.Page == 0 {
26 | s.Page = 1
27 | }
28 |
29 | if s.Order == "" {
30 | s.Order = "desc"
31 | }
32 |
33 | return s
34 | }
35 |
// SearchResult is the response to a search request.
type SearchResult struct {
	Time      float64       `json:"time,omitempty"`      // time spent on the query
	Total     int           `json:"total"`               // total number of matches
	PageCount int           `json:"pageCount"`           // total number of pages
	Page      int           `json:"page,omitempty"`      // page number
	Limit     int           `json:"limit,omitempty"`     // page size
	Documents []ResponseDoc `json:"documents,omitempty"` // documents
	Words     []string      `json:"words,omitempty"`     // words the query was split into
}
46 |
--------------------------------------------------------------------------------
/searcher/pagination/page_test.go:
--------------------------------------------------------------------------------
1 | package pagination
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
8 | func TestPagination_GetPage(t *testing.T) {
9 | pagination := new(Pagination)
10 |
11 | var data []int64
12 | for i := 0; i < 100; i++ {
13 | data = append(data, int64(i))
14 | }
15 |
16 | pagination.Init(10, 100)
17 |
18 | for i := 1; i <= 10; i++ {
19 | start, end := pagination.GetPage(i)
20 | fmt.Println(start, end)
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/searcher/pagination/pagination.go:
--------------------------------------------------------------------------------
1 | package pagination
2 |
3 | import (
4 | "math"
5 | )
6 |
// Pagination splits Total items into pages of Limit items.
type Pagination struct {
	Limit int // page size

	PageCount int // total number of pages
	Total     int // total number of items
}
13 |
14 | func (p *Pagination) Init(limit int, total int) {
15 | p.Limit = limit
16 |
17 | //计算总页数
18 |
19 | p.Total = total
20 |
21 | pageCount := math.Ceil(float64(total) / float64(limit))
22 | p.PageCount = int(pageCount)
23 |
24 | }
25 |
26 | func (p *Pagination) GetPage(page int) (s int, e int) {
27 | //获取指定页数的数据
28 | if page > p.PageCount {
29 | page = p.PageCount
30 | }
31 | if page < 0 {
32 | page = 1
33 | }
34 |
35 | //从1开始
36 | page -= 1
37 |
38 | //计算起始位置
39 | start := page * p.Limit
40 | end := start + p.Limit
41 |
42 | if start > p.Total {
43 | return 0, p.Total - 1
44 | }
45 | if end > p.Total {
46 | end = p.Total
47 | }
48 |
49 | return start, end
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/searcher/sorts/fast.go:
--------------------------------------------------------------------------------
1 | package sorts
2 |
3 | import (
4 | "github.com/sea-team/gofound/searcher/model"
5 | "sort"
6 | "strings"
7 | "sync"
8 | )
9 |
const (
	// DESC is the FastSort.Order value (matched case-insensitively) that
	// makes FastSort.Sort order ids from largest to smallest.
	DESC = "desc"
)
13 |
14 | type ScoreSlice []model.SliceItem
15 |
16 | func (x ScoreSlice) Len() int {
17 | return len(x)
18 | }
19 | func (x ScoreSlice) Less(i, j int) bool {
20 | return x[i].Score < x[j].Score
21 | }
22 | func (x ScoreSlice) Swap(i, j int) {
23 | x[i], x[j] = x[j], x[i]
24 | }
25 |
// SortSlice adapts a slice of ids to sort.Interface in ascending order.
type SortSlice []uint32

// Len reports the number of ids.
func (x SortSlice) Len() int { return len(x) }

// Less reports whether id i is smaller than id j.
func (x SortSlice) Less(i, j int) bool { return x[j] > x[i] }

// Swap exchanges ids i and j.
func (x SortSlice) Swap(i, j int) {
	tmp := x[i]
	x[i] = x[j]
	x[j] = tmp
}
38 |
// Uint32Slice adapts a slice of uint32 to sort.Interface in ascending order.
type Uint32Slice []uint32

// Len reports the number of elements.
func (x Uint32Slice) Len() int {
	return len(x)
}

// Less reports whether element i is smaller than element j.
func (x Uint32Slice) Less(i, j int) bool {
	return x[j] > x[i]
}

// Swap exchanges elements i and j.
func (x Uint32Slice) Swap(i, j int) {
	tmp := x[i]
	x[i] = x[j]
	x[j] = tmp
}
44 |
// FastSort collects document ids and turns them into a list of items
// scored by how many times each id was added.
type FastSort struct {
	// Embedded lock. NOTE(review): none of the methods visible in this file
	// take it (the locking in Add is commented out) — confirm callers
	// serialize access themselves.
	sync.Mutex

	IsDebug bool

	// data holds one scored item per distinct id; it is filled by Process.
	data []model.SliceItem

	// temps is the raw list of ids accumulated by Add.
	temps []uint32

	count int // number of items in data

	Order string // id ordering; "desc" (any case) sorts descending, anything else ascending
}
58 |
59 | func (f *FastSort) Add(ids *[]uint32) {
60 | //f.Lock()
61 | //defer f.Unlock()
62 |
63 | //for _, id := range *ids {
64 | //
65 | // found, index := f.find(&id)
66 | // if found {
67 | // f.data[index].Score += 1
68 | // } else {
69 | //
70 | // f.data = append(f.data, model.SliceItem{
71 | // Id: id,
72 | // Score: 1,
73 | // })
74 | // f.Sort()
75 | // }
76 | //}
77 | //f.count = len(f.data)
78 | f.temps = append(f.temps, *ids...)
79 | }
80 |
81 | // 二分法查找
82 | func (f *FastSort) find(target *uint32) (bool, int) {
83 |
84 | low := 0
85 | high := f.count - 1
86 | for low <= high {
87 | mid := (low + high) / 2
88 | if f.data[mid].Id == *target {
89 | return true, mid
90 | } else if f.data[mid].Id > *target {
91 | high = mid - 1
92 | } else {
93 | low = mid + 1
94 | }
95 | }
96 | return false, -1
97 | //for index, item := range f.data {
98 | // if item.Id == *target {
99 | // return true, index
100 | // }
101 | //}
102 | //return false, -1
103 | }
104 |
// Count returns the number of scored items tracked in f.count.
func (f *FastSort) Count() int {
	return f.count
}
109 |
110 | // Sort 排序
111 | func (f *FastSort) Sort() {
112 | if strings.ToLower(f.Order) == DESC {
113 | sort.Sort(sort.Reverse(SortSlice(f.temps)))
114 | } else {
115 | sort.Sort(SortSlice(f.temps))
116 | }
117 | }
118 |
119 | // Process 处理数据
120 | func (f *FastSort) Process() {
121 | //计算重复
122 | f.Sort()
123 |
124 | for _, temp := range f.temps {
125 | if found, index := f.find(&temp); found {
126 | f.data[index].Score += 1
127 | } else {
128 | f.data = append(f.data, model.SliceItem{
129 | Id: temp,
130 | Score: 1,
131 | })
132 | f.count++
133 | }
134 | }
135 | //对分数进行排序
136 | sort.Sort(sort.Reverse(ScoreSlice(f.data)))
137 | }
138 | func (f *FastSort) GetAll(result *[]model.SliceItem, start int, end int) {
139 |
140 | *result = f.data[start:end]
141 | }
142 |
--------------------------------------------------------------------------------
/searcher/sorts/sort.go:
--------------------------------------------------------------------------------
1 | package sorts
2 |
3 | import (
4 | "github.com/emirpasic/gods/trees/avltree"
5 | "github.com/sea-team/gofound/searcher/utils"
6 | "log"
7 | "sync"
8 | )
9 |
// IdSort scores and orders ids using an AVL tree: the key is the id and the
// value is how many times that id has been added.
type IdSort struct {
	Tree *avltree.Tree
	sync.Mutex
}
15 |
16 | func NewIdSortTree() *IdSort {
17 | return &IdSort{
18 | Tree: &avltree.Tree{Comparator: utils.Uint32Comparator},
19 | }
20 |
21 | }
22 | func (e *IdSort) Add(key uint32) {
23 | count, found := e.Tree.Get(key)
24 | val := 1
25 | if found {
26 | val = count.(int) + 1
27 | }
28 | e.Lock()
29 | defer e.Unlock()
30 | e.Tree.Put(key, val)
31 | }
32 |
// Size returns the number of distinct ids stored in the tree.
// NOTE(review): the tree is read without taking the lock that Add uses.
func (e *IdSort) Size() int {
	return e.Tree.Size()
}
36 |
// GetAll returns every id in the tree ordered by descending score.
//
// The ids are first read in the tree's iteration order, reversed when order
// is "desc", and then reordered by score. The duration of each of the three
// phases is logged using the value returned by utils.ExecTime.
func (e *IdSort) GetAll(order string) []uint32 {
	scores := make([]int, 0)
	ids := make([]uint32, 0)
	it := e.Tree.Iterator()
	// Phase 1: copy keys and values out of the tree into parallel slices.
	_tt := utils.ExecTime(func() {
		for it.Next() {
			scores = append(scores, it.Value().(int))
			ids = append(ids, it.Key().(uint32))
		}
	})
	log.Println("迭代耗时:", _tt)

	_t := utils.ExecTime(func() {
		// Phase 2: reverse both slices in lockstep for descending ids.
		if order == "desc" {
			for i, j := 0, len(ids)-1; i < j; i, j = i+1, j-1 {
				ids[i], ids[j] = ids[j], ids[i]
				scores[i], scores[j] = scores[j], scores[i]
			}
		}
	})
	log.Println("id排序耗时:", _t)

	_t = utils.ExecTime(func() {
		// Phase 3: higher scores first, swapping ids alongside scores.
		// NOTE(review): this exchange sort is quadratic in the number of ids
		// and is not stable, so ids with equal scores do not necessarily
		// keep the order established in phase 2.
		for i := 0; i < len(scores); i++ {
			for j := i + 1; j < len(scores); j++ {
				if scores[i] < scores[j] {
					scores[i], scores[j] = scores[j], scores[i]
					ids[i], ids[j] = ids[j], ids[i]
				}
			}
		}
	})

	log.Println("得分排序耗时:", _t)

	return ids
}
77 |
--------------------------------------------------------------------------------
/searcher/storage/leveldb_storage.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "log"
5 | "sync"
6 | "time"
7 |
8 | "github.com/syndtr/goleveldb/leveldb"
9 | "github.com/syndtr/goleveldb/leveldb/filter"
10 | "github.com/syndtr/goleveldb/leveldb/opt"
11 | )
12 |
// LeveldbStorage wraps a LevelDB database that is opened on first use and
// closed again by a background task after a period without access.
//
// TODO: support transactions.
type LeveldbStorage struct {
	db       *leveldb.DB
	path     string       // database path passed to leveldb.OpenFile
	mu       sync.RWMutex // guards closed (see isClosed, ReOpen, Close)
	closed   bool         // true while db is not open
	timeout  int64        // idle seconds before task closes the db; -1 disables the check
	lastTime int64        // Unix time (seconds) of the most recent access
	count    int64        // number of keys as last counted by compute
}
23 |
24 | func (s *LeveldbStorage) autoOpenDB() {
25 | if s.isClosed() {
26 | s.ReOpen()
27 | }
28 | s.lastTime = time.Now().Unix()
29 | }
30 |
31 | // NewStorage 打开数据库
32 | func NewStorage(path string, timeout int64) (*LeveldbStorage, error) {
33 |
34 | db := &LeveldbStorage{
35 | path: path,
36 | closed: true,
37 | timeout: timeout,
38 | lastTime: time.Now().Unix(),
39 | }
40 |
41 | go db.task()
42 |
43 | return db, nil
44 | }
45 |
46 | func (s *LeveldbStorage) task() {
47 | if s.timeout == -1 {
48 | //不检查
49 | return
50 | }
51 | for {
52 |
53 | if !s.isClosed() && time.Now().Unix()-s.lastTime > s.timeout {
54 | s.Close()
55 | //log.Println("leveldb storage timeout", s.path)
56 | }
57 |
58 | time.Sleep(time.Duration(5) * time.Second)
59 |
60 | }
61 | }
62 |
63 | func openDB(path string) (*leveldb.DB, error) {
64 |
65 | ////使用布隆过滤器
66 | o := &opt.Options{
67 | Filter: filter.NewBloomFilter(10),
68 | }
69 |
70 | db, err := leveldb.OpenFile(path, o)
71 | return db, err
72 | }
73 | func (s *LeveldbStorage) ReOpen() {
74 | if !s.isClosed() {
75 | log.Println("db is not closed")
76 | return
77 | }
78 | s.mu.Lock()
79 | db, err := openDB(s.path)
80 | if err != nil {
81 | panic(err)
82 | }
83 | s.db = db
84 | s.closed = false
85 | s.mu.Unlock()
86 | //计算总条数
87 | go s.compute()
88 | }
89 |
90 | func (s *LeveldbStorage) Get(key []byte) ([]byte, bool) {
91 | s.autoOpenDB()
92 | buffer, err := s.db.Get(key, nil)
93 | if err != nil {
94 | return nil, false
95 | }
96 | return buffer, true
97 | }
98 |
99 | func (s *LeveldbStorage) Has(key []byte) bool {
100 | s.autoOpenDB()
101 | has, err := s.db.Has(key, nil)
102 | if err != nil {
103 | panic(err)
104 | }
105 | return has
106 | }
107 |
108 | func (s *LeveldbStorage) Set(key []byte, value []byte) {
109 | s.autoOpenDB()
110 | err := s.db.Put(key, value, nil)
111 | if err != nil {
112 | panic(err)
113 | }
114 | }
115 |
// Delete removes key from the database, opening the database first when it
// is closed, and returns the error reported by LevelDB.
func (s *LeveldbStorage) Delete(key []byte) error {
	s.autoOpenDB()
	return s.db.Delete(key, nil)
}
121 |
122 | // Close 关闭
123 | func (s *LeveldbStorage) Close() error {
124 | if s.isClosed() {
125 | return nil
126 | }
127 | s.mu.Lock()
128 | err := s.db.Close()
129 | if err != nil {
130 | return err
131 | }
132 | s.closed = true
133 | s.mu.Unlock()
134 | return nil
135 | }
136 |
// isClosed reports whether the database is currently closed, reading the
// flag under the read lock.
func (s *LeveldbStorage) isClosed() bool {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.closed
}
142 |
143 | func (s *LeveldbStorage) compute() {
144 | var count int64
145 | iter := s.db.NewIterator(nil, nil)
146 | for iter.Next() {
147 | count++
148 | }
149 | iter.Release()
150 | s.count = count
151 | }
152 |
153 | func (s *LeveldbStorage) GetCount() int64 {
154 | if s.count == 0 && s.isClosed() {
155 | s.ReOpen()
156 | s.compute()
157 | }
158 | return s.count
159 | }
160 |
--------------------------------------------------------------------------------
/searcher/storage/leveldb_test.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "fmt"
5 | "github.com/syndtr/goleveldb/leveldb"
6 | "testing"
7 | )
8 |
9 | func TestLeveldb(t *testing.T) {
10 | db, err := leveldb.OpenFile("/Users/panjing/GolandProjects/github.com/sea-team/gofound/cache/doc_6.db", nil)
11 | if err != nil {
12 | t.Fatal(err)
13 | }
14 | defer db.Close()
15 |
16 | //_time := utils.ExecTime(func() {
17 | //
18 | // for i := 0; i < 10000; i++ {
19 | // db.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)), nil)
20 | // }
21 | //})
22 | //fmt.Println("leveldb put 1000:", _time)
23 | db.Put([]byte("1"), []byte("1"), nil)
24 | value, err := db.Get([]byte("1"), nil)
25 | fmt.Println(string(value), err)
26 | }
27 |
--------------------------------------------------------------------------------
/searcher/system/cpu.go:
--------------------------------------------------------------------------------
1 | package system
2 |
3 | import (
4 | "github.com/shirou/gopsutil/v3/cpu"
5 | "runtime"
6 | "time"
7 | )
8 |
// CPUStatus is the CPU snapshot reported by GetCPUStatus.
type CPUStatus struct {
	Cores       int     `json:"cores"`       // value of runtime.NumCPU()
	UsedPercent float64 `json:"usedPercent"` // CPU usage percentage, rounded to two decimals
	ModelName   string  `json:"modelName"`   // model name of the first CPU reported by gopsutil
}
14 |
15 | func GetCPUStatus() CPUStatus {
16 | percent, _ := cpu.Percent(time.Second, false)
17 | info, _ := cpu.Info()
18 | c := CPUStatus{
19 | UsedPercent: GetPercent(percent[0]),
20 | Cores: runtime.NumCPU(),
21 | ModelName: info[0].ModelName,
22 | }
23 |
24 | return c
25 | }
26 |
--------------------------------------------------------------------------------
/searcher/system/cpu_test.go:
--------------------------------------------------------------------------------
1 | package system
2 |
3 | import (
4 | "fmt"
5 | "github.com/shirou/gopsutil/v3/cpu"
6 | "testing"
7 | )
8 |
// TestCPU prints the CPU status and the raw CPU info for manual inspection;
// it makes no assertions.
func TestCPU(t *testing.T) {
	fmt.Println(GetCPUStatus())
	c, _ := cpu.Info()
	fmt.Println(c)
}
14 |
--------------------------------------------------------------------------------
/searcher/system/disk.go:
--------------------------------------------------------------------------------
1 | package system
2 |
3 | import (
4 | "encoding/json"
5 | "github.com/shirou/gopsutil/v3/disk"
6 | )
7 |
// DiskStatus is the disk usage snapshot reported by GetDiskStat. Sizes are
// expressed in GiB and percentages are rounded to two decimals.
type DiskStatus struct {
	Total       float64 `json:"total"`
	Used        float64 `json:"used"`
	Free        float64 `json:"free"`
	FsType      string  `json:"fsType"`
	UsedPercent float64 `json:"usedPercent"`
	Path        string  `json:"path"`
}

// String renders the status as a JSON object. A marshalling error is
// ignored and results in an empty string.
func (d *DiskStatus) String() string {
	encoded, _ := json.Marshal(d)
	text := string(encoded)
	return text
}
21 |
22 | func GetDiskStat() DiskStatus {
23 | parts, _ := disk.Partitions(true)
24 | diskInfo, _ := disk.Usage(parts[0].Mountpoint)
25 |
26 | d := DiskStatus{
27 | Path: diskInfo.Path,
28 | Total: GetUint64GB(diskInfo.Total),
29 | Free: GetUint64GB(diskInfo.Free),
30 | Used: GetUint64GB(diskInfo.Used),
31 | UsedPercent: GetPercent(diskInfo.UsedPercent),
32 | FsType: diskInfo.Fstype,
33 | }
34 | return d
35 | }
36 |
--------------------------------------------------------------------------------
/searcher/system/disk_test.go:
--------------------------------------------------------------------------------
1 | package system
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
// TestDisk prints the disk status for manual inspection; it makes no
// assertions.
func TestDisk(t *testing.T) {

	fmt.Println(GetDiskStat())
}
12 |
--------------------------------------------------------------------------------
/searcher/system/mem.go:
--------------------------------------------------------------------------------
1 | package system
2 |
3 | import (
4 | "encoding/json"
5 | "github.com/shirou/gopsutil/v3/mem"
6 | "runtime"
7 | )
8 |
// MemStatus is the memory snapshot reported by GetMemStat. Sizes are
// expressed in GiB; Self is the memory allocated by this process.
type MemStatus struct {
	Total       float64 `json:"total"`
	Used        float64 `json:"used"`
	Free        float64 `json:"free"`
	Self        float64 `json:"self"`
	UsedPercent float64 `json:"usedPercent"`
}

// String renders the status as a JSON object. A marshalling error is
// ignored and results in an empty string.
func (m *MemStatus) String() string {
	encoded, _ := json.Marshal(m)
	text := string(encoded)
	return text
}
21 |
22 | func GetMemStat() MemStatus {
23 |
24 | //内存信息
25 | info, _ := mem.VirtualMemory()
26 | m := MemStatus{
27 | Total: GetUint64GB(info.Total),
28 | Used: GetUint64GB(info.Used),
29 | Free: GetUint64GB(info.Free),
30 | UsedPercent: GetPercent(info.UsedPercent),
31 | }
32 |
33 | //自身占用
34 | memStat := new(runtime.MemStats)
35 | runtime.ReadMemStats(memStat)
36 | m.Self = GetUint64GB(memStat.Alloc)
37 |
38 | return m
39 | }
40 |
--------------------------------------------------------------------------------
/searcher/system/mem_test.go:
--------------------------------------------------------------------------------
1 | package system
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
// TestMem prints the memory status for manual inspection; it makes no
// assertions.
func TestMem(t *testing.T) {

	m := GetMemStat()
	fmt.Println(m)
}
13 |
--------------------------------------------------------------------------------
/searcher/system/utils.go:
--------------------------------------------------------------------------------
1 | package system
2 |
3 | import (
4 | "fmt"
5 | "strconv"
6 | )
7 |
// GetFloat64MB converts a size in bytes to MiB, rounded to two decimals.
func GetFloat64MB(size int64) float64 {
	mib := float64(size) / 1024 / 1024
	text := fmt.Sprintf("%.2f", mib)
	rounded, _ := strconv.ParseFloat(text, 64)
	return rounded
}
// GetUint64GB converts a size in bytes to GiB, rounded to two decimals.
func GetUint64GB(size uint64) float64 {
	gib := float64(size) / 1024 / 1024 / 1024
	text := fmt.Sprintf("%.2f", gib)
	rounded, _ := strconv.ParseFloat(text, 64)
	return rounded
}
16 |
// GetPercent rounds val to two decimal places.
func GetPercent(val float64) float64 {
	text := fmt.Sprintf("%.2f", val)
	rounded, _ := strconv.ParseFloat(text, 64)
	return rounded
}
21 |
--------------------------------------------------------------------------------
/searcher/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "bytes"
5 | "encoding/binary"
6 | "encoding/gob"
7 | "io/fs"
8 | "os"
9 | "path/filepath"
10 | "regexp"
11 | "time"
12 | )
13 |
// ExecTime runs fn and returns how long it took, in milliseconds.
func ExecTime(fn func()) float64 {
	begin := time.Now()
	fn()
	elapsed := time.Since(begin)
	return float64(elapsed.Nanoseconds()) / 1e6
}
20 |
// ExecTimeWithError runs fn and returns how long it took, in milliseconds,
// together with the error fn returned.
func ExecTimeWithError(fn func() error) (float64, error) {
	begin := time.Now()
	err := fn()
	elapsed := time.Since(begin)
	return float64(elapsed.Nanoseconds()) / 1e6, err
}
27 |
// Encoder gob-encodes data and returns the resulting bytes. A nil value
// encodes to nil. It panics when gob cannot encode the value.
func Encoder(data interface{}) []byte {
	if data == nil {
		return nil
	}
	var buffer bytes.Buffer
	if err := gob.NewEncoder(&buffer).Encode(data); err != nil {
		panic(err)
	}
	return buffer.Bytes()
}
40 |
// Decoder gob-decodes data into v, which must be a pointer to the target
// value. Nil data is a no-op. It panics when decoding fails.
func Decoder(data []byte, v interface{}) {
	if data == nil {
		return
	}
	if err := gob.NewDecoder(bytes.NewBuffer(data)).Decode(v); err != nil {
		panic(err)
	}
}
52 |
// Constants of the 32-bit hash computed by Murmur3.
const (
	c1 = 0xcc9e2d51 // first multiplier applied to each 4-byte block
	c2 = 0x1b873593 // second multiplier applied to each 4-byte block
	c3 = 0x85ebca6b // first multiplier of the final mixing step
	c4 = 0xc2b2ae35 // second multiplier of the final mixing step
	r1 = 15         // rotation applied to each block
	r2 = 13         // rotation applied to the running hash
	m  = 5          // running hash multiplier
	n  = 0xe6546b64 // running hash addend
)

var (
	// Seed is the initial hash value used by Murmur3.
	Seed = uint32(1)
)

// Murmur3 returns the 32-bit hash of key, starting from Seed. The key is
// consumed as little-endian 4-byte blocks; up to three trailing bytes are
// mixed in separately, and the key length is folded in before the final
// mixing step.
func Murmur3(key []byte) (hash uint32) {
	hash = Seed

	// Full 4-byte blocks.
	rest := key
	for len(rest) >= 4 {
		k := binary.LittleEndian.Uint32(rest)
		rest = rest[4:]

		k *= c1
		k = k<<r1 | k>>(32-r1)
		k *= c2

		hash ^= k
		hash = hash<<r2 | hash>>(32-r2)
		hash = hash*m + n
	}

	// Trailing 1-3 bytes, assembled little-endian.
	if len(rest) > 0 {
		var tail uint32
		for i := len(rest) - 1; i >= 0; i-- {
			tail = tail<<8 | uint32(rest[i])
		}
		tail *= c1
		tail = tail<<r1 | tail>>(32-r1)
		tail *= c2
		hash ^= tail
	}

	// Final mixing.
	hash ^= uint32(len(key))
	hash ^= hash >> 16
	hash *= c3
	hash ^= hash >> 13
	hash *= c4
	hash ^= hash >> 16

	return hash
}
107 |
// StringToInt maps a string to a uint32 by hashing its bytes with Murmur3.
// Distinct strings can map to the same value.
func StringToInt(value string) uint32 {
	return Murmur3([]byte(value))
}
112 |
// Uint32Comparator compares two uint32 values passed as interfaces and
// returns -1, 0 or 1 when a is less than, equal to or greater than b.
// It panics when either argument is not a uint32.
func Uint32Comparator(a, b interface{}) int {
	left, right := a.(uint32), b.(uint32)
	if left < right {
		return -1
	}
	if left > right {
		return 1
	}
	return 0
}
125 |
// Uint32ToBytes returns the 4-byte big-endian encoding of i.
func Uint32ToBytes(i uint32) []byte {
	encoded := make([]byte, 4)
	binary.BigEndian.PutUint32(encoded[0:4], i)
	return encoded
}
131 |
// QuickSortAsc sorts arr[start..end] (both indices inclusive) in ascending
// order, in place, using a recursive quicksort with the middle element as
// the pivot.
//
// When cmp is non-nil it is called with the two indices right after each
// exchange of arr elements — presumably so callers can mirror the exchange
// in a parallel slice. Note that it is also called when both indices are
// equal.
func QuickSortAsc(arr []int, start, end int, cmp func(int, int)) {
	if start < end {
		i, j := start, end
		key := arr[(start+end)/2]
		for i <= j {
			// Skip elements that are already on the correct side of the pivot.
			for arr[i] < key {
				i++
			}
			for arr[j] > key {
				j--
			}
			if i <= j {
				arr[i], arr[j] = arr[j], arr[i]
				if cmp != nil {
					cmp(i, j)
				}
				i++
				j--
			}
		}

		// Recurse into the two partitions that still contain more than one
		// element.
		if start < j {
			QuickSortAsc(arr, start, j, cmp)
		}
		if end > i {
			QuickSortAsc(arr, i, end, cmp)
		}
	}
}
// DeleteArray removes the element at index by shifting the following
// elements one position to the left. The operation happens in place: the
// returned slice shares (and has modified) the backing array of the
// argument. It panics when index is out of range.
func DeleteArray(array []uint32, index int) []uint32 {
	copy(array[index:], array[index+1:])
	return array[:len(array)-1]
}
165 |
// ReleaseAssets copies the contents of file to the path out, creating the
// parent directories as needed. Nothing is written when out already exists,
// so an existing file is never overwritten. A nil file is a no-op.
//
// It panics when out is empty or when reading, directory creation or
// writing fails.
func ReleaseAssets(file fs.File, out string) {
	if file == nil {
		return
	}

	if out == "" {
		panic("out is empty")
	}

	// Only materialize the asset when the target does not exist yet.
	if _, err := os.Stat(out); !os.IsNotExist(err) {
		return
	}

	fileInfo, err := file.Stat()
	if err != nil {
		panic(err)
	}

	// A single Read may return fewer bytes than requested, so keep reading
	// until the buffer is full. An empty source performs no reads at all.
	buffer := make([]byte, fileInfo.Size())
	for filled := 0; filled < len(buffer); {
		n, err := file.Read(buffer[filled:])
		filled += n
		if err != nil && filled < len(buffer) {
			panic(err)
		}
	}

	// Make sure the output directory exists.
	if err := os.MkdirAll(filepath.Dir(out), os.ModePerm); err != nil {
		panic(err)
	}

	// WriteFile creates the file itself; no separate handle is needed.
	if err := os.WriteFile(out, buffer, os.ModePerm); err != nil {
		panic(err)
	}
}
211 |
// DirSizeB returns the total size in bytes of all non-directory entries
// under path. The walk stops at the first error and the bytes counted so
// far are returned; a path that cannot be read therefore yields 0.
func DirSizeB(path string) int64 {
	var size int64
	// The walk error is intentionally dropped: the function has no error
	// result and reports whatever could be counted.
	_ = filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
		// info is nil when err is set, so check the error first.
		if err != nil {
			return err
		}
		if !info.IsDir() {
			size += info.Size()
		}
		return nil
	})

	return size
}
224 |
// punctuationPattern matches runs of Unicode punctuation characters. It is
// compiled once instead of on every call.
var punctuationPattern = regexp.MustCompile(`\p{P}+`)

// RemovePunctuation returns str with every Unicode punctuation character
// removed.
func RemovePunctuation(str string) string {
	return punctuationPattern.ReplaceAllString(str, "")
}
230 |
// whitespacePattern matches runs of whitespace as defined by the regexp \s
// class. It is compiled once instead of on every call.
var whitespacePattern = regexp.MustCompile(`\s+`)

// RemoveSpace returns str with every whitespace character matched by \s
// (space, tab, newline, carriage return, form feed, vertical tab) removed.
func RemoveSpace(str string) string {
	return whitespacePattern.ReplaceAllString(str, "")
}
236 |
// init registers the concrete types that are stored inside interface values
// with gob. Without this, encoding fails with
// "gob: type not registered for interface: map[string]interface {}".
func init() {
	gob.Register(map[string]interface{}{})
	gob.Register([]interface{}{})
}
243 |
--------------------------------------------------------------------------------
/searcher/words/tokenizer.go:
--------------------------------------------------------------------------------
1 | package words
2 |
3 | import (
4 | "embed"
5 | "github.com/sea-team/gofound/searcher/utils"
6 | "strings"
7 |
8 | "github.com/wangbin/jiebago"
9 | )
10 |
var (
	// dictionaryFS holds the .txt files of the data directory, embedded into
	// the binary at build time.
	//go:embed data/*.txt
	dictionaryFS embed.FS
)
15 |
// Tokenizer splits text into words using a jieba segmenter.
type Tokenizer struct {
	seg jiebago.Segmenter
}
19 |
20 | func NewTokenizer(dictionaryPath string) *Tokenizer {
21 | file, err := dictionaryFS.Open("data/dictionary.txt")
22 | if err != nil {
23 | panic(err)
24 | }
25 | utils.ReleaseAssets(file, dictionaryPath)
26 |
27 | tokenizer := &Tokenizer{}
28 |
29 | err = tokenizer.seg.LoadDictionary(dictionaryPath)
30 | if err != nil {
31 | panic(err)
32 | }
33 |
34 | return tokenizer
35 | }
36 |
37 | func (t *Tokenizer) Cut(text string) []string {
38 | //不区分大小写
39 | text = strings.ToLower(text)
40 | //移除所有的标点符号
41 | text = utils.RemovePunctuation(text)
42 | //移除所有的空格
43 | text = utils.RemoveSpace(text)
44 |
45 | var wordMap = make(map[string]struct{})
46 |
47 | resultChan := t.seg.CutForSearch(text, true)
48 | var wordsSlice []string
49 | for {
50 | w, ok := <-resultChan
51 | if !ok {
52 | break
53 | }
54 | _, found := wordMap[w]
55 | if !found {
56 | //去除重复的词
57 | wordMap[w] = struct{}{}
58 | wordsSlice = append(wordsSlice, w)
59 | }
60 | }
61 |
62 | return wordsSlice
63 | }
64 |
--------------------------------------------------------------------------------
/tests/array_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
// DeleteArray removes the element at index by shifting the following
// elements one position to the left, in place. It panics when index is out
// of range.
func DeleteArray(array []uint32, index int) []uint32 {
	copy(array[index:], array[index+1:])
	return array[:len(array)-1]
}
11 |
12 | func TestArray(t *testing.T) {
13 | array := []uint32{1}
14 | fmt.Println(DeleteArray(array, 0))
15 | }
16 |
--------------------------------------------------------------------------------
/tests/benchmark/array_test.go:
--------------------------------------------------------------------------------
1 | package benchmark
2 |
3 | import (
4 | "github.com/sea-team/gofound/searcher/arrays"
5 | "testing"
6 | )
7 |
// Benchmark compares two ways of collecting the distinct values of 100
// random arrays of 1000 elements each.
func Benchmark(b *testing.B) {

	// Shared input for both variants.
	size := 100
	arrayList := make([][]uint32, size)
	for i := 0; i < size; i++ {
		arrayList[i] = GetRandomUint32(1000)
	}

	// "array": test membership while walking every input array.
	// NOTE(review): temp is appended in arrival order, so it is not sorted
	// when passed to arrays.BinarySearch — confirm whether that function
	// requires sorted input.
	b.Run("array", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			var temp []uint32
			for _, nums := range arrayList {

				for _, num := range nums {
					if !arrays.BinarySearch(temp, num) {
						temp = append(temp, num)
					}
				}
			}
		}
	})

	// "sort": concatenate everything first, then de-duplicate in a second
	// pass.
	b.Run("sort", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			var temp []uint32
			for _, v := range arrayList {
				temp = append(temp, v...)
			}
			// De-duplicate.
			var as []uint32
			for _, v := range temp {
				if !arrays.BinarySearch(as, v) {
					as = append(as, v)
				}
			}
		}
	})
}
47 |
--------------------------------------------------------------------------------
/tests/benchmark/skiplist_test.go:
--------------------------------------------------------------------------------
1 | package benchmark
2 |
3 | import (
4 | "github.com/sea-team/gofound/searcher/arrays"
5 | "math/rand"
6 | "testing"
7 | )
8 | import "github.com/ryszard/goskiplist/skiplist"
9 |
// BenchmarkSkipList compares de-duplicating 10,000 random integers with a
// skip-list set against a slice probed with arrays.BinarySearch.
func BenchmarkSkipList(b *testing.B) {

	// Generate 10,000 random numbers in [0, 100000).
	var nums []int
	for i := 0; i < 10000; i++ {
		num := rand.Intn(100000)
		nums = append(nums, num)
	}

	b.ResetTimer()

	b.Run("skip", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			sl := skiplist.NewIntSet()

			for _, num := range nums {
				if !sl.Contains(num) {
					sl.Add(num)
				}
			}
		}
	})

	// NOTE(review): temps starts zero-filled and is written at the input
	// index, so it is not kept sorted — confirm whether arrays.BinarySearch
	// requires sorted input.
	b.Run("binary", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			temps := make([]uint32, len(nums))
			for index, num := range nums {
				if !arrays.BinarySearch(temps, uint32(num)) {
					temps[index] = uint32(num)
				}
			}
		}
	})

}
45 |
--------------------------------------------------------------------------------
/tests/benchmark/utils.go:
--------------------------------------------------------------------------------
1 | package benchmark
2 |
3 | import "math/rand"
4 |
// GetRandomUint32 returns a slice of n values drawn from rand.Uint32.
func GetRandomUint32(n int) []uint32 {
	values := make([]uint32, n)
	for idx := range values {
		values[idx] = rand.Uint32()
	}
	return values
}
12 |
--------------------------------------------------------------------------------
/tests/chan_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 |
3 | import (
4 | "fmt"
5 | "math/rand"
6 | "testing"
7 | "time"
8 | )
9 |
// TestChan demonstrates handing a value from a goroutine to the test through
// an unbuffered channel: the goroutine sleeps for a second, sends one random
// number and exits, and the test prints what it received.
func TestChan(t *testing.T) {
	data := make(chan int)

	go func() {
		time.Sleep(time.Second * 1)
		data <- rand.Intn(100)
	}()

	fmt.Println(<-data)
}
26 |
--------------------------------------------------------------------------------
/tests/func_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
// FuncTest is a minimal struct used to observe pointer-passing semantics.
type FuncTest struct {
	name string
}

// aa demonstrates argument passing: it overwrites its copy of a, writes 3
// through b, renames d to "aa", prints the address of its own b parameter
// and the pointer d, and returns a + *b (always 114).
func aa(a int, b *int, d *FuncTest) int {
	a, *b = 111, 3
	fmt.Printf("b=%p\n", &b)
	fmt.Printf("d=%p\n", d)
	d.name = "aa"
	sum := a + *b
	return sum
}
20 |
21 | func TestA(t *testing.T) {
22 |
23 | var a int = 1
24 | var b int = 2
25 | d := &FuncTest{name: "test"}
26 |
27 | fmt.Printf("b=%p\n", &b)
28 | fmt.Printf("d=%p\n", d)
29 |
30 | fmt.Println(aa(a, &b, d))
31 | fmt.Println(d)
32 | }
33 |
--------------------------------------------------------------------------------
/tests/http/cut.http:
--------------------------------------------------------------------------------
1 | GET localhost:5678/api/word/cut?q=开发者程序员是什么职业
2 | Content-Type: application/json
3 |
4 | {}
5 |
6 | ###
7 |
--------------------------------------------------------------------------------
/tests/http/dump.http:
--------------------------------------------------------------------------------
1 | GET localhost:5678/api/dump
2 | Accept: application/json
3 |
4 | ###
5 |
--------------------------------------------------------------------------------
/tests/http/index.http:
--------------------------------------------------------------------------------
1 | POST localhost:5678/api/index
2 | Content-Type: application/json
3 |
4 | {
5 | "id": 1,
6 | "text": "上海哪里好玩",
7 | "document": {
8 | "title": "阿森松岛所445",
9 | "number": 223
10 | }
11 | }
12 |
13 | ###
14 |
15 | POST localhost:5678/api/index
16 | Content-Type: application/json
17 |
18 | {
19 | "id": 2,
20 | "text": "深圳哪里好玩",
21 | "document": {
22 | "title": "阿森松岛所445",
23 | "number": 223
24 | }
25 | }
26 |
27 | ###
28 |
29 | POST localhost:5678/api/index
30 | Content-Type: application/json
31 |
32 | {
33 | "id": 3,
34 | "text": "哪里是人最多的地方",
35 | "document": {
36 | "title": "阿森松岛所445",
37 | "number": 223
38 | }
39 | }
40 |
41 | ###
42 |
43 | POST localhost:5678/api/index
44 | Content-Type: application/json
45 |
46 | {
47 | "id": 4,
48 | "text": "哪个城市人是最多的",
49 | "document": {
50 | "title": "阿森松岛所445",
51 | "number": 223
52 | }
53 | }
54 |
55 | ###
56 |
57 | POST localhost:5678/api/index
58 | Content-Type: application/json
59 |
60 | {
61 | "id": 5,
62 | "text": "上海的景点哪里好玩",
63 | "document": {
64 | "title": "阿森松岛所445",
65 | "number": 223
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/tests/http/index2.http:
--------------------------------------------------------------------------------
1 | POST localhost:5678/api/index/batch
2 | Content-Type: application/json
3 |
4 | [
5 | {
6 | "id": 4194561,
7 | "text": "人一生必须培养的好习惯有哪些?",
8 | "document": {
9 | "category": "news_culture",
10 | "cid": "101",
11 | "id": "6422885744473276673",
12 | "title": "人一生必须培养的好习惯有哪些?"
13 | }
14 | },
15 | {
16 | "id": 29360647,
17 | "text": "去茶店喝茶,这些坏习惯一定要改",
18 | "document": {
19 | "category": "news_culture",
20 | "cid": "101",
21 | "id": "6526026244490789383",
22 | "title": "去茶店喝茶,这些坏习惯一定要改"
23 | }
24 | },
25 | {
26 | "id": 482345479,
27 | "text": "终于等到你 大连一方迎中超首胜",
28 | "document": {
29 | "category": "news_sports",
30 | "cid": "103",
31 | "id": "6552361064552464903",
32 | "title": "终于等到你 大连一方迎中超首胜"
33 | }
34 | },
35 | {
36 | "id": 2273313284,
37 | "text": "范丞丞人气有多高?付费自拍睡一觉起来就躺赚了480万",
38 | "document": {
39 | "category": "news_entertainment",
40 | "cid": "102",
41 | "id": "6552353988237328900",
42 | "title": "范丞丞人气有多高?付费自拍睡一觉起来就躺赚了480万"
43 | }
44 | },
45 | {
46 | "id": 2688549390,
47 | "text": "比特币交易平台官网",
48 | "document": {
49 | "category": "news_finance",
50 | "cid": "104",
51 | "id": "6552399416521654798",
52 | "title": "比特币交易平台官网"
53 | }
54 | },
55 | {
56 | "id": 3162505486,
57 | "text": "有哪些优秀的阅读习惯和技巧?",
58 | "document": {
59 | "category": "news_culture",
60 | "cid": "101",
61 | "id": "6552470670503051534",
62 | "title": "有哪些优秀的阅读习惯和技巧?"
63 | }
64 | },
65 | {
66 | "id": 3602907656,
67 | "text": "大学英语四级考试500个高频词汇",
68 | "document": {
69 | "category": "news_edu",
70 | "cid": "108",
71 | "id": "6552332699414037000",
72 | "title": "大学英语四级考试500个高频词汇"
73 | }
74 | },
75 | {
76 | "id": 3921674756,
77 | "text": "农村深山里的这种植物,看似其貌不扬,它竟还有这样的功效!",
78 | "document": {
79 | "category": "news_agriculture",
80 | "cid": "115",
81 | "id": "6552380653042663940",
82 | "title": "农村深山里的这种植物,看似其貌不扬,它竟还有这样的功效!"
83 | }
84 | },
85 | {
86 | "id": 4034920967,
87 | "text": "北大百廿燕归来",
88 | "document": {
89 | "category": "news_edu",
90 | "cid": "108",
91 | "id": "6552368906420355591",
92 | "title": "北大百廿燕归来"
93 | }
94 | }
95 | ]
96 |
97 |
--------------------------------------------------------------------------------
/tests/http/query.http:
--------------------------------------------------------------------------------
1 | POST http://localhost:5678/api/query
2 | Content-Type: application/json
3 |
4 | {
5 | "query":"手机",
6 | "page":1,
7 | "limit":10,
8 | "order": "desc"
9 | }
10 |
11 | ###
12 |
--------------------------------------------------------------------------------
/tests/http/status.http:
--------------------------------------------------------------------------------
1 | GET http://localhost:5678/api/status
2 | Accept: application/json
3 |
4 | ###
5 |
--------------------------------------------------------------------------------
/tests/index_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 |
3 | import (
4 | "bufio"
5 | "fmt"
6 | "github.com/sea-team/gofound/searcher"
7 | "github.com/sea-team/gofound/searcher/model"
8 | "github.com/sea-team/gofound/searcher/utils"
9 | "github.com/sea-team/gofound/searcher/words"
10 | "os"
11 | "strings"
12 | "testing"
13 | )
14 |
15 | func TestIndex(t *testing.T) {
16 |
17 | tokenizer := words.NewTokenizer("../searcher/words/data/dictionary.txt")
18 |
19 | var engine = &searcher.Engine{
20 | IndexPath: "./index/db2",
21 | Tokenizer: tokenizer,
22 | }
23 | option := engine.GetOptions()
24 |
25 | engine.InitOption(option)
26 |
27 | f, err := os.Open("index/toutiao_cat_data.txt")
28 | if err != nil {
29 | t.Errorf("open file: %v", err)
30 | }
31 |
32 | id := uint32(0)
33 | rd := bufio.NewReader(f)
34 | index := 0
35 | for {
36 | line, isPrefix, err := rd.ReadLine()
37 | if err != nil {
38 | return
39 | }
40 | if isPrefix {
41 | t.Errorf("A long line has been cut, %s", line)
42 | }
43 |
44 | if len(line) == 0 {
45 | break
46 | }
47 |
48 | lineString := string(line)
49 | //fmt.Println(lineString)
50 | array := strings.Split(lineString, "_!_")
51 | if index%1000 == 0 {
52 | fmt.Println(index)
53 | }
54 | index++
55 | //if index == 6000 {
56 | // break
57 | //}
58 | data := make(map[string]interface{})
59 | id++
60 |
61 | data["id"] = id
62 | data["title"] = array[3]
63 | data["category"] = array[2]
64 | data["cid"] = array[1]
65 |
66 | doc := model.IndexDoc{
67 | Id: id,
68 | Text: array[3],
69 | Document: data,
70 | }
71 | engine.IndexDocument(&doc)
72 | }
73 | for engine.GetQueue() > 0 {
74 | }
75 | fmt.Println("index finish")
76 | }
77 |
78 | func TestRepeat(t *testing.T) {
79 | //判断是否重复
80 |
81 | tokenizer := words.NewTokenizer("../searcher/words/data/dictionary.txt")
82 | var engine = &searcher.Engine{
83 | IndexPath: "./index",
84 | Tokenizer: tokenizer,
85 | }
86 | option := engine.GetOptions()
87 |
88 | engine.InitOption(option)
89 |
90 | f, err := os.Open("index/toutiao_cat_data.txt")
91 | if err != nil {
92 | t.Errorf("open file: %v", err)
93 | }
94 |
95 | container := make(map[uint32][]string)
96 |
97 | rd := bufio.NewReader(f)
98 | index := 0
99 | for {
100 |
101 | line, _, err := rd.ReadLine()
102 | if err != nil {
103 | break
104 | }
105 |
106 | lineString := string(line)
107 | array := strings.Split(lineString, "_!_")
108 | if index%10000 == 0 {
109 | fmt.Println(index)
110 | }
111 | index++
112 |
113 | data := struct {
114 | Id string
115 | Title string
116 | Category string
117 | Cid string
118 | }{
119 | Id: array[0],
120 | Title: array[3],
121 | Category: array[2],
122 | Cid: array[1],
123 | }
124 |
125 | //分词
126 | words := engine.Tokenizer.Cut(data.Title)
127 | for _, word := range words {
128 | key := Murmur3([]byte(word))
129 | val := container[key]
130 | if val == nil {
131 | val = make([]string, 0)
132 | }
133 | if !exists(val, word) {
134 | val = append(val, word)
135 | }
136 | container[key] = val
137 | }
138 | }
139 |
140 | //输出 value大于2的key
141 | for key, val := range container {
142 | if len(val) > 1 {
143 | fmt.Println("key:", key, "value:", val)
144 | }
145 | }
146 |
147 | fmt.Println("index finish")
148 |
149 | }
150 |
// exists reports whether value is an element of values.
// A nil or empty slice never contains anything.
func exists(values []string, value string) bool {
	found := false
	for i := 0; i < len(values) && !found; i++ {
		found = values[i] == value
	}
	return found
}
160 |
161 | func TestStringToInt(t *testing.T) {
162 | /*
163 | key: 3756240089 value: [现场版 58.6]
164 | key: 2832448212 value: [树下 初展]
165 | */
166 |
167 | fmt.Println(utils.StringToInt("现场版"))
168 | fmt.Println(utils.StringToInt("58.6"))
169 |
170 | fmt.Println(utils.StringToInt("树下"))
171 | fmt.Println(utils.StringToInt("初展"))
172 | }
173 |
// Constants used by Murmur3.
const (
	c1 = 0xcc9e2d51 // multiplier applied to a block before it is rotated
	c2 = 0x1b873593 // multiplier applied to a block after it is rotated
	c3 = 0x85ebca6b // first multiplier of the final mixing step
	c4 = 0xc2b2ae35 // second multiplier of the final mixing step
	r1 = 15         // left-rotation applied to each block
	r2 = 13         // left-rotation applied to the running hash
	m  = 5          // multiplier applied to the running hash after each block
	n  = 0xe6546b64 // addend applied to the running hash after each block
)

// Seed is the value the running hash starts from in Murmur3.
var Seed uint32 = 1

// Murmur3 returns a 32-bit hash of key, starting from Seed.
//
// The key is consumed in 4-byte little-endian blocks; up to three trailing
// bytes are folded in afterwards, followed by the key length and a final
// mixing step.
func Murmur3(key []byte) (hash uint32) {
	// rotl rotates v left by s bits.
	rotl := func(v uint32, s uint) uint32 {
		return v<<s | v>>(32-s)
	}
	// scramble is the per-block transformation shared by full blocks and
	// the tail.
	scramble := func(k uint32) uint32 {
		k *= c1
		k = rotl(k, r1)
		return k * c2
	}

	hash = Seed

	// Length of the part of key made of whole 4-byte blocks.
	body := len(key) &^ 3
	for off := 0; off < body; off += 4 {
		block := uint32(key[off]) |
			uint32(key[off+1])<<8 |
			uint32(key[off+2])<<16 |
			uint32(key[off+3])<<24
		hash ^= scramble(block)
		hash = rotl(hash, r2)
		hash = hash*m + n
	}

	// Fold the 1-3 leftover bytes, lowest byte first, into a single word.
	if tail := key[body:]; len(tail) > 0 {
		var k uint32
		for i := len(tail) - 1; i >= 0; i-- {
			k = k<<8 | uint32(tail[i])
		}
		hash ^= scramble(k)
	}

	// Final mix.
	hash ^= uint32(len(key))
	hash ^= hash >> 16
	hash *= c3
	hash ^= hash >> 13
	hash *= c4
	hash ^= hash >> 16

	return hash
}
228 |
--------------------------------------------------------------------------------
/tests/merge_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 |
3 | import "testing"
4 |
// merge is an unimplemented placeholder: it accepts a pointer to a slice and
// a value, and does nothing with either of them.
func merge(array *[]int, val int) {}
8 |
9 | func TestName(t *testing.T) {
10 | merge(nil, 0)
11 | }
12 |
--------------------------------------------------------------------------------
/tests/sort.md:
--------------------------------------------------------------------------------
1 | ```
2 |
3 | if(low=high
22 | while(iarr[j])//后端比temp小,符合降序,不管它,low下标前移
23 | j--;//while完后指比temp大的那个
24 | if(i data[j+1] {
72 | data[j], data[j+1] = data[j+1], data[j]
73 | }
74 | }
75 | }
76 | }
77 |
// SelectSort sorts arr in place into descending order using selection sort:
// for every position it finds the largest remaining element and swaps it in.
func SelectSort(arr []int) {
	last := len(arr) - 1
	for pos := 0; pos < last; pos++ {
		// Index of the largest element in arr[pos:].
		best := pos
		for probe := pos + 1; probe <= last; probe++ {
			if arr[probe] > arr[best] {
				best = probe
			}
		}
		if best == pos {
			continue
		}
		arr[pos], arr[best] = arr[best], arr[pos]
	}
}
96 |
// QuickSort sorts arr[start..end] (both bounds inclusive) in place into
// ascending order. The element in the middle of the range is used as the
// pivot; the two resulting partitions are sorted recursively.
func QuickSort(arr []int, start, end int) {
	if start >= end {
		return
	}

	pivot := arr[(start+end)/2]
	left, right := start, end
	for left <= right {
		// Skip elements that are already on the correct side of the pivot.
		for arr[left] < pivot {
			left++
		}
		for pivot < arr[right] {
			right--
		}
		if left > right {
			break
		}
		arr[left], arr[right] = arr[right], arr[left]
		left++
		right--
	}

	if right > start {
		QuickSort(arr, start, right)
	}
	if left < end {
		QuickSort(arr, left, end)
	}
}
124 |
// InsertSort sorts list in place into ascending order using insertion sort.
func InsertSort(list []int) {
	for i := 1; i < len(list); i++ {
		pending := list[i] // element being inserted into the sorted prefix
		slot := i          // position where pending will end up

		// Shift every larger element of the sorted prefix one step to the
		// right to open a gap for pending.
		for slot > 0 && list[slot-1] > pending {
			list[slot] = list[slot-1]
			slot--
		}
		list[slot] = pending
	}
}
142 |
143 | func TestFastSort(t *testing.T) {
144 |
145 | //QuickSortDesc
146 | //测试数据
147 | var data []int
148 |
149 | for i := 0; i < 1000; i++ {
150 | //随机数
151 | data = append(data, rand.Intn(100))
152 |
153 | }
154 |
155 | _time := utils.ExecTime(func() {
156 | //utils.QuickSortDesc(data, 0, len(data)-1, func(i int, j int) {
157 |
158 | //})
159 | //sort.Ints(data)
160 | sort.Sort(sort.Reverse(sort.IntSlice(data)))
161 | //sort.Reverse(data)
162 | })
163 | fmt.Println("时间", _time)
164 | fmt.Println(data)
165 |
166 | }
167 |
// getMaxInArr returns the largest element of arr.
// arr must hold at least one element; an empty slice causes a panic.
func getMaxInArr(arr []int) int {
	largest := arr[0]
	for _, v := range arr[1:] {
		if v > largest {
			largest = v
		}
	}
	return largest
}

// sortInBucket sorts bucket in place into ascending order. It uses insertion
// sort, but any other sorting method would do.
func sortInBucket(bucket []int) {
	for i := 1; i < len(bucket); i++ {
		backup := bucket[i]
		j := i - 1
		// Move larger elements of the sorted prefix to the right. j >= 0 has
		// to be checked first, otherwise bucket[j] would go out of range.
		for j >= 0 && backup < bucket[j] {
			bucket[j+1] = bucket[j]
			j--
		}
		bucket[j+1] = backup // drop the element into the gap
	}
}

// BucketSort sorts arr in place into ascending order using bucket sort and
// returns arr.
//
// len(arr) buckets are used. A value v goes to bucket
// (v-min)*(len(arr)-1)/(max-min), so the smallest value lands in the first
// bucket and the largest in the last one. Mapping relative to the minimum
// keeps the bucket index in range for negative values as well. Slices with
// fewer than two elements, and slices whose elements are all equal, are
// already sorted and are returned unchanged; this avoids indexing an empty
// slice and dividing by zero.
//
// NOTE: (v-min)*(len(arr)-1) is computed in int and can overflow for very
// large value ranges combined with very long slices.
func BucketSort(arr []int) []int {
	num := len(arr)
	if num < 2 {
		return arr
	}

	lo, hi := arr[0], getMaxInArr(arr)
	for _, v := range arr[1:] {
		if v < lo {
			lo = v
		}
	}
	if lo == hi {
		return arr
	}
	span := hi - lo

	// Distribute the values over the buckets.
	buckets := make([][]int, num)
	for _, v := range arr {
		idx := (v - lo) * (num - 1) / span
		buckets[idx] = append(buckets[idx], v)
	}

	// Sort each bucket and write the buckets back in order.
	pos := 0
	for _, bucket := range buckets {
		if len(bucket) == 0 {
			continue
		}
		sortInBucket(bucket)
		pos += copy(arr[pos:], bucket)
	}
	return arr
}
221 |
222 | func TestFind(t *testing.T) {
223 |
224 | data := make([]int, 0)
225 | data2 := make([]int, 0)
226 | for i := 0; i < 100000; i++ {
227 | val := rand.Intn(100000)
228 | data = append(data, val)
229 | data2 = append(data2, val)
230 | }
231 |
232 | t1 := utils.ExecTime(func() {
233 | sort.Sort(sort.IntSlice(data))
234 | })
235 | fmt.Println("快排用时", t1)
236 |
237 | //fmt.Println(find(data, 1))
238 | t2 := utils.ExecTime(func() {
239 | BucketSort(data2)
240 | for i, j := 0, len(data2)-1; i < j; i, j = i+1, j-1 {
241 | data2[i], data2[j] = data2[j], data2[i]
242 | }
243 | })
244 | fmt.Println("捅排", t2)
245 | //fmt.Println("捅排", sort.Reverse(sort.IntSlice(data2)))
246 |
247 | //查找优化,桶排序+map去重
248 |
249 | }
// find performs a binary search for target in data. It returns true and the
// index of a matching element, or false and -1 when there is none.
//
// The search continues in the left half whenever the probed element is
// smaller than target, so data has to be sorted in descending order for the
// result to be meaningful.
func find(data []uint32, target uint32) (bool, int) {
	low, high := 0, len(data)-1
	for low <= high {
		mid := (low + high) / 2
		switch probe := data[mid]; {
		case probe == target:
			return true, mid
		case probe < target:
			high = mid - 1
		default:
			low = mid + 1
		}
	}
	return false, -1
}
265 | func TestMerge(t *testing.T) {
266 |
267 | data1 := make([]uint32, 0)
268 | data2 := make([]uint32, 0)
269 | for i := 0; i < 10000; i++ {
270 | v := rand.Intn(10)
271 | data1 = append(data1, uint32(v))
272 | data2 = append(data2, uint32(v))
273 | }
274 |
275 | t1 := utils.ExecTime(func() {
276 | temp := make([]uint32, 0)
277 | for _, v := range data1 {
278 | if found, _ := find(temp, v); found {
279 | temp = append(temp, v)
280 | }
281 | }
282 | fmt.Println(temp)
283 | })
284 |
285 | fmt.Println("二分法去重", t1)
286 |
287 | t2 := utils.ExecTime(func() {
288 | temp := make(map[uint32]bool, len(data2))
289 | d := make([]uint32, 0)
290 | for _, val := range data2 {
291 | if _, ok := temp[val]; !ok {
292 | temp[val] = true
293 | d = append(d, val)
294 | }
295 | }
296 | fmt.Println(d)
297 | })
298 | fmt.Println("map去重", t2)
299 | }
300 |
--------------------------------------------------------------------------------
/tests/thread_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 |
3 | import (
4 | "fmt"
5 | "sync"
6 | "testing"
7 | "time"
8 | )
9 |
// ThreadTest embeds a mutex that its Test method holds while it runs, so
// concurrent calls on the same value execute one after another.
type ThreadTest struct {
	sync.Mutex
}

// wg counts the Test calls that have not finished yet; every call to Test
// must be preceded by a wg.Add(1).
var wg sync.WaitGroup

// Test acquires the receiver's mutex, sleeps for one second, prints a
// message containing name and finally signals wg.
//
// The unlock and the wg.Done call are deferred right after the lock is
// taken and at function entry respectively, so both still happen if the
// body panics, and wg is only signalled after the mutex has been released.
func (t *ThreadTest) Test(name int) {
	defer wg.Done()

	t.Lock()
	defer t.Unlock()

	time.Sleep(time.Second * 1)
	fmt.Println("我是线程", name, "执行结束")
}
23 |
24 | func TestThread(t *testing.T) {
25 |
26 | //sync.Mutex
27 | test := new(ThreadTest)
28 | for i := 0; i < 10; i++ {
29 | wg.Add(1)
30 | go test.Test(i)
31 | }
32 | wg.Wait()
33 | fmt.Println("完成了")
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/tests/time_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | "time"
7 | )
8 |
// TestExecTime sleeps for ten milliseconds and prints how much time elapsed
// around the sleep.
func TestExecTime(t *testing.T) {
	begin := time.Now()
	time.Sleep(10 * time.Millisecond)
	fmt.Println(time.Since(begin))
}
16 |
--------------------------------------------------------------------------------
/tests/word_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 |
3 | import (
4 | "fmt"
5 | "github.com/wangbin/jiebago"
6 | "strings"
7 | "testing"
8 | )
9 |
10 | func TestWord(t *testing.T) {
11 | var seg jiebago.Segmenter
12 |
13 | seg.LoadDictionary("/Users/panjing/GolandProjects/github.com/sea-team/gofound/data/dictionary.txt")
14 | r := seg.CutForSearch("想在西安买房投资,哪个区域比较好,最好有具体楼盘?", true)
15 | words := make([]string, 0)
16 | for {
17 | w, ok := <-r
18 | if !ok {
19 | break
20 | }
21 | words = append(words, w)
22 | }
23 | for _, w := range words {
24 | f := int(seg.SuggestFrequency(w))
25 | if len([]rune(w)) <= 1 {
26 | f = 0
27 | } else {
28 | f = f % len(words)
29 | }
30 |
31 | fmt.Printf("%s\t%d\n", w, f)
32 | }
33 | }
// contains reports whether e is a substring of any element of s other than
// the one at position skipIndex.
func contains(s []string, e string, skipIndex int) bool {
	for i := range s {
		if i == skipIndex {
			continue
		}
		if strings.Contains(s[i], e) {
			return true
		}
	}
	return false
}
42 | func getLongWords(words []string) []string {
43 |
44 | var newWords = make([]string, 0)
45 | for index, w := range words {
46 | if !contains(words, w, index) {
47 | newWords = append(newWords, w)
48 | }
49 | }
50 | return newWords
51 | }
52 |
53 | func TestLongWord(t *testing.T) {
54 | words := []string{"博物", "博物馆", "深圳北", "深圳", "深圳东"}
55 | r := getLongWords(words)
56 | fmt.Println(r)
57 | }
58 |
59 | func BenchmarkTest(b *testing.B) {
60 | var r []string
61 | for i := 0; i < b.N; i++ {
62 | words := []string{"博物", "博物馆", "深圳北", "深圳", "深圳东"}
63 | r = getLongWords(words)
64 | }
65 | fmt.Println(r)
66 | }
67 |
--------------------------------------------------------------------------------
/web/admin/admin.go:
--------------------------------------------------------------------------------
1 | package admin
2 |
3 | import (
4 | "github.com/gin-gonic/gin"
5 | "github.com/sea-team/gofound/web/admin/assets"
6 | "net/http"
7 | "net/url"
8 | "os"
9 | )
10 |
11 | func adminIndex(ctx *gin.Context) {
12 | file, err := assets.Static.ReadFile("web/dist/index.html")
13 | if err != nil && os.IsNotExist(err) {
14 | ctx.String(http.StatusNotFound, "not found")
15 | return
16 | }
17 | ctx.Data(http.StatusOK, "text/html", file)
18 | }
19 |
20 | func handlerStatic(c *gin.Context) {
21 | staticServer := http.FileServer(http.FS(assets.Static))
22 | c.Request.URL = &url.URL{Path: "web/dist" + c.Request.RequestURI}
23 | staticServer.ServeHTTP(c.Writer, c.Request)
24 | }
25 |
26 | func Register(router *gin.Engine, handlers ...gin.HandlerFunc) {
27 | //注册路由
28 | r := router.Group("/admin", handlers...)
29 | r.GET("/", adminIndex)
30 | router.GET("/assets/*filepath", handlerStatic)
31 | }
32 |
--------------------------------------------------------------------------------
/web/admin/assets/assets.go:
--------------------------------------------------------------------------------
1 | package assets
2 |
3 | import "embed"
4 |
5 | var (
6 | //go:embed web/dist/*
7 | Static embed.FS
8 | )
9 |
--------------------------------------------------------------------------------
/web/admin/assets/web/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | node_modules
11 | dist-ssr
12 | *.local
13 |
14 | # Editor directories and files
15 | .vscode/*
16 | !.vscode/extensions.json
17 | .idea
18 | .DS_Store
19 | *.suo
20 | *.ntvs*
21 | *.njsproj
22 | *.sln
23 | *.sw?
24 |
--------------------------------------------------------------------------------
/web/admin/assets/web/README.md:
--------------------------------------------------------------------------------
1 | # Vue 3 + Vite
2 |
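3 | This template should help get you started developing with Vue 3 in Vite. The template uses Vue 3 `<script setup>` SFCs, check out the [script setup docs](https://v3.vuejs.org/api/sfc-script-setup.html#sfc-script-setup) to learn more.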
10 |
11 |
12 |
13 |
53 |
54 |
55 |
56 |