├── .github └── workflows │ └── go.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── config.yaml ├── core ├── initialize.go └── parser.go ├── docs ├── TODO.md ├── api.md ├── compile.md ├── config.md ├── example.md ├── images │ ├── img1.png │ ├── img2.png │ └── index.png ├── index.md ├── release.md ├── storage.md └── test.md ├── global ├── config.go └── global.go ├── go.mod ├── go.sum ├── gofound.d ├── gofound.sh ├── main.go ├── sdk ├── SDK 设计指南.md ├── base.go ├── client.go ├── database.go ├── index.go └── word.go ├── searcher ├── arrays │ └── arrays.go ├── container.go ├── container_test.go ├── engine.go ├── model │ ├── doc.go │ ├── item.go │ └── search.go ├── pagination │ ├── page_test.go │ └── pagination.go ├── sorts │ ├── fast.go │ └── sort.go ├── storage │ ├── leveldb_storage.go │ └── leveldb_test.go ├── system │ ├── cpu.go │ ├── cpu_test.go │ ├── disk.go │ ├── disk_test.go │ ├── mem.go │ ├── mem_test.go │ └── utils.go ├── utils │ └── utils.go └── words │ ├── data │ └── dictionary.txt │ └── tokenizer.go ├── tests ├── array_test.go ├── benchmark │ ├── array_test.go │ ├── skiplist_test.go │ └── utils.go ├── chan_test.go ├── func_test.go ├── http │ ├── cut.http │ ├── dump.http │ ├── index.http │ ├── index2.http │ ├── query.http │ └── status.http ├── index_test.go ├── merge_test.go ├── sort.md ├── sort_test.go ├── thread_test.go ├── time_test.go └── word_test.go └── web ├── admin ├── admin.go └── assets │ ├── assets.go │ └── web │ ├── .gitignore │ ├── README.md │ ├── dist │ ├── assets │ │ ├── dashboard.05738d5c.js │ │ ├── dashboard.3b82ec2b.css │ │ ├── document.03ee9c1a.js │ │ ├── document.ad0de346.css │ │ ├── index.47e321f4.js │ │ ├── index.c3f99036.css │ │ ├── status.2e4cf3b3.css │ │ └── status.3af7fc63.js │ ├── favicon.ico │ └── index.html │ ├── index.html │ ├── package-lock.json │ ├── package.json │ ├── public │ └── favicon.ico │ ├── src │ ├── App.vue │ ├── api.js │ ├── assets │ │ └── logo.png │ ├── components │ │ ├── CPU.vue │ │ ├── Disk.vue │ │ 
├── GC.vue │ │ ├── Icon.vue │ │ ├── IndexDialog.vue │ │ ├── Memory.vue │ │ ├── Menu.vue │ │ ├── ProgressChat.vue │ │ └── Runtime.vue │ ├── main.js │ ├── menus.js │ ├── router.js │ └── views │ │ ├── dashboard.vue │ │ ├── document.vue │ │ └── status.vue │ ├── vite.config.js │ └── yarn.lock ├── controller ├── base.go ├── database.go ├── index.go ├── response.go ├── services.go └── word.go ├── middleware ├── cors.go └── exception.go ├── result.go ├── router ├── base.go ├── database.go ├── index.go ├── router.go └── word.go └── service ├── base.go ├── database.go ├── index.go ├── service.go └── word.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - uses: actions/setup-node@v3 14 | with: 15 | node-version: 14 16 | - run: npm install --prefix=./web/admin/assets/web --force 17 | - run: npm run build --prefix=./web/admin/assets/web --force 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v3 21 | with: 22 | go-version: 1.18 23 | 24 | - name: Install dependencies 25 | run: go get -v -t -d ./... 
26 | 27 | - name: Build-Macos 28 | run: GOOS=darwin GOARCH=amd64 go build -o ./dist/gofound_macos_amd64 29 | 30 | - name: Build-Macos-Arm64 31 | run: GOOS=darwin GOARCH=arm64 go build -o ./dist/gofound_macos_apple_silicon 32 | 33 | - name: Build-Windows-x64 34 | run: GOOS=windows GOARCH=amd64 go build -o ./dist/gofound_windows_amd64.exe 35 | 36 | - name: Build-Windows-AMR64 37 | run: GOOS=windows GOARCH=arm64 go build -o ./dist/gofound_windows_arm64.exe 38 | 39 | - name: Build-Linux-x64 40 | run: GOOS=linux GOARCH=amd64 go build -o ./dist/gofound_linux_amd64 41 | 42 | - name: Build-Linux-AMR64 43 | run: GOOS=linux GOARCH=arm64 go build -o ./dist/gofound_linux_arm64 44 | 45 | - uses: "marvinpinto/action-automatic-releases@latest" 46 | with: 47 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 48 | prerelease: false 49 | files: | 50 | ./dist/* 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | gofound 3 | /gofound 4 | /tmp/ 5 | /index/ 6 | /.idea/ 7 | /*/*.bin 8 | /cache 9 | /tests/index 10 | /data -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.18 as builder 2 | 3 | ENV GO111MODULE=on \ 4 | GOPROXY=https://goproxy.io 5 | 6 | COPY . 
/app 7 | WORKDIR /app 8 | 9 | RUN go get && go build -ldflags="-s -w" -installsuffix cgo 10 | 11 | FROM debian:buster-slim 12 | 13 | ENV TZ=Asia/Shanghai \ 14 | LANG=C.UTF-8 \ 15 | APP_DIR=/usr/local/go_found 16 | 17 | COPY --from=builder /app/gofound ${APP_DIR}/gofound 18 | 19 | WORKDIR ${APP_DIR} 20 | 21 | RUN ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime \ 22 | && echo ${TZ} > /etc/timezone \ 23 | && chmod +x gofound 24 | 25 | EXPOSE 5678 26 | 27 | CMD ["./gofound"] 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GoFound 2 | 3 | `GoFound` 一个golang实现的全文检索引擎,支持持久化和单机亿级数据毫秒级查找。 4 | 5 | 接口可以通过http调用。 6 | 7 | 详见 [API文档](./docs/api.md) 8 | 9 | ## 文档 10 | 11 | + [示例](./docs/example.md) 12 | + [API文档](./docs/api.md) 13 | + [索引原理](./docs/index.md) 14 | + [配置文档](./docs/config.md) 15 | + [持久化](./docs/storage.md) 16 | + [编译部署](./docs/compile.md) 17 | 18 | ## 在线体验 19 | 20 | > Simple社区使用的GoFound,可以直接模糊搜索相关帖子 21 | 22 | [在线体验](https://simpleui.72wo.com/search/simpleui) 23 | 24 | ## GoFound在线管理后台Demo 25 | [http://119.29.69.50:5678/admin](http://119.29.69.50:5678/admin) 26 | 27 | ![](./docs/images/img1.png) 28 | 29 | ![](./docs/images/img2.png) 30 | 31 | ## QQ交流群 32 | 33 | [556102631](https://qm.qq.com/cgi-bin/qm/qr?k=4OvO7bgRAhSLX0J2WXVbCWbY7hL7gMYd&jump_from=webapi) 34 | 35 | ## 二进制文件下载 36 | 37 | > 支持Windows、Linux、macOS、(amd64和arm64)和苹果M1 处理器 38 | 39 | [点击下载](https://github.com/newpanjing/gofound/releases) 40 | 41 | ## 技术栈 42 | 43 | + 二分法查找 44 | + 快速排序法 45 | + 倒排索引 46 | + 
正排索引 47 | + 文件分片 48 | + golang-jieba分词 49 | + leveldb 50 | 51 | ### 为何要用golang实现一个全文检索引擎? 52 | 53 | + 正如其名,`GoFound`去探索全文检索的世界,一个小巧精悍的全文检索引擎,支持持久化和单机亿级数据毫秒级查找。 54 | 55 | + 传统的项目大多数会采用`ElasticSearch`来做全文检索,因为`ElasticSearch`够成熟,社区活跃、资料完善。缺点就是配置繁琐、基于JVM对内存消耗比较大。 56 | 57 | + 所以我们需要一个更高效的搜索引擎,而又不会消耗太多的内存。 以最低的内存达到全文检索的目的,相比`ElasticSearch`,`gofound`是原生编译,会减少系统资源的消耗。而且对外无任何依赖。 58 | 59 | ## 安装和启动 60 | 61 | > 下载好源码之后,进入到源码目录,执行下列两个命令 62 | > 63 | 64 | + 编译 65 | 66 | > 直接下载 [可执行文件](https://github.com/newpanjing/gofound/releases) 可以不用编译,省去这一步。 67 | 68 | ```shell 69 | go get && go build 70 | ``` 71 | 72 | + 启动 73 | 74 | ```shell 75 | ./gofound --addr=:8080 --data=./data 76 | ``` 77 | 78 | + docker部署 79 | 80 | ```shell 81 | docker build -t gofound . 82 | docker run -d --name gofound -p 5678:5678 -v /mnt/data/gofound:/usr/local/go_found/data gofound:latest 83 | ``` 84 | 85 | + 其他命令 86 | 参考 [配置文档](./docs/config.md) 87 | 88 | ## 多语言SDK 89 | 90 | > 使用gofound的多语言SDK,可以在不同语言中使用gofound。但是请注意,版本号与gofound需要一致。主版本和子版本号,修订版不一致不影响。 91 | 92 | [Java](https://github.com/newpanjing/gofound-java) 93 | 94 | [Python](https://github.com/newpanjing/gofound-python) 95 | 96 | [Node.js](https://github.com/newpanjing/gofound-nodejs) 97 | 98 | 其他语言的SDK,正在陆续完善中。也可以直接通过[API文档](./docs/api.md)用HTTP请求实现。 99 | 100 | ## 和ES比较 101 | 102 | | ES | GoFound | 103 | |-------------|-----------------------| 104 | | 支持持久化 | 支持持久化 | 105 | | 基于内存索引 | 基于磁盘+内存缓存 | 106 | | 需要安装JDK | 原生二进制,无外部依赖 | 107 | | 需要安装第三方分词插件 | 自带中文分词和词库 | 108 | | 默认没有可视化管理界面 | 自带可视化管理界面 | 109 | | 内存占用大 | 基于Golang原生可执行文件,内存非常小 | 110 | | 配置复杂 | 默认可以不加任何参数启动,并且提供少量配置 | 111 | 112 | 113 | ## 待办 114 | 115 | [TODO](docs/TODO.md) 116 | 117 | ## 使用GoFound的用户 118 | 119 | [Simple社区](https://simpleui.72wo.com)| [贝塔博客](https://www.88cto.com) | [Book360](https://www.book360.cn) 120 | 121 | [深圳市十二点科技有限公司](https://www.72wo.com)|[深圳市恒一博科技有限公司](http://www.hooebo.com) 122 | 123 | [西安易神网络信息系统服务有限公司](http://www.hansonvip.com/) 124 | 125 | 
[影视资源搜索](https://movie.ipip.icu)|[酷易物联](https://cooleiot.tech)|[French博客](https://hoime.cn/) 126 | 127 | [好咪二次元之家](http://hoime.space) 128 | 129 | ## 发布日志 130 | 131 | [发布日志](https://github.com/newpanjing/gofound/releases) 132 | 133 | ## 开发成员 134 | |姓名|联系方式|贡献部分| 135 | |---|---|---| 136 | |[newpanjing](https://github.com/newpanjing)|newpanjing@icloud.com|负责人、引擎、UI| 137 | |[XiaoK29](https://github.com/XiaoK29)|-|引擎、接口| 138 | |[nightzjp](https://github.com/nightzjp)|-|引擎| 139 | |[xiao luobei](https://github.com/liu-cn)|-|引擎| 140 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | #监听地址 2 | addr: 0.0.0.0:5678 3 | 4 | #数据目录 5 | data: ./data 6 | #词典目录 7 | dictionary: ./data/dictionary.txt 8 | 9 | #是否启用admin 10 | enableAdmin: true 11 | 12 | # 最大线程数 13 | gomaxprocs: 4 14 | 15 | # admin 用户名和密码 16 | auth: admin:123456 17 | 18 | # 接口是否开启压缩 19 | enableGzip: true 20 | 21 | # 数据库关闭超时时间 22 | timeout: 600 23 | 24 | # 分片数量 25 | shard: 10 26 | 27 | # 分片缓冲数量 28 | bufferNum: 1000 -------------------------------------------------------------------------------- /core/initialize.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "github.com/sea-team/gofound/global" 7 | "github.com/sea-team/gofound/searcher" 8 | "github.com/sea-team/gofound/searcher/words" 9 | "github.com/sea-team/gofound/web/controller" 10 | "github.com/sea-team/gofound/web/router" 11 | "log" 12 | "net/http" 13 | 14 | //_ "net/http/pprof" 15 | "os" 16 | "os/signal" 17 | 18 | //"runtime" 19 | "syscall" 20 | "time" 21 | ) 22 | 23 | // NewContainer 创建一个容器 24 | func NewContainer(tokenizer *words.Tokenizer) *searcher.Container { 25 | container := &searcher.Container{ 26 | Dir: global.CONFIG.Data, 27 | Debug: global.CONFIG.Debug, 28 | Tokenizer: tokenizer, 29 | Shard: global.CONFIG.Shard, 30 | Timeout: 
global.CONFIG.Timeout, 31 | BufferNum: global.CONFIG.BufferNum, 32 | } 33 | if err := container.Init(); err != nil { 34 | panic(err) 35 | } 36 | 37 | return container 38 | } 39 | 40 | func NewTokenizer(dictionaryPath string) *words.Tokenizer { 41 | return words.NewTokenizer(dictionaryPath) 42 | } 43 | 44 | // Initialize 初始化 45 | func Initialize() { 46 | 47 | //runtime.SetMutexProfileFraction(1) // 开启对锁调用的跟踪 48 | //runtime.SetBlockProfileRate(1) // 开启对阻塞操作的跟踪 49 | 50 | //go func() { http.ListenAndServe("0.0.0.0:6060", nil) }() 51 | 52 | global.CONFIG = Parser() 53 | 54 | if !global.CONFIG.Debug { 55 | log.SetOutput(os.Stdout) //将记录器的输出设置为os.Stdout 56 | } 57 | 58 | defer func() { 59 | 60 | if r := recover(); r != nil { 61 | fmt.Printf("panic: %s\n", r) 62 | } 63 | }() 64 | 65 | //初始化分词器 66 | tokenizer := NewTokenizer(global.CONFIG.Dictionary) 67 | global.Container = NewContainer(tokenizer) 68 | 69 | // 初始化业务逻辑 70 | controller.NewServices() 71 | 72 | // 注册路由 73 | r := router.SetupRouter() 74 | // 启动服务 75 | srv := &http.Server{ 76 | Addr: global.CONFIG.Addr, 77 | Handler: r, 78 | } 79 | go func() { 80 | // 开启一个goroutine启动服务 81 | if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { 82 | log.Println("listen:", err) 83 | } 84 | }() 85 | 86 | // 优雅关机 87 | quit := make(chan os.Signal, 1) 88 | signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) 89 | <-quit 90 | log.Println("Shutdown Server ...") 91 | 92 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 93 | defer cancel() 94 | 95 | if err := srv.Shutdown(ctx); err != nil { 96 | log.Println("Server Shutdown:", err) 97 | } 98 | 99 | log.Println("Server exiting") 100 | } 101 | -------------------------------------------------------------------------------- /core/parser.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "github.com/sea-team/gofound/global" 7 | "os" 8 | "runtime" 9 | 10 | 
"gopkg.in/yaml.v2" 11 | ) 12 | 13 | // Parser 解析器 14 | func Parser() *global.Config { 15 | 16 | var addr = flag.String("addr", "127.0.0.1:5678", "设置监听地址和端口") 17 | //兼容windows 18 | dir := fmt.Sprintf(".%sdata", string(os.PathSeparator)) 19 | 20 | var dataDir = flag.String("data", dir, "设置数据存储目录") 21 | 22 | var debug = flag.Bool("debug", true, "设置是否开启调试模式") 23 | 24 | var dictionaryPath = flag.String("dictionary", "./data/dictionary.txt", "设置词典路径") 25 | 26 | var enableAdmin = flag.Bool("enableAdmin", true, "设置是否开启后台管理") 27 | 28 | var gomaxprocs = flag.Int("gomaxprocs", runtime.NumCPU()*2, "设置GOMAXPROCS") 29 | 30 | var auth = flag.String("auth", "", "开启认证,例如: admin:123456") 31 | 32 | var enableGzip = flag.Bool("enableGzip", true, "是否开启gzip压缩") 33 | var timeout = flag.Int64("timeout", 10*60, "数据库超时关闭时间(秒)") 34 | var bufferNum = flag.Int("bufferNum", 1000, "分片缓冲数量") 35 | 36 | var configPath = flag.String("config", "", "配置文件路径,配置此项其他参数忽略") 37 | 38 | flag.Parse() 39 | 40 | config := &global.Config{} 41 | 42 | if *configPath != "" { 43 | //解析配置文件 44 | //file, err := ioutil.ReadFile(*configPath) 45 | file, err := os.ReadFile(*configPath) //详情:https://github.com/golang/go/issues/42026 46 | if err != nil { 47 | panic(err) 48 | } 49 | err = yaml.Unmarshal(file, config) 50 | if err != nil { 51 | panic(err) 52 | } 53 | return config 54 | } 55 | config = &global.Config{ 56 | Addr: *addr, 57 | Data: *dataDir, 58 | Debug: *debug, 59 | Dictionary: *dictionaryPath, 60 | EnableAdmin: *enableAdmin, 61 | Gomaxprocs: *gomaxprocs, 62 | Auth: *auth, 63 | EnableGzip: *enableGzip, 64 | Timeout: *timeout, 65 | BufferNum: *bufferNum, 66 | } 67 | 68 | return config 69 | } 70 | -------------------------------------------------------------------------------- /docs/TODO.md: -------------------------------------------------------------------------------- 1 | # 待办 2 | 3 | + 增加多库 4 | + 增加配置 5 | + 增加Web控制台 6 | 7 | ta shuo d 
-------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | `gofound`启动之后,会监听一个TCP端口,接收来自客户端的搜索请求。处理http请求部分使用`gin`框架。 4 | 5 | ## 多数据库支持 6 | 7 | 从1.1版本开始,我们支持了多数据库,API接口中通过get参数来指定数据库。 8 | 9 | 如果不指定,默认数据库为`default`。 10 | 11 | 如:`api/index?database=db1` 其他post参数不变 12 | 13 | 如果指定的数据库名没有存在,将会自动创建一个新的数据库。如果需要删除,直接删除该数据库目录,然后重启gofound即可。 14 | 15 | ## 增加/修改索引 16 | 17 | 需要在query参数中指定数据库名`database=default` 18 | 19 | | 接口地址 | /api/index | 20 | |------|------------------| 21 | | 请求方式 | POST | 22 | | 请求类型 | application/json | 23 | 24 | ### 请求 25 | 26 | | 字段 | 类型 | 必选 | 描述 | 27 | |----------|--------|-----|-----------------------------------| 28 | | id | uint32 | 是 | 文档的主键id,需要保持唯一性,如果id重复,将会覆盖之前的文档。 | 29 | | text | string | 是 | 需要索引的文本块 | 30 | | document | object | 是 | 附带的文档数据,json格式,搜索的时候原样返回 | 31 | 32 | query参数(params-data) 33 | 34 | | 字段 | 类型 | 必选 | 描述 | 35 | |----------|--------|-----|--------| 36 | | database | string | 是 | 指定数据库名 | 37 | 38 | + POST /api/index 39 | 40 | ```json 41 | { 42 | "id": 88888, 43 | "text": "深圳北站", 44 | "document": { 45 | "title": "阿森松岛所445", 46 | "number": 223 47 | } 48 | } 49 | ``` 50 | 51 | + 命令行 52 | 53 | ```bash 54 | curl -H "Content-Type:application/json" -X POST --data '{"id":88888,"text":"深圳北站","document":{"title":"阿森松岛所445","number":223}}' http://127.0.0.1:5678/api/index?database=default 55 | ``` 56 | 57 | ### 响应 58 | 59 | ```json 60 | { 61 | "state": true, 62 | "message": "success" 63 | } 64 | ``` 65 | 66 | ## 批量增加/修改索引 67 | 与添加单个索引一样,也需要在query参数中指定数据库名`database=default` 68 | 69 | | 接口地址 | /api/index/batch | 70 | |------|------------------| 71 | | 请求方式 | POST | 72 | | 请求类型 | application/json | 73 | 74 | 参数与单个一致,只是需要用数组包裹多个json对象,例如: 75 | 76 | ```json 77 | [ 78 | { 79 | "id": 88888, 80 | "text": "深圳北站", 81 | "document": { 82 | "title": "阿森松岛所445", 83 | "number": 223 84 | } 85 | }, 86 | { 87 | "id": 22222, 
88 | "text": "北京东站", 89 | "document": { 90 | "title": "123123123", 91 | "number": 123123 92 | } 93 | } 94 | ] 95 | ``` 96 | 97 | ## 删除索引 98 | 与以上接口一样,也需要在query参数中指定数据库名`database=default` 99 | 100 | | 接口地址 | /api/index/remove | 101 | |------|-------------------| 102 | | 请求方式 | POST | 103 | | 请求类型 | application/json | 104 | 105 | ### 请求 106 | 107 | | 字段 | 类型 | 必选 | 描述 | 108 | |-----|--------|-----|---------| 109 | | id | uint32 | 是 | 文档的主键id | 110 | 111 | + POST /api/index/remove 112 | 113 | ```json 114 | { 115 | "id": 88888 116 | } 117 | ``` 118 | 119 | + 命令行 120 | 121 | ```bash 122 | curl -H "Content-Type:application/json" -X POST --data '{"id":88888}' http://127.0.0.1:5678/api/index/remove?database=default 123 | ``` 124 | 125 | ### 响应 126 | 127 | ```json 128 | { 129 | "state": true, 130 | "message": "success" 131 | } 132 | ``` 133 | 134 | ## 查询索引 135 | 136 | `GoFound`提供了一种查询方式,按照文本查询。与其他Nosql数据库不同,`GoFound`不支持按照文档的其他查询。 137 | 138 | | 接口地址 | /api/query | 139 | |------|------------------| 140 | | 请求方式 | POST | 141 | | 请求类型 | application/json | 142 | 143 | ### 请求 144 | 145 | | 字段 | 类型 | 必选 | 描述 | 146 | |-----------|--------|-----|----------------------------------------------------------------------------------------------| 147 | | query | string | 是 | 查询的关键词,都是or匹配 | 148 | | page | int | 否 | 页码,默认为1 | 149 | | limit | int | 否 | 返回的文档数量,默认为100,没有最大限制,最好不要超过1000,超过之后速度会比较慢,内存占用会比较多 | 150 | | order | string | 否 | 排序方式,取值`asc`和`desc`,默认为`desc`,按id排序,然后根据结果得分排序 | 151 | | highlight | object | 否 | 关键字高亮,相对text字段中的文本 | 152 | | scoreExp | string | 否 | 根据文档的字段计算分数,然后再进行排序,例如:score+[document.hot]*10,表达式中score为关键字的分数,document.hot为document中的hot字段 | 153 | 154 | 155 | query参数(params-data) 156 | 157 | | 字段 | 类型 | 必选 | 描述 | 158 | |----------|--------|-----|---------------------| 159 | | database | string | 否 | 指定数据库名,不填默认为default | 160 | 161 | 162 | ### highlight 163 | 164 | > 配置以后,符合条件的关键词将会被preTag和postTag包裹 165 | 166 | | 字段 | 描述 | 167 | |---------|-------| 168 | | preTag | 关键词前缀 | 169 | | 
postTag | 关键词后缀 | 170 | 171 | + 示例 172 | 173 | ```json 174 | { 175 | "query": "上海哪里好玩", 176 | "page": 1, 177 | "limit": 10, 178 | "order": "desc", 179 | "highlight": { 180 | "preTag": "", 181 | "postTag": "" 182 | } 183 | } 184 | ``` 185 | 186 | + POST /api/query 187 | 188 | ```json 189 | { 190 | "query": "深圳北站", 191 | "page": 1, 192 | "limit": 10, 193 | "order": "desc" 194 | } 195 | ``` 196 | 197 | + 命令行 198 | 199 | ```bash 200 | curl -H "Content-Type:application/json" -X POST --data '{"query":"深圳北站","page":1,"limit":10,"order":"desc"}' http://127.0.0.1:5678/api/query 201 | ``` 202 | 203 | ### 响应 204 | 205 | | 字段 | 类型 | 描述 | 206 | |-----------|---------|-------------------------| 207 | | time | float32 | 搜索文档用时 | 208 | | total | int | 符合条件的数量 | 209 | | pageCount | int | 页总数 | 210 | | page | int | 当前页码 | 211 | | limit | int | 每页数量 | 212 | | documents | array | 文档列表,[参考索引文档](#增加/修改索引) | 213 | 214 | ```json 215 | { 216 | "state": true, 217 | "message": "success", 218 | "data": { 219 | "time": 2.75375, 220 | "total": 13487, 221 | "pageCount": 1340, 222 | "page": 1, 223 | "limit": 10, 224 | "documents": [ 225 | { 226 | "id": 1675269553, 227 | "text": "【深圳消费卡/购物券转让/求购信息】- 深圳赶集网", 228 | "document": { 229 | "id": "8c68e948de7c7eb4362de15434a3ace7", 230 | "title": "【深圳消费卡/购物券转让/求购信息】- 深圳赶集网" 231 | }, 232 | "score": 3 233 | }, 234 | { 235 | "id": 88888, 236 | "text": "深圳北站", 237 | "document": { 238 | "number": 223, 239 | "title": "阿森松岛所445" 240 | }, 241 | "score": 2 242 | }, 243 | { 244 | "id": 212645608, 245 | "text": "【深圳美容美发卡转让/深圳美容美发卡求购信息】- 深圳赶集网", 246 | "document": { 247 | "id": "d3ce16b68a90833cbc20b8a49e93b9cd", 248 | "title": "【深圳美容美发卡转让/深圳美容美发卡求购信息】- 深圳赶集网" 249 | }, 250 | "score": 1.5 251 | }, 252 | { 253 | "id": 1191140208, 254 | "text": "【深圳赶集网】-免费发布信息-深圳分类信息门户", 255 | "document": { 256 | "id": "44be60a1d8b54c431e5511804062ae62", 257 | "title": "【深圳赶集网】-免费发布信息-深圳分类信息门户" 258 | }, 259 | "score": 1.5 260 | }, 261 | { 262 | "id": 4133884907, 263 | "text": 
"【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网", 264 | "document": { 265 | "id": "f25bb8136e8c2b02e3fcd65627a9ddbc", 266 | "title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网" 267 | }, 268 | "score": 1 269 | }, 270 | { 271 | "id": 206909132, 272 | "text": "【沙嘴门票/电影票转让/求购信息】- 深圳赶集网", 273 | "document": { 274 | "id": "63ca3ea4ffd254454e738a0957efedc2", 275 | "title": "【沙嘴门票/电影票转让/求购信息】- 深圳赶集网" 276 | }, 277 | "score": 1 278 | }, 279 | { 280 | "id": 220071473, 281 | "text": "【深圳健身卡转让/深圳健身卡求购信息】- 深圳赶集网", 282 | "document": { 283 | "id": "72d3d650c8a8a4e73b89b406f6dc76ef", 284 | "title": "【深圳健身卡转让/深圳健身卡求购信息】- 深圳赶集网" 285 | }, 286 | "score": 1 287 | }, 288 | { 289 | "id": 461974720, 290 | "text": "铁路_论坛_深圳热线", 291 | "document": { 292 | "id": "73c96ac2c23bc0cb4fb12ce7660c8b35", 293 | "title": "铁路_论坛_深圳热线" 294 | }, 295 | "score": 1 296 | }, 297 | { 298 | "id": 490922879, 299 | "text": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网", 300 | "document": { 301 | "id": "93be0f35c484ddcd8c83602e27535d96", 302 | "title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网" 303 | }, 304 | "score": 1 305 | }, 306 | { 307 | "id": 525810194, 308 | "text": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网", 309 | "document": { 310 | "id": "e489dd19dce0de2c9f4e59c969ec9ec0", 311 | "title": "【深圳购物卡转让/深圳购物卡求购信息】- 深圳赶集网" 312 | }, 313 | "score": 1 314 | } 315 | ], 316 | "words": [ 317 | "深圳", 318 | "北站" 319 | ] 320 | } 321 | } 322 | ``` 323 | 324 | ## 查询状态 325 | 326 | | 接口地址 | /api/status | 327 | |------|------------------| 328 | | 请求方式 | GET | 329 | 330 | ### 请求 331 | 332 | ```bash 333 | curl http://127.0.0.1:5678/api/status 334 | ``` 335 | 336 | ### 响应 337 | 338 | ```json 339 | { 340 | "state": true, 341 | "message": "success", 342 | "data": { 343 | "index": { 344 | "queue": 0, 345 | "shard": 10, 346 | "size": 531971 347 | }, 348 | "memory": { 349 | "alloc": 1824664656, 350 | "heap": 1824664656, 351 | "heap_idle": 10008625152, 352 | "heap_inuse": 2100068352, 353 | "heap_objects": 3188213, 354 | "heap_released": 9252003840, 355 | "heap_sys": 12108693504, 356 | "sys": 12700504512, 357 | 
"total": 11225144273040 358 | }, 359 | "status": "ok", 360 | "system": { 361 | "arch": "arm64", 362 | "cores": 10, 363 | "os": "darwin", 364 | "version": "go1.18" 365 | } 366 | } 367 | } 368 | ``` 369 | 370 | ## 删除数据库 371 | 372 | | 接口地址 | /api/db/drop | 373 | |------|--------------| 374 | | 请求方式 | GET | 375 | 376 | ### 请求 377 | 378 | ```bash 379 | curl http://127.0.0.1:5678/api/drop?database=db_name 380 | ``` 381 | 382 | ### 响应 383 | 384 | ```json 385 | { 386 | "state": true, 387 | "message": "success" 388 | } 389 | ``` 390 | 391 | ## 在线分词 392 | 393 | | 接口地址 | /api/word/cut | 394 | |------|-----------------| 395 | | 请求方式 | GET | 396 | 397 | ### 请求参数 398 | 399 | | 字段 | 类型 | 必选 | 描述 | 400 | |-----|--------|-----|-----| 401 | | q | string | 关键词 | 402 | 403 | ### 请求 404 | 405 | ```bash 406 | curl http://127.0.0.1:5678/api/word/cut?q=上海和深圳哪个城市幸福指数高 407 | ``` 408 | 409 | ### 响应 410 | 411 | ```json 412 | { 413 | "state": true, 414 | "message": "success", 415 | "data": [ 416 | "上海", 417 | "深圳", 418 | "哪个", 419 | "城市", 420 | "幸福", 421 | "指数" 422 | ] 423 | } 424 | ``` -------------------------------------------------------------------------------- /docs/compile.md: -------------------------------------------------------------------------------- 1 | # 编译 2 | 3 | `gofound` 基于`golang-1.18`,编译之前需要安装对于的golang版本。 4 | 5 | 推荐使用编译好的[二进制文件](https://github.com/newpanjing/github.com/sea-team/gofound/releases) 6 | 7 | ## Admin 8 | > 如果需要Admin部分,请先构建admin,admin基于vue+element-ui+vite,而这些也需要安装nodejs 9 | 10 | 构建命令: 11 | 12 | ```shell 13 | cd ./web/admin/assets/web/ 14 | 15 | npm install 16 | 17 | npm run build 18 | ``` 19 | 20 | 完成以上步骤之后,才能使用admin 21 | 22 | ## 编译 23 | 24 | ```shell 25 | go get 26 | go build -o gofound 27 | ``` 28 | 29 | ## 依赖 30 | 31 | ```shell 32 | go 1.18 33 | 34 | require ( 35 | github.com/emirpasic/gods v1.12.0 36 | github.com/gin-gonic/gin v1.7.7 37 | github.com/yanyiwu/gojieba v1.1.2 38 | ) 39 | 40 | require ( 41 | github.com/gin-contrib/sse v0.1.0 // indirect 42 | 
github.com/go-playground/locales v0.13.0 // indirect 43 | github.com/go-playground/universal-translator v0.17.0 // indirect 44 | github.com/go-playground/validator/v10 v10.4.1 // indirect 45 | github.com/golang/protobuf v1.3.3 // indirect 46 | github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db // indirect 47 | github.com/json-iterator/go v1.1.9 // indirect 48 | github.com/leodido/go-urn v1.2.0 // indirect 49 | github.com/mattn/go-isatty v0.0.12 // indirect 50 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect 51 | github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 // indirect 52 | github.com/syndtr/goleveldb v1.0.0 // indirect 53 | github.com/ugorji/go/codec v1.1.7 // indirect 54 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 // indirect 55 | golang.org/x/sys v0.0.0-20210823070655-63515b42dcdf // indirect 56 | gopkg.in/yaml.v2 v2.2.8 // indirect 57 | ) 58 | 59 | ``` -------------------------------------------------------------------------------- /docs/config.md: -------------------------------------------------------------------------------- 1 | # 配置 2 | 3 | 在编译好[gofound](./compile.md)之后,就可以启动了。 4 | 5 | ```shell 6 | ./gofound 7 | ``` 8 | 9 | ## 参数 10 | 11 | ```shell 12 | ./gofound -h 13 | 14 | -addr string 15 | 设置监听地址和端口 (default "0.0.0.0:5678") 16 | -auth string 17 | 开启认证,例如: admin:123456 18 | -config string 19 | 配置文件路径,配置此项其他参数忽略 20 | -data string 21 | 设置数据存储目录 (default "./data") 22 | -debug 23 | 设置是否开启调试模式 (default true) 24 | -dictionary string 25 | 设置词典路径 (default "./data/dictionary.txt") 26 | -enableAdmin 27 | 设置是否开启后台管理 (default true) 28 | -enableGzip 29 | 是否开启gzip压缩 (default true) 30 | -gomaxprocs int 31 | 设置GOMAXPROCS (default 20) 32 | -timeout int 33 | 数据库超时关闭时间(秒) (default 600) 34 | 35 | 36 | ``` 37 | 38 | ### addr 39 | 40 | 指定要监听的地址和端口。默认为`127.0.0.1:5678` 监听本地地址。 41 | 42 | ```shell 43 | ./gofound --addr=127.0.0.1:5678 44 | ./gofound --addr=:5678 45 | ./gofound --addr=0.0.0.0:5678 46 | 
./gofound --addr=192.168.1.1:5678 47 | ``` 48 | 49 | ### auth 50 | 51 | 设置admin和api接口的用户名密码,采用basic auth 52 | 53 | ```shell 54 | ./gofound --auth=admin:123456 55 | ``` 56 | 57 | ### data 58 | 59 | 指定索引数据存储的目录,可以是相对路径,也可以是绝对路径。 60 | 61 | 相对路径是存在`gofound`所在目录下的。 62 | 63 | 64 | 65 | ```shell 66 | ./gofound --data=./data 67 | ./gofound --data=/www/data 68 | ``` 69 | 70 | ### debug 71 | 72 | 设置是否开启调试模式。默认为`true`。 73 | 74 | ```shell 75 | ./gofound --debug=false 76 | ``` 77 | 78 | ### dictionary 79 | 80 | 设置自定义词典路径。默认为`./data/dictionary.txt`。 81 | 82 | ```shell 83 | ./gofound --dictionary=./data/dictionary.txt 84 | ``` 85 | 86 | ### enableAdmin 87 | 88 | 设置是否开启后台管理。默认为`true`。 89 | 90 | ```shell 91 | ./gofound --enableAdmin=false 92 | ``` 93 | 94 | ### enableGzip 95 | 96 | 设置是否开启gzip压缩。默认为`true`。 97 | 98 | ```shell 99 | ./gofound --enableGzip=false 100 | ``` 101 | 102 | ### gomaxprocs 103 | 104 | 设置GOMAXPROCS。默认为CPU数量X2。 105 | 106 | ```shell 107 | ./gofound --gomaxprocs=10 108 | ``` 109 | 110 | ### shard 111 | 112 | 设置文件分片数量。默认为`10`。分片越多查询会越快,相反的磁盘IO和CPU会越多。 113 | 114 | ```shell 115 | ./gofound --shard=10 116 | ``` 117 | 118 | ### timeout 119 | 120 | 单位为秒。默认为600秒。 121 | 122 | 数据库超时关闭时间,如果设置为-1,表示永不关闭,适合频繁查询的。如果时间过久会造成内存占用过多 123 | 124 | ```shell 125 | ./gofound --timeout=600 126 | ``` -------------------------------------------------------------------------------- /docs/example.md: -------------------------------------------------------------------------------- 1 | # 示例 2 | 3 | 编译好的下载地址: 4 | [https://github.com/sea-team/gofound/releases](https://github.com/sea-team/gofound/releases) 5 | 6 | 将编译后的`gofound`文件复制到`/usr/local/bin`目录下,然后在命令行中运行`gofound`命令即可。 7 | 8 | ```shell 9 | gofound --addr=:5678 --data=./data 10 | ``` 11 | 12 | 启动成功后,就可以调用[API](./api.md)来进行索引和查询了。 13 | 14 | -------------------------------------------------------------------------------- /docs/images/img1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sea-team/gofound/eec89a008c64a278978db27ef40e9e248f2c6aac/docs/images/img1.png -------------------------------------------------------------------------------- /docs/images/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sea-team/gofound/eec89a008c64a278978db27ef40e9e248f2c6aac/docs/images/img2.png -------------------------------------------------------------------------------- /docs/images/index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sea-team/gofound/eec89a008c64a278978db27ef40e9e248f2c6aac/docs/images/index.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # 索引原理和流程 2 | 3 | `gofound` 采用平衡二叉树对文本关键词进行索引,然后利用`leveldb`存储id值,以及对应的文档。 4 | 5 | ## 原理图 6 | 7 | ![](./images/index.png) 8 | 9 | ## 二叉平衡查找树 10 | 11 | 二叉平衡查找树是一个高效的查找树,它的查找速度是`O(log n)`,并且每个节点的子树都是平衡的。 12 | `gofound`默认是分10个文件块,也就是10个平衡查找树,每个平衡查找树的深度是`log10(n)`。 13 | 14 | 1亿条索引在一颗树查找最大26次,如果10亿数据,最大查找也是26次,会根据key的hash值取模shard数量,来找到对应的索引进行检索。 -------------------------------------------------------------------------------- /docs/release.md: -------------------------------------------------------------------------------- 1 | # GoFound发布日志 2 | 3 | ## 1.1 4 | + 优化内存占用 5 | + 提升查询速度 6 | + 增加自定义词库配置 7 | + 增加Admin界面 8 | + 增加认证功能 9 | 10 | ## 1.0.2 11 | + 完成基础功能 -------------------------------------------------------------------------------- /docs/storage.md: -------------------------------------------------------------------------------- 1 | # 持久化 2 | 3 | 持久化采用golang版本的leveldb 4 | 5 | + 关键词与ID映射 6 | 7 | 二叉树的每个关键词都与ID相关联,这样在搜索的时候,可以先找到索引的key,然后在通过key找到对应的id数组。 8 | 9 | 映射文件采用的是`leveldb`存储,编码格式为`gob` 10 | 11 | 
[查看源码](../searcher/storage/leveldb_storage.go) 12 | 13 | 14 | + 文档 15 | 16 | 文档是指在索引时传入的数据,在搜索的时候会原样返回。 17 | 18 | 存储文件采用的是leveldb存储,编码格式为gob 19 | 20 | [查看源码](../searcher/storage/leveldb_storage.go) -------------------------------------------------------------------------------- /docs/test.md: -------------------------------------------------------------------------------- 1 | # 内存 2 | 3 | 我们的目标是以最小的内存使用和最大的性能,带来非凡的体验。 4 | 5 | 测试以1000万数据为基数。 6 | 7 | ## 内存理论 8 | 9 | 索引100亿条数据,只需要27.3MB的内存(经过bitmap的压缩),磁盘空间与1.0x一致。 10 | 11 | 查询100亿条搜索结果,只需要27.3MB的内存(经过bitmap的压缩)。 12 | 13 | 相比1.0x版本,内存可以减少 2794.43倍。同时不兼容1.0x版本的索引,需要重新索引一次。 14 | 15 | ## 查询理论 16 | 17 | 索引时间相比1.0x减少33.8倍,查询时间比1.0x快23%。如果数据超过亿级,查询速度比1.0x版本慢3倍。 -------------------------------------------------------------------------------- /global/config.go: -------------------------------------------------------------------------------- 1 | package global 2 | 3 | // Config holds the server settings. NOTE(review): fields mix `yaml` and `json` struct tags - confirm which decoder populates this struct. 4 | type Config struct { 5 | Addr string `yaml:"addr"` // listen address 6 | Data string `json:"data"` // data directory 7 | Debug bool `yaml:"debug"` // debug mode 8 | Dictionary string `json:"dictionary"` // dictionary path 9 | EnableAdmin bool `yaml:"enableAdmin"` // enable the admin UI 10 | Gomaxprocs int `json:"gomaxprocs"` // GOMAXPROCS 11 | Shard int `yaml:"shard"` // number of shards 12 | Auth string `json:"auth"` // authentication 13 | EnableGzip bool `yaml:"enableGzip"` // whether gzip compression is enabled 14 | Timeout int64 `json:"timeout"` // timeout 15 | BufferNum int `yaml:"bufferNum"` // shard buffer count 16 | } 17 | -------------------------------------------------------------------------------- /global/global.go: -------------------------------------------------------------------------------- 1 | package global 2 | 3 | import ( 4 | "github.com/sea-team/gofound/searcher" 5 | ) 6 | 7 | var ( 8 | CONFIG *Config // server settings 9 | Container *searcher.Container 10 | ) 11 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module 
github.com/sea-team/gofound 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/Knetic/govaluate v3.0.0+incompatible 7 | github.com/emirpasic/gods v1.12.0 8 | github.com/gin-contrib/gzip v0.0.5 9 | github.com/gin-gonic/gin v1.7.7 10 | github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 11 | github.com/shirou/gopsutil/v3 v3.22.4 12 | github.com/syndtr/goleveldb v1.0.0 13 | github.com/wangbin/jiebago v0.3.2 14 | gopkg.in/yaml.v2 v2.4.0 15 | ) 16 | 17 | require ( 18 | github.com/gin-contrib/sse v0.1.0 // indirect 19 | github.com/go-ole/go-ole v1.2.6 // indirect 20 | github.com/go-playground/locales v0.13.0 // indirect 21 | github.com/go-playground/universal-translator v0.17.0 // indirect 22 | github.com/go-playground/validator/v10 v10.4.1 // indirect 23 | github.com/golang/protobuf v1.5.2 // indirect 24 | github.com/golang/snappy v0.0.3 // indirect 25 | github.com/json-iterator/go v1.1.12 // indirect 26 | github.com/kr/pretty v0.2.0 // indirect 27 | github.com/leodido/go-urn v1.2.0 // indirect 28 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect 29 | github.com/mattn/go-isatty v0.0.14 // indirect 30 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 31 | github.com/modern-go/reflect2 v1.0.2 // indirect 32 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect 33 | github.com/tklauser/go-sysconf v0.3.10 // indirect 34 | github.com/tklauser/numcpus v0.4.0 // indirect 35 | github.com/ugorji/go/codec v1.1.7 // indirect 36 | github.com/yusufpapurcu/wmi v1.2.2 // indirect 37 | golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect 38 | golang.org/x/net v0.0.0-20220412020605-290c469a71a5 // indirect 39 | golang.org/x/sys v0.0.0-20220412211240-33da011f77ad // indirect 40 | golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect 41 | google.golang.org/protobuf v1.28.0 // indirect 42 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect 43 | 
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect 44 | ) 45 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/Knetic/govaluate v3.0.0+incompatible h1:7o6+MAPhYTCF0+fdvoz1xDedhRb4f6s9Tn1Tt7/WTEg= 2 | github.com/Knetic/govaluate v3.0.0+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg= 7 | github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o= 8 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 9 | github.com/gin-contrib/gzip v0.0.5 h1:mhnVU32YnnBh2LPH2iqRqsA/eR7SAqRaD388jL2s/j0= 10 | github.com/gin-contrib/gzip v0.0.5/go.mod h1:OPIK6HR0Um2vNmBUTlayD7qle4yVVRZT0PyhdUigrKk= 11 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= 12 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= 13 | github.com/gin-gonic/gin v1.7.4/go.mod h1:jD2toBW3GZUr5UMcdrwQA10I7RuaFOl/SGeDjXkfUtY= 14 | github.com/gin-gonic/gin v1.7.7 h1:3DoBmSbJbZAWqXJC3SLjAPfutPJJRN1U5pALB7EeTTs= 15 | github.com/gin-gonic/gin v1.7.7/go.mod h1:axIBovoeJpVj8S3BwE0uPMTeReE4+AfFtqpqaZ1qq1U= 16 | github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= 17 | github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= 18 | github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= 19 | github.com/go-playground/assert/v2 v2.0.1/go.mod 
h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= 20 | github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= 21 | github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= 22 | github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= 23 | github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= 24 | github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE= 25 | github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= 26 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 27 | github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= 28 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 29 | github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= 30 | github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 31 | github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 32 | github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= 33 | github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= 34 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 35 | github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 36 | github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= 37 | github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= 38 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 39 | github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= 40 | github.com/hpcloud/tail 
v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 41 | github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= 42 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 43 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 44 | github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= 45 | github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 46 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 47 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 48 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 49 | github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= 50 | github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= 51 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= 52 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= 53 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 54 | github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= 55 | github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 56 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 57 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 58 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 59 | github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 60 | github.com/modern-go/reflect2 v1.0.2 
h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 61 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 62 | github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 63 | github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= 64 | github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 65 | github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU= 66 | github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= 67 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 68 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 69 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= 70 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= 71 | github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 h1:GHRpF1pTW19a8tTFrMLUcfWwyC0pnifVo2ClaLq+hP8= 72 | github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5PCi+MFsC7HjREoAz1BU+Mq60+05gifQSsHSDG/8= 73 | github.com/shirou/gopsutil/v3 v3.22.4 h1:srAQaiX6jX/cYL6q29aE0m8lOskT9CurZ9N61YR3yoI= 74 | github.com/shirou/gopsutil/v3 v3.22.4/go.mod h1:D01hZJ4pVHPpCTZ3m3T2+wDF2YAGfd+H4ifUguaQzHM= 75 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 76 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 77 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 78 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 79 | github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= 80 | github.com/stretchr/testify v1.7.1/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 81 | github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= 82 | github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= 83 | github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw= 84 | github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk= 85 | github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o= 86 | github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ= 87 | github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= 88 | github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs= 89 | github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= 90 | github.com/wangbin/jiebago v0.3.2 h1:reQKp0xTXWFK7eQ19L6Ofq5xODSR2hcam55qcdCCNpw= 91 | github.com/wangbin/jiebago v0.3.2/go.mod h1:PAqQLauF0qAzy/63jBvO7Goh0oYBq1ocr0OXHLlujwQ= 92 | github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= 93 | github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= 94 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 95 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 96 | golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 h1:kUhD7nTDoI3fVd9G4ORWrbV5NY0liEs/Jg2pv5f+bBA= 97 | golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= 98 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 99 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 100 | golang.org/x/net v0.0.0-20220412020605-290c469a71a5 
h1:bRb386wvrE+oBNdF1d/Xh9mQrfQ4ecYhW5qJ5GvTGT4= 101 | golang.org/x/net v0.0.0-20220412020605-290c469a71a5/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= 102 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 103 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 104 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 105 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 106 | golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 107 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 108 | golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 109 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 110 | golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 111 | golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0= 112 | golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 113 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 114 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 115 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= 116 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 117 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 118 | golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f h1:GGU+dLjvlC3qDwqYgL6UgRmHXhOOgns0bZu2Ty5mm6U= 119 | 
golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 120 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 121 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 122 | google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= 123 | google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 124 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 125 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= 126 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 127 | gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= 128 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 129 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 130 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 131 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 132 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 133 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 134 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 135 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 136 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 137 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= 138 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 139 | 
-------------------------------------------------------------------------------- /gofound.d: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # chkconfig: 2345 90 10 3 | # Description: Startup script for gofound on Debian. Place in /etc/init.d and 4 | # run 'update-rc.d -f gofound defaults', or use the appropriate command on your 5 | # distro. For CentOS/Redhat run: 'chkconfig --add gofound' 6 | 7 | ### BEGIN INIT INFO 8 | # 9 | # Provides: gofound.d 10 | # Required-Start: $local_fs $remote_fs 11 | # Required-Stop: $local_fs $remote_fs 12 | # Default-Start: 2 3 4 5 13 | # Default-Stop: 0 1 6 14 | # Short-Description: starts gofound 15 | # Description: This file should be used to gofound scripts to be placed in /etc/init.d. 16 | # 17 | ### END INIT INFO 18 | 19 | 20 | ## 2345是默认启动级别,级别有0-6共7个级别。 90是启动优先级,10是停止优先级,优先级范围是0-100,数字越大,优先级越低。 21 | 22 | ## Fill in name of program here. 23 | PROG="gofound" 24 | PROG_PATH="/usr/local/bin" ## Not need, but sometimes helpful (if $PROG resides in /opt for example). 25 | PROG_ARGS="--config=/gofound_path/config.yaml" 26 | PID_PATH="/var/run/" 27 | 28 | start() { 29 | if [ -e "$PID_PATH/$PROG.pid" ]; then 30 | ## Program is running, exit with error. 31 | echo "Error! $PROG_PATH/$PROG is currently running!" 1>&2 32 | exit 1 33 | else 34 | ## Change from /dev/null to something like /var/log/$PROG if you want to save output. 
35 | $PROG_PATH/$PROG $PROG_ARGS >>/var/log/$PROG 2>&1 & ## redirect stdout to the log first, then point stderr at it, so both are captured 36 | #pid=`ps ax | grep -i '/usr/bin/frps' | grep -v 'grep' | sed 's/^\([0-9]\{1,\}\).*/\1/g' | head -n 1` 37 | pid=`ps -ef | grep $PROG_PATH/$PROG | grep -v grep | awk '{print $2}'` 38 | #echo $PROG_PATH/$PROG $PROG_ARGS 39 | echo "$PROG_PATH/$PROG($pid) started" 40 | echo $pid > "$PID_PATH/$PROG.pid" 41 | fi 42 | } 43 | 44 | stop() { 45 | echo "begin stop" 46 | if [ -e "$PID_PATH/$PROG.pid" ]; then 47 | ## Program is running, so stop it 48 | #pid=`ps ax | grep -i '/usr/bin/frps' | grep -v 'grep' | sed 's/^\([0-9]\{1,\}\).*/\1/g' | head -n 1` 49 | pid=`ps -ef | grep $PROG_PATH/$PROG | grep -v grep | awk '{print $2}'` 50 | [ -n "$pid" ] && kill $pid ## skip kill when no matching process was found (stale pid file) 51 | 52 | rm -f "$PID_PATH/$PROG.pid" 53 | echo "$PROG_PATH/$PROG($pid) stopped" 54 | else 55 | ## Program is not running, exit with error. 56 | echo "Error! $PROG_PATH/$PROG not started!" 1>&2 57 | fi 58 | } 59 | 60 | status() { 61 | if [ -e "$PID_PATH/$PROG.pid" ]; then 62 | ## PID file exists; check whether a matching process is still alive 63 | #pid=`ps ax | grep -i '/usr/bin/frps' | grep -v 'grep' | sed 's/^\([0-9]\{1,\}\).*/\1/g' | head -n 1` 64 | pid=`ps -ef | grep $PROG_PATH/$PROG | grep -v grep | awk '{print $2}'` 65 | 66 | if [ -n "$pid" ]; then 67 | echo "$PROG_PATH/$PROG($pid) is running..." 68 | else 69 | echo "$PROG_PATH/$PROG dead but pid file exists" 1>&2 70 | fi 71 | else 72 | ## Program is not running, exit with error. 73 | echo "Error! $PROG_PATH/$PROG not started!" 1>&2 74 | fi 75 | } 76 | 77 | 78 | ## Check to see if we are running as root first. 
79 | ## Found at http://www.cyberciti.biz/tips/shell-root-user-check-script.html 80 | if [ "$(id -u)" != "0" ]; then 81 | echo "This script must be run as root" 1>&2 82 | exit 1 83 | fi 84 | 85 | case "$1" in 86 | start) 87 | start 88 | exit 0 89 | ;; 90 | stop) 91 | echo '' > /var/log/$PROG 92 | stop 93 | exit 0 94 | ;; 95 | reload|restart|force-reload) 96 | stop 97 | start 98 | exit 0 99 | ;; 100 | status) 101 | status 102 | exit 0 103 | ;; 104 | *) 105 | echo "Usage: $0 {start|stop|restart|status}" 1>&2 106 | exit 1 107 | ;; 108 | esac 109 | -------------------------------------------------------------------------------- /gofound.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Crontab entry: check every minute that gofound is running 4 | #*/1 * * * * /data/gofound/gofound.sh > /dev/null 2>&1 5 | 6 | # Crontab entry: restart gofound every day at 03:00 7 | #0 3 * * * /etc/init.d/gofound.d restart 8 | 9 | count=`ps -fe |grep "gofound"|grep "config.yaml" -c` 10 | 11 | echo $count 12 | if [ $count -lt 1 ]; then 13 | echo "restart" 14 | echo $(date +%Y-%m-%d_%H:%M:%S) >/data/gofound/restart.log 15 | /etc/init.d/gofound.d restart 16 | else 17 | echo "is running" 18 | fi -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/sea-team/gofound/core" 5 | ) 6 | 7 | func main() { 8 | // Initialize the container and parse the parameters 9 | core.Initialize() 10 | } 11 | -------------------------------------------------------------------------------- /sdk/SDK 设计指南.md: -------------------------------------------------------------------------------- 1 | # GoFound SDK设计指南 2 | 3 | ## 支持自定义配置 4 | 在支持自定义配置的时候,同时提供默认配置项 5 | ## 6 | 支持`gofound` 提供的所有操作,增删改查等 7 | 8 | -------------------------------------------------------------------------------- /sdk/base.go: -------------------------------------------------------------------------------- 1 | package gofound 2 | 3 | import ( 4 | 
"github.com/sea-team/gofound/searcher/model" 5 | "github.com/sea-team/gofound/searcher/system" 6 | "runtime" 7 | ) 8 | 9 | // Query 查询 10 | func (c *Client) Query(req *model.SearchRequest) (*model.SearchResult, error) { 11 | r, err := c.container.GetDataBase(req.Database).MultiSearch(req) 12 | if err != nil { 13 | return nil, err 14 | } 15 | 16 | return r, nil 17 | } 18 | 19 | func (*Client) GC() { 20 | runtime.GC() 21 | } 22 | func (c *Client) Status() (map[string]interface{}, error) { 23 | var m runtime.MemStats 24 | runtime.ReadMemStats(&m) 25 | 26 | // TODO 其他系统信息 27 | r := map[string]interface{}{ 28 | "memory": system.GetMemStat(), 29 | "cpu": system.GetCPUStatus(), 30 | "disk": system.GetDiskStat(), 31 | } 32 | return r, nil 33 | } 34 | -------------------------------------------------------------------------------- /sdk/client.go: -------------------------------------------------------------------------------- 1 | package gofound 2 | 3 | import ( 4 | "fmt" 5 | "github.com/sea-team/gofound/core" 6 | "github.com/sea-team/gofound/global" 7 | "github.com/sea-team/gofound/searcher" 8 | "os" 9 | "runtime" 10 | "sync" 11 | ) 12 | 13 | var once sync.Once 14 | 15 | // Client 应该对外部屏蔽细节 16 | // 尽量少的提供接口,但是又要保证功能性 17 | type Client struct { 18 | config *global.Config //服务配置 19 | container *searcher.Container //运行实体 20 | } 21 | 22 | func newDefaultConfig() *global.Config { 23 | return &global.Config{ 24 | Addr: "127.0.0.1:5678", 25 | Data: fmt.Sprintf(".%sdata", string(os.PathSeparator)), 26 | Debug: true, 27 | Dictionary: "./data/dictionary.txt", 28 | EnableAdmin: true, 29 | Gomaxprocs: runtime.NumCPU() * 2, 30 | Shard: 0, 31 | Auth: "", 32 | EnableGzip: true, 33 | Timeout: 10 * 60, 34 | } 35 | } 36 | func newTokenizerAndContainer(config *global.Config) *searcher.Container { 37 | tokenizer := core.NewTokenizer(global.CONFIG.Dictionary) 38 | return core.NewContainer(tokenizer) 39 | } 40 | 41 | // NewClient 通过参数进行配置,必须指定全部参数 42 | func NewClient(config *global.Config) 
*Client { 43 | global.CONFIG = config 44 | //初始化分词器 45 | container := newTokenizerAndContainer(config) 46 | global.Container = container 47 | return &Client{ 48 | config: config, 49 | container: container, 50 | } 51 | } 52 | 53 | // Default 使用默认参数创建服务 54 | func Default() *Client { 55 | global.CONFIG = newDefaultConfig() 56 | container := newTokenizerAndContainer(global.CONFIG) 57 | global.Container = container 58 | return &Client{ 59 | config: global.CONFIG, 60 | container: container, 61 | } 62 | } 63 | 64 | // SetAddr 设置Web服务地址 65 | func (c *Client) SetAddr(addr string) *Client { 66 | if addr == "" { 67 | return c 68 | } 69 | c.config.Addr = addr 70 | return c 71 | } 72 | 73 | // SetData 设置数据存放地址 74 | func (c *Client) SetData(dir string) *Client { 75 | if dir == "" { 76 | return c 77 | } 78 | c.config.Data = dir 79 | return c 80 | } 81 | 82 | //TODO 其他配置项 83 | -------------------------------------------------------------------------------- /sdk/database.go: -------------------------------------------------------------------------------- 1 | package gofound 2 | 3 | import ( 4 | "github.com/sea-team/gofound/searcher" 5 | 6 | "github.com/syndtr/goleveldb/leveldb/errors" 7 | ) 8 | 9 | // Show 查看数据库 10 | func (c *Client) Show() (map[string]*searcher.Engine, error) { 11 | // 保持分格一致 12 | return c.container.GetDataBases(), nil 13 | } 14 | 15 | // Drop 删除数据库 16 | func (c *Client) Drop(dbName string) error { 17 | if dbName == "" { 18 | return errors.New("database not exist") 19 | } 20 | if err := c.container.DropDataBase(dbName); err != nil { 21 | return err 22 | } 23 | return nil 24 | } 25 | 26 | // Create 创建数据库 27 | func (c *Client) Create(dbName string) (*searcher.Engine, error) { 28 | if dbName == "" { 29 | return nil, errors.New("database name is empty") 30 | } 31 | return c.container.GetDataBase(dbName), nil 32 | } 33 | -------------------------------------------------------------------------------- /sdk/index.go: 
-------------------------------------------------------------------------------- 1 | package gofound 2 | 3 | import ( 4 | "errors" 5 | "github.com/sea-team/gofound/searcher/model" 6 | ) 7 | 8 | // AddIndex 添加索引 9 | func (c *Client) AddIndex(dbName string, request *model.IndexDoc) error { 10 | if request.Text == "" { 11 | return errors.New("text is empty") 12 | } 13 | c.container.GetDataBase(dbName).IndexDocument(request) 14 | 15 | return nil 16 | } 17 | 18 | // BatchAddIndex 批次添加索引 19 | func (c *Client) BatchAddIndex(dbName string, documents []*model.IndexDoc) error { 20 | db := c.container.GetDataBase(dbName) 21 | // 数据预处理 22 | for _, doc := range documents { 23 | if doc.Text == "" { 24 | return errors.New("text is empty") 25 | } 26 | if doc.Document == nil { 27 | return errors.New("document is empty") 28 | } 29 | } 30 | for _, doc := range documents { 31 | go db.IndexDocument(doc) 32 | } 33 | return nil 34 | } 35 | 36 | // RemoveIndex 删除索引 37 | func (c *Client) RemoveIndex(dbName string, data *model.RemoveIndexModel) error { 38 | db := c.container.GetDataBase(dbName) 39 | if err := db.RemoveIndex(data.Id); err != nil { 40 | return err 41 | } 42 | return nil 43 | } 44 | -------------------------------------------------------------------------------- /sdk/word.go: -------------------------------------------------------------------------------- 1 | package gofound 2 | 3 | // WordCut 分词 4 | func (c *Client) WordCut(keyword string) []string { 5 | return c.container.Tokenizer.Cut(keyword) 6 | } 7 | 8 | // BatchWordCut 批量分词 9 | func (c *Client) BatchWordCut(keywords []string) *[][]string { 10 | res := make([][]string, len(keywords)) 11 | for _, w := range keywords { 12 | res = append(res, c.container.Tokenizer.Cut(w)) 13 | } 14 | return &res 15 | } 16 | -------------------------------------------------------------------------------- /searcher/arrays/arrays.go: -------------------------------------------------------------------------------- 1 | package arrays 2 | 3 | 
// BinarySearch reports whether target is present in arr.
//
// arr must be sorted in ascending order. A nil or empty slice never contains
// the target; the original only guarded against nil and panicked with an
// index-out-of-range on an empty, non-nil slice.
func BinarySearch(arr []uint32, target uint32) bool {
	if len(arr) == 0 {
		return false
	}

	low, high := 0, len(arr)-1
	// Narrow [low, high] down to the first element that is >= target.
	for low < high {
		mid := low + (high-low)/2
		if arr[mid] >= target {
			high = mid
		} else {
			low = mid + 1
		}
	}
	return arr[low] == target
}
!= nil { 34 | return err 35 | } 36 | } else { 37 | return err 38 | } 39 | } 40 | //初始化数据库 41 | for _, dir := range dirs { 42 | if dir.IsDir() { 43 | c.engines[dir.Name()] = c.GetDataBase(dir.Name()) 44 | log.Println("db:", dir.Name()) 45 | } 46 | } 47 | 48 | return nil 49 | } 50 | 51 | // NewEngine 创建一个引擎 52 | func (c *Container) NewEngine(name string) *Engine { 53 | var engine = &Engine{ 54 | IndexPath: fmt.Sprintf("%s%c%s", c.Dir, os.PathSeparator, name), 55 | DatabaseName: name, 56 | Tokenizer: c.Tokenizer, 57 | Shard: c.Shard, 58 | Timeout: c.Timeout, 59 | BufferNum: c.BufferNum, 60 | } 61 | option := engine.GetOptions() 62 | 63 | engine.InitOption(option) 64 | engine.IsDebug = c.Debug 65 | 66 | return engine 67 | } 68 | 69 | // GetDataBase 获取或创建引擎 70 | func (c *Container) GetDataBase(name string) *Engine { 71 | 72 | //默认数据库名为default 73 | if name == "" { 74 | name = "default" 75 | } 76 | 77 | //log.Println("Get DataBase:", name) 78 | engine, ok := c.engines[name] 79 | if !ok { 80 | //创建引擎 81 | engine = c.NewEngine(name) 82 | c.engines[name] = engine 83 | //释放引擎 84 | } 85 | 86 | return engine 87 | } 88 | 89 | // GetDataBases 获取数据库列表 90 | func (c *Container) GetDataBases() map[string]*Engine { 91 | for _, engine := range c.engines { 92 | size := unsafe.Sizeof(&engine) 93 | fmt.Printf("%s:%d\n", engine.DatabaseName, size) 94 | } 95 | return c.engines 96 | } 97 | 98 | func (c *Container) GetDataBaseNumber() int { 99 | return len(c.engines) 100 | } 101 | 102 | func (c *Container) GetIndexCount() int64 { 103 | var count int64 104 | for _, engine := range c.engines { 105 | count += engine.GetIndexCount() 106 | } 107 | return count 108 | } 109 | 110 | func (c *Container) GetDocumentCount() int64 { 111 | var count int64 112 | for _, engine := range c.engines { 113 | count += engine.GetDocumentCount() 114 | } 115 | return count 116 | } 117 | 118 | // DropDataBase 删除数据库 119 | func (c *Container) DropDataBase(name string) error { 120 | if _, ok := c.engines[name]; !ok { 
121 | return errors.New("数据库不存在") 122 | } 123 | err := c.engines[name].Drop() 124 | if err != nil { 125 | return err 126 | } 127 | 128 | delete(c.engines, name) 129 | //释放资源 130 | runtime.GC() 131 | 132 | return nil 133 | } 134 | -------------------------------------------------------------------------------- /searcher/container_test.go: -------------------------------------------------------------------------------- 1 | package searcher 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestContainer_Init(t *testing.T) { 9 | c := &Container{ 10 | Dir: "/Users/panjing/GolandProjects/github.com/sea-team/gofound/dbs", 11 | Debug: true, 12 | } 13 | err := c.Init() 14 | if err != nil { 15 | panic(err) 16 | } 17 | 18 | test := c.GetDataBase("test") 19 | 20 | fmt.Println(test.GetIndexCount()) 21 | 22 | all := c.GetDataBases() 23 | for name, engine := range all { 24 | fmt.Println(name) 25 | fmt.Println(engine) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /searcher/engine.go: -------------------------------------------------------------------------------- 1 | package searcher 2 | 3 | import ( 4 | "fmt" 5 | "github.com/sea-team/gofound/searcher/arrays" 6 | "github.com/sea-team/gofound/searcher/model" 7 | "github.com/sea-team/gofound/searcher/pagination" 8 | "github.com/sea-team/gofound/searcher/sorts" 9 | "github.com/sea-team/gofound/searcher/storage" 10 | "github.com/sea-team/gofound/searcher/utils" 11 | "github.com/sea-team/gofound/searcher/words" 12 | "log" 13 | "os" 14 | "runtime" 15 | "sort" 16 | "strings" 17 | "sync" 18 | "time" 19 | 20 | "github.com/Knetic/govaluate" 21 | "github.com/syndtr/goleveldb/leveldb/errors" 22 | ) 23 | 24 | type Engine struct { 25 | IndexPath string //索引文件存储目录 26 | Option *Option //配置 27 | 28 | invertedIndexStorages []*storage.LeveldbStorage //关键字和Id映射,倒排索引,key=id,value=[]words 29 | positiveIndexStorages []*storage.LeveldbStorage //ID和key映射,用于计算相关度,一个id 对应多个key,正排索引 30 | 
docStorages []*storage.LeveldbStorage //文档仓 31 | 32 | sync.Mutex //锁 33 | sync.WaitGroup //等待 34 | addDocumentWorkerChan []chan *model.IndexDoc //添加索引的通道 35 | IsDebug bool //是否调试模式 36 | Tokenizer *words.Tokenizer //分词器 37 | DatabaseName string //数据库名 38 | 39 | Shard int //分片数 40 | Timeout int64 //超时时间,单位秒 41 | BufferNum int //分片缓冲数 42 | 43 | documentCount int64 //文档总数量 44 | } 45 | 46 | type Option struct { 47 | InvertedIndexName string //倒排索引 48 | PositiveIndexName string //正排索引 49 | DocIndexName string //文档存储 50 | } 51 | 52 | // Init 初始化索引引擎 53 | func (e *Engine) Init() { 54 | e.Add(1) 55 | defer e.Done() 56 | 57 | if e.Option == nil { 58 | e.Option = e.GetOptions() 59 | } 60 | if e.Timeout == 0 { 61 | e.Timeout = 10 * 3 // 默认30s 62 | } 63 | //-1代表没有初始化 64 | e.documentCount = -1 65 | //log.Println("数据存储目录:", e.IndexPath) 66 | log.Println("chain num:", e.Shard*e.BufferNum) 67 | 68 | e.addDocumentWorkerChan = make([]chan *model.IndexDoc, e.Shard) 69 | //初始化文件存储 70 | for shard := 0; shard < e.Shard; shard++ { 71 | 72 | //初始化chan 73 | worker := make(chan *model.IndexDoc, e.BufferNum) 74 | e.addDocumentWorkerChan[shard] = worker 75 | 76 | //初始化chan 77 | go e.DocumentWorkerExec(worker) 78 | 79 | s, err := storage.NewStorage(e.getFilePath(fmt.Sprintf("%s_%d", e.Option.DocIndexName, shard)), e.Timeout) 80 | if err != nil { 81 | panic(err) 82 | } 83 | e.docStorages = append(e.docStorages, s) 84 | 85 | //初始化Keys存储 86 | ks, kerr := storage.NewStorage(e.getFilePath(fmt.Sprintf("%s_%d", e.Option.InvertedIndexName, shard)), e.Timeout) 87 | if kerr != nil { 88 | panic(err) 89 | } 90 | e.invertedIndexStorages = append(e.invertedIndexStorages, ks) 91 | 92 | //id和keys映射 93 | iks, ikerr := storage.NewStorage(e.getFilePath(fmt.Sprintf("%s_%d", e.Option.PositiveIndexName, shard)), e.Timeout) 94 | if ikerr != nil { 95 | panic(ikerr) 96 | } 97 | e.positiveIndexStorages = append(e.positiveIndexStorages, iks) 98 | } 99 | go e.automaticGC() 100 | //log.Println("初始化完成") 101 | } 102 | 103 | 
// 自动保存索引,10秒钟检测一次 104 | func (e *Engine) automaticGC() { 105 | ticker := time.NewTicker(time.Second * 10) 106 | for { 107 | <-ticker.C 108 | //定时GC 109 | runtime.GC() 110 | if e.IsDebug { 111 | log.Println("waiting:", e.GetQueue()) 112 | } 113 | } 114 | } 115 | 116 | func (e *Engine) IndexDocument(doc *model.IndexDoc) error { 117 | //数量增加 118 | e.documentCount++ 119 | e.addDocumentWorkerChan[e.getShard(doc.Id)] <- doc 120 | return nil 121 | /* 122 | select { 123 | case e.addDocumentWorkerChan[e.getShard(doc.Id)] <- doc: 124 | e.documentCount++ 125 | default: 126 | return errors.New("处理缓冲已满") 127 | } 128 | return nil 129 | */ 130 | } 131 | 132 | // GetQueue 获取队列剩余 133 | func (e *Engine) GetQueue() int { 134 | total := 0 135 | for _, v := range e.addDocumentWorkerChan { 136 | total += len(v) 137 | } 138 | return total 139 | } 140 | 141 | // DocumentWorkerExec 添加文档队列 142 | func (e *Engine) DocumentWorkerExec(worker chan *model.IndexDoc) { 143 | for { 144 | doc := <-worker 145 | e.AddDocument(doc) 146 | } 147 | } 148 | 149 | // getShard 计算索引分布在哪个文件块 150 | func (e *Engine) getShard(id uint32) int { 151 | return int(id % uint32(e.Shard)) 152 | } 153 | 154 | func (e *Engine) getShardByWord(word string) int { 155 | 156 | return int(utils.StringToInt(word) % uint32(e.Shard)) 157 | } 158 | 159 | func (e *Engine) InitOption(option *Option) { 160 | 161 | if option == nil { 162 | //默认值 163 | option = e.GetOptions() 164 | } 165 | e.Option = option 166 | //shard默认值 167 | if e.Shard <= 0 { 168 | e.Shard = 10 169 | } 170 | if e.BufferNum <= 0 { 171 | e.BufferNum = 1000 172 | } 173 | //初始化其他的 174 | e.Init() 175 | 176 | } 177 | 178 | func (e *Engine) getFilePath(fileName string) string { 179 | return e.IndexPath + string(os.PathSeparator) + fileName 180 | } 181 | 182 | func (e *Engine) GetOptions() *Option { 183 | return &Option{ 184 | DocIndexName: "docs", 185 | InvertedIndexName: "inverted_index", 186 | PositiveIndexName: "positive_index", 187 | } 188 | } 189 | 190 | // 
AddDocument 分词索引 191 | func (e *Engine) AddDocument(index *model.IndexDoc) { 192 | //等待初始化完成 193 | e.Wait() 194 | text := index.Text 195 | 196 | splitWords := e.Tokenizer.Cut(text) 197 | 198 | id := index.Id 199 | // 检查是否需要更新倒排索引 words变更/id不存在 200 | inserts, needUpdateInverted := e.optimizeIndex(id, splitWords) 201 | 202 | // 将新增的word剔出单独处理,减少I/O操作 203 | if needUpdateInverted { 204 | for _, word := range inserts { 205 | e.addInvertedIndex(word, id) 206 | } 207 | } 208 | 209 | // TODO: 是否需要更新正排索引 - 检测document变更 210 | e.addPositiveIndex(index, splitWords) 211 | } 212 | 213 | // 添加倒排索引 214 | func (e *Engine) addInvertedIndex(word string, id uint32) { 215 | e.Lock() 216 | defer e.Unlock() 217 | 218 | shard := e.getShardByWord(word) 219 | 220 | s := e.invertedIndexStorages[shard] 221 | 222 | //string作为key 223 | key := []byte(word) 224 | 225 | //存在 226 | //添加到列表 227 | buf, find := s.Get(key) 228 | ids := make([]uint32, 0) 229 | if find { 230 | utils.Decoder(buf, &ids) 231 | } 232 | 233 | if !arrays.ArrayUint32Exists(ids, id) { 234 | ids = append(ids, id) 235 | } 236 | 237 | s.Set(key, utils.Encoder(ids)) 238 | } 239 | 240 | // 移除删去的词 241 | func (e *Engine) optimizeIndex(id uint32, newWords []string) ([]string, bool) { 242 | // 判断id是否存在 243 | e.Lock() 244 | defer e.Unlock() 245 | 246 | // 计算差值 247 | removes, inserts, changed := e.getDifference(id, newWords) 248 | if changed { 249 | if removes != nil && len(removes) > 0 { 250 | // 移除正排索引 251 | for _, word := range removes { 252 | e.removeIdInWordIndex(id, word) 253 | } 254 | } 255 | } 256 | return inserts, changed 257 | } 258 | 259 | func (e *Engine) removeIdInWordIndex(id uint32, word string) { 260 | 261 | shard := e.getShardByWord(word) 262 | 263 | wordStorage := e.invertedIndexStorages[shard] 264 | 265 | //string作为key 266 | key := []byte(word) 267 | 268 | buf, found := wordStorage.Get(key) 269 | if found { 270 | ids := make([]uint32, 0) 271 | utils.Decoder(buf, &ids) 272 | 273 | //移除 274 | index := arrays.Find(ids, id) 
275 | if index != -1 { 276 | ids = utils.DeleteArray(ids, index) 277 | if len(ids) == 0 { 278 | err := wordStorage.Delete(key) 279 | if err != nil { 280 | panic(err) 281 | } 282 | } else { 283 | wordStorage.Set(key, utils.Encoder(ids)) 284 | } 285 | } 286 | } 287 | 288 | } 289 | 290 | // 计算差值 291 | // @return []string: 需要删除的词 292 | // @return bool : words出现变更返回true,否则返回false 293 | func (e *Engine) getDifference(id uint32, newWords []string) ([]string, []string, bool) { 294 | shard := e.getShard(id) 295 | wordStorage := e.positiveIndexStorages[shard] 296 | key := utils.Uint32ToBytes(id) 297 | buf, found := wordStorage.Get(key) 298 | if found { 299 | oldWords := make([]string, 0) 300 | utils.Decoder(buf, &oldWords) 301 | 302 | // 计算需要移除的 303 | removes := make([]string, 0) 304 | for _, word := range oldWords { 305 | // 旧的在新的里面不存在,就是需要移除的 306 | if !arrays.ArrayStringExists(newWords, word) { 307 | removes = append(removes, word) 308 | } 309 | } 310 | // 计算需要新增的 311 | inserts := make([]string, 0) 312 | for _, word := range newWords { 313 | if !arrays.ArrayStringExists(oldWords, word) { 314 | inserts = append(inserts, word) 315 | } 316 | } 317 | if len(removes) != 0 || len(inserts) != 0 { 318 | return removes, inserts, true 319 | } 320 | // 没有改变 321 | return removes, inserts, false 322 | } 323 | // id不存在,相当于insert 324 | return nil, newWords, true 325 | } 326 | 327 | // 添加正排索引 id=>keys id=>doc 328 | func (e *Engine) addPositiveIndex(index *model.IndexDoc, keys []string) { 329 | e.Lock() 330 | defer e.Unlock() 331 | 332 | key := utils.Uint32ToBytes(index.Id) 333 | shard := e.getShard(index.Id) 334 | docStorage := e.docStorages[shard] 335 | 336 | //id和key的映射 337 | positiveIndexStorage := e.positiveIndexStorages[shard] 338 | 339 | doc := &model.StorageIndexDoc{ 340 | IndexDoc: index, 341 | Keys: keys, 342 | } 343 | 344 | //存储id和key以及文档的映射 345 | docStorage.Set(key, utils.Encoder(doc)) 346 | 347 | //设置到id和key的映射中 348 | positiveIndexStorage.Set(key, utils.Encoder(keys)) 349 | } 
350 | 351 | // MultiSearch 多线程搜索 352 | func (e *Engine) MultiSearch(request *model.SearchRequest) (*model.SearchResult, error) { 353 | //等待搜索初始化完成 354 | e.Wait() 355 | 356 | //分词搜索 357 | words := e.Tokenizer.Cut(request.Query) 358 | 359 | fastSort := &sorts.FastSort{ 360 | IsDebug: e.IsDebug, 361 | Order: request.Order, 362 | } 363 | 364 | _time := utils.ExecTime(func() { 365 | 366 | base := len(words) 367 | wg := &sync.WaitGroup{} 368 | wg.Add(base) 369 | 370 | for _, word := range words { 371 | go e.processKeySearch(word, fastSort, wg) 372 | } 373 | wg.Wait() 374 | }) 375 | if e.IsDebug { 376 | log.Println("搜索时间:", _time, "ms") 377 | } 378 | // 处理分页 379 | request = request.GetAndSetDefault() 380 | 381 | //计算交集得分和去重 382 | fastSort.Process() 383 | 384 | wordMap := make(map[string]bool) 385 | for _, word := range words { 386 | wordMap[word] = true 387 | } 388 | 389 | //读取文档 390 | var result = &model.SearchResult{ 391 | Total: fastSort.Count(), 392 | Page: request.Page, 393 | Limit: request.Limit, 394 | Words: words, 395 | } 396 | 397 | t, err := utils.ExecTimeWithError(func() error { 398 | 399 | pager := new(pagination.Pagination) 400 | 401 | pager.Init(request.Limit, fastSort.Count()) 402 | //设置总页数 403 | result.PageCount = pager.PageCount 404 | 405 | //读取单页的id 406 | if pager.PageCount != 0 { 407 | 408 | start, end := pager.GetPage(request.Page) 409 | if request.ScoreExp != "" { 410 | // 分数表达式不为空,获取所有的数据 411 | start, end = 0, pager.Total 412 | } 413 | 414 | var resultItems = make([]model.SliceItem, 0) 415 | fastSort.GetAll(&resultItems, start, end) 416 | 417 | count := len(resultItems) 418 | 419 | result.Documents = make([]model.ResponseDoc, count) 420 | //只读取前面100个 421 | wg := new(sync.WaitGroup) 422 | wg.Add(count) 423 | for index, item := range resultItems { 424 | go e.getDocument(item, &result.Documents[index], request, &wordMap, wg) 425 | } 426 | wg.Wait() 427 | if request.ScoreExp != "" { 428 | // 生成计算表达式 429 | exp, err := 
govaluate.NewEvaluableExpression(request.ScoreExp) 430 | if err != nil { 431 | return err 432 | } 433 | parameters := make(map[string]interface{}) 434 | // 根据表达式计算分数 435 | for i, doc := range result.Documents { 436 | parameters["score"] = doc.Score 437 | for k, v := range doc.Document { 438 | parameters["document."+k] = v 439 | } 440 | val, err := exp.Evaluate(parameters) 441 | if err != nil { 442 | log.Printf("表达式执行'%v'错误: %v 值内容: %v", request.ScoreExp, err, parameters) 443 | } else { 444 | result.Documents[i].Score = int(val.(float64)) 445 | } 446 | } 447 | if request.Order == "desc" { 448 | sort.Sort(sort.Reverse(model.ResponseDocSort(result.Documents))) 449 | } else { 450 | sort.Sort(model.ResponseDocSort(result.Documents)) 451 | } 452 | // 取出page 453 | start, end := pager.GetPage(request.Page) 454 | result.Documents = result.Documents[start:end] 455 | } 456 | } 457 | return nil 458 | }) 459 | if e.IsDebug { 460 | log.Println("处理数据耗时:", _time, "ms") 461 | } 462 | if err != nil { 463 | return nil, err 464 | } 465 | result.Time = _time + t 466 | 467 | return result, nil 468 | } 469 | 470 | func (e *Engine) getDocument(item model.SliceItem, doc *model.ResponseDoc, request *model.SearchRequest, wordMap *map[string]bool, wg *sync.WaitGroup) { 471 | buf := e.GetDocById(item.Id) 472 | defer wg.Done() 473 | doc.Score = item.Score 474 | 475 | if buf != nil { 476 | //gob解析 477 | storageDoc := new(model.StorageIndexDoc) 478 | utils.Decoder(buf, &storageDoc) 479 | doc.Document = storageDoc.Document 480 | doc.Keys = storageDoc.Keys 481 | text := storageDoc.Text 482 | //处理关键词高亮 483 | highlight := request.Highlight 484 | if highlight != nil { 485 | //全部小写 486 | text = strings.ToLower(text) 487 | //还可以优化,只替换击中的词 488 | for _, key := range storageDoc.Keys { 489 | if ok := (*wordMap)[key]; ok { 490 | text = strings.ReplaceAll(text, key, fmt.Sprintf("%s%s%s", highlight.PreTag, key, highlight.PostTag)) 491 | } 492 | } 493 | //放置原始文本 494 | doc.OriginalText = storageDoc.Text 495 | } 
496 | doc.Text = text 497 | doc.Id = item.Id 498 | 499 | } 500 | 501 | } 502 | 503 | func (e *Engine) processKeySearch(word string, fastSort *sorts.FastSort, wg *sync.WaitGroup) { 504 | defer wg.Done() 505 | 506 | shard := e.getShardByWord(word) 507 | //读取id 508 | invertedIndexStorage := e.invertedIndexStorages[shard] 509 | key := []byte(word) 510 | 511 | buf, find := invertedIndexStorage.Get(key) 512 | if find { 513 | ids := make([]uint32, 0) 514 | //解码 515 | utils.Decoder(buf, &ids) 516 | fastSort.Add(&ids) 517 | } 518 | 519 | } 520 | 521 | // GetIndexCount 获取索引数量 522 | func (e *Engine) GetIndexCount() int64 { 523 | var size int64 524 | for i := 0; i < e.Shard; i++ { 525 | size += e.invertedIndexStorages[i].GetCount() 526 | } 527 | return size 528 | } 529 | 530 | // GetDocumentCount 获取文档数量 531 | func (e *Engine) GetDocumentCount() int64 { 532 | if e.documentCount == -1 { 533 | var count int64 534 | //使用多线程加速统计 535 | wg := sync.WaitGroup{} 536 | wg.Add(e.Shard) 537 | //这里的统计可能会出现数据错误,因为没加锁 538 | for i := 0; i < e.Shard; i++ { 539 | go func(i int) { 540 | count += e.docStorages[i].GetCount() 541 | wg.Done() 542 | }(i) 543 | } 544 | wg.Wait() 545 | e.documentCount = count 546 | } 547 | 548 | return e.documentCount 549 | } 550 | 551 | // GetDocById 通过id获取文档 552 | func (e *Engine) GetDocById(id uint32) []byte { 553 | shard := e.getShard(id) 554 | key := utils.Uint32ToBytes(id) 555 | buf, found := e.docStorages[shard].Get(key) 556 | if found { 557 | return buf 558 | } 559 | 560 | return nil 561 | } 562 | 563 | // RemoveIndex 根据ID移除索引 564 | func (e *Engine) RemoveIndex(id uint32) error { 565 | //移除 566 | e.Lock() 567 | defer e.Unlock() 568 | 569 | shard := e.getShard(id) 570 | key := utils.Uint32ToBytes(id) 571 | 572 | //关键字和Id映射 573 | //invertedIndexStorages []*storage.LeveldbStorage 574 | //ID和key映射,用于计算相关度,一个id 对应多个key 575 | ik := e.positiveIndexStorages[shard] 576 | keysValue, found := ik.Get(key) 577 | if !found { 578 | return errors.New(fmt.Sprintf("没有找到id=%d", 
id)) 579 | } 580 | 581 | keys := make([]string, 0) 582 | utils.Decoder(keysValue, &keys) 583 | 584 | //符合条件的key,要移除id 585 | for _, word := range keys { 586 | e.removeIdInWordIndex(id, word) 587 | } 588 | 589 | //删除id映射 590 | err := ik.Delete(key) 591 | if err != nil { 592 | return errors.New(err.Error()) 593 | } 594 | 595 | //文档仓 596 | err = e.docStorages[shard].Delete(key) 597 | if err != nil { 598 | return err 599 | } 600 | //减少数量 601 | e.documentCount-- 602 | 603 | return nil 604 | } 605 | 606 | func (e *Engine) Close() { 607 | e.Lock() 608 | defer e.Unlock() 609 | 610 | for i := 0; i < e.Shard; i++ { 611 | e.invertedIndexStorages[i].Close() 612 | e.positiveIndexStorages[i].Close() 613 | } 614 | } 615 | 616 | // Drop 删除 617 | func (e *Engine) Drop() error { 618 | e.Lock() 619 | defer e.Unlock() 620 | //删除文件 621 | if err := os.RemoveAll(e.IndexPath); err != nil { 622 | return err 623 | } 624 | 625 | //清空内存 626 | for i := 0; i < e.Shard; i++ { 627 | e.docStorages = make([]*storage.LeveldbStorage, 0) 628 | e.invertedIndexStorages = make([]*storage.LeveldbStorage, 0) 629 | e.positiveIndexStorages = make([]*storage.LeveldbStorage, 0) 630 | } 631 | 632 | return nil 633 | } 634 | -------------------------------------------------------------------------------- /searcher/model/doc.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | // IndexDoc 索引实体 4 | type IndexDoc struct { 5 | Id uint32 `json:"id,omitempty"` 6 | Text string `json:"text,omitempty"` 7 | Document map[string]interface{} `json:"document,omitempty"` 8 | } 9 | 10 | // StorageIndexDoc 文档对象 11 | type StorageIndexDoc struct { 12 | *IndexDoc 13 | Keys []string `json:"keys,omitempty"` 14 | } 15 | 16 | type ResponseDoc struct { 17 | IndexDoc 18 | OriginalText string `json:"originalText,omitempty"` 19 | Score int `json:"score,omitempty"` //得分 20 | Keys []string `json:"keys,omitempty"` 21 | } 22 | 23 | type RemoveIndexModel struct { 24 | Id uint32 
// Highlight configures keyword highlighting in search results.
type Highlight struct {
	PreTag  string `json:"preTag"`  // text inserted before a matched keyword
	PostTag string `json:"postTag"` // text inserted after a matched keyword
}

// SearchRequest is a search query.
//
// Every field now has a form tag matching its JSON name. In the original all
// fields except ScoreExp and Database were tagged form:"database", so
// form/query binding filled them from the "database" parameter.
type SearchRequest struct {
	Query     string     `json:"query,omitempty" form:"query"`         // search keywords
	Order     string     `json:"order,omitempty" form:"order"`         // sort order
	ScoreExp  string     `json:"scoreExp,omitempty" form:"scoreExp"`   // score expression
	Page      int        `json:"page,omitempty" form:"page"`           // page number, 1-based
	Limit     int        `json:"limit,omitempty" form:"limit"`         // page size
	Highlight *Highlight `json:"highlight,omitempty" form:"highlight"` // keyword highlighting
	Database  string     `json:"database" form:"database"`             // database name
}

// GetAndSetDefault fills in defaults for unset fields (Limit 100, Page 1,
// Order "desc") and returns the same request.
func (s *SearchRequest) GetAndSetDefault() *SearchRequest {
	if s.Limit == 0 {
		s.Limit = 100
	}
	if s.Page == 0 {
		s.Page = 1
	}
	if s.Order == "" {
		s.Order = "desc"
	}

	return s
}
// Pagination converts a page number into slice bounds for a result list.
type Pagination struct {
	Limit int // page size

	PageCount int // total number of pages
	Total     int // total number of items
}

// Init stores the page size and item count and derives the page count as
// ceil(total / limit).
//
// A non-positive limit yields zero pages instead of dividing by zero.
func (p *Pagination) Init(limit int, total int) {
	p.Limit = limit
	p.Total = total

	if limit <= 0 {
		p.PageCount = 0
		return
	}
	p.PageCount = int(math.Ceil(float64(total) / float64(limit)))
}

// GetPage returns the half-open index range [s, e) of the given 1-based page.
//
// Pages beyond the last are clamped to the last page and pages below 1 to the
// first. The original only clamped negative pages, so page 0 produced a
// negative start index. With nothing to paginate the result is (0, 0).
func (p *Pagination) GetPage(page int) (s int, e int) {
	if p.Limit <= 0 || p.Total <= 0 {
		return 0, 0
	}

	if page > p.PageCount {
		page = p.PageCount
	}
	if page < 1 {
		page = 1
	}

	// Pages are 1-based, indexes 0-based.
	start := (page - 1) * p.Limit
	if start > p.Total {
		start = p.Total
	}
	end := start + p.Limit
	if end > p.Total {
		end = p.Total
	}

	return start, end
}
-------------------------------------------------------------------------------- 1 | package sorts 2 | 3 | import ( 4 | "github.com/sea-team/gofound/searcher/model" 5 | "sort" 6 | "strings" 7 | "sync" 8 | ) 9 | 10 | const ( 11 | DESC = "desc" 12 | ) 13 | 14 | type ScoreSlice []model.SliceItem 15 | 16 | func (x ScoreSlice) Len() int { 17 | return len(x) 18 | } 19 | func (x ScoreSlice) Less(i, j int) bool { 20 | return x[i].Score < x[j].Score 21 | } 22 | func (x ScoreSlice) Swap(i, j int) { 23 | x[i], x[j] = x[j], x[i] 24 | } 25 | 26 | type SortSlice []uint32 27 | 28 | func (x SortSlice) Len() int { 29 | return len(x) 30 | } 31 | func (x SortSlice) Less(i, j int) bool { 32 | return x[i] < x[j] 33 | } 34 | func (x SortSlice) Swap(i, j int) { 35 | x[i], x[j] = x[j], x[i] 36 | 37 | } 38 | 39 | type Uint32Slice []uint32 40 | 41 | func (x Uint32Slice) Len() int { return len(x) } 42 | func (x Uint32Slice) Less(i, j int) bool { return x[i] < x[j] } 43 | func (x Uint32Slice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 44 | 45 | type FastSort struct { 46 | sync.Mutex 47 | 48 | IsDebug bool 49 | 50 | data []model.SliceItem 51 | 52 | temps []uint32 53 | 54 | count int //总数 55 | 56 | Order string //排序方式 57 | } 58 | 59 | func (f *FastSort) Add(ids *[]uint32) { 60 | //f.Lock() 61 | //defer f.Unlock() 62 | 63 | //for _, id := range *ids { 64 | // 65 | // found, index := f.find(&id) 66 | // if found { 67 | // f.data[index].Score += 1 68 | // } else { 69 | // 70 | // f.data = append(f.data, model.SliceItem{ 71 | // Id: id, 72 | // Score: 1, 73 | // }) 74 | // f.Sort() 75 | // } 76 | //} 77 | //f.count = len(f.data) 78 | f.temps = append(f.temps, *ids...) 
79 | } 80 | 81 | // 二分法查找 82 | func (f *FastSort) find(target *uint32) (bool, int) { 83 | 84 | low := 0 85 | high := f.count - 1 86 | for low <= high { 87 | mid := (low + high) / 2 88 | if f.data[mid].Id == *target { 89 | return true, mid 90 | } else if f.data[mid].Id > *target { 91 | high = mid - 1 92 | } else { 93 | low = mid + 1 94 | } 95 | } 96 | return false, -1 97 | //for index, item := range f.data { 98 | // if item.Id == *target { 99 | // return true, index 100 | // } 101 | //} 102 | //return false, -1 103 | } 104 | 105 | // Count 获取数量 106 | func (f *FastSort) Count() int { 107 | return f.count 108 | } 109 | 110 | // Sort 排序 111 | func (f *FastSort) Sort() { 112 | if strings.ToLower(f.Order) == DESC { 113 | sort.Sort(sort.Reverse(SortSlice(f.temps))) 114 | } else { 115 | sort.Sort(SortSlice(f.temps)) 116 | } 117 | } 118 | 119 | // Process 处理数据 120 | func (f *FastSort) Process() { 121 | //计算重复 122 | f.Sort() 123 | 124 | for _, temp := range f.temps { 125 | if found, index := f.find(&temp); found { 126 | f.data[index].Score += 1 127 | } else { 128 | f.data = append(f.data, model.SliceItem{ 129 | Id: temp, 130 | Score: 1, 131 | }) 132 | f.count++ 133 | } 134 | } 135 | //对分数进行排序 136 | sort.Sort(sort.Reverse(ScoreSlice(f.data))) 137 | } 138 | func (f *FastSort) GetAll(result *[]model.SliceItem, start int, end int) { 139 | 140 | *result = f.data[start:end] 141 | } 142 | -------------------------------------------------------------------------------- /searcher/sorts/sort.go: -------------------------------------------------------------------------------- 1 | package sorts 2 | 3 | import ( 4 | "github.com/emirpasic/gods/trees/avltree" 5 | "github.com/sea-team/gofound/searcher/utils" 6 | "log" 7 | "sync" 8 | ) 9 | 10 | // IdSort 二叉树对id 进行打分和排序 11 | type IdSort struct { 12 | Tree *avltree.Tree 13 | sync.Mutex 14 | } 15 | 16 | func NewIdSortTree() *IdSort { 17 | return &IdSort{ 18 | Tree: &avltree.Tree{Comparator: utils.Uint32Comparator}, 19 | } 20 | 21 | } 22 | func 
(e *IdSort) Add(key uint32) { 23 | count, found := e.Tree.Get(key) 24 | val := 1 25 | if found { 26 | val = count.(int) + 1 27 | } 28 | e.Lock() 29 | defer e.Unlock() 30 | e.Tree.Put(key, val) 31 | } 32 | 33 | func (e *IdSort) Size() int { 34 | return e.Tree.Size() 35 | } 36 | 37 | // GetAll 正序获取 38 | func (e *IdSort) GetAll(order string) []uint32 { 39 | scores := make([]int, 0) 40 | ids := make([]uint32, 0) 41 | it := e.Tree.Iterator() 42 | _tt := utils.ExecTime(func() { 43 | for it.Next() { 44 | scores = append(scores, it.Value().(int)) 45 | ids = append(ids, it.Key().(uint32)) 46 | } 47 | }) 48 | log.Println("迭代耗时:", _tt) 49 | 50 | _t := utils.ExecTime(func() { 51 | //ids 降序 52 | if order == "desc" { 53 | for i, j := 0, len(ids)-1; i < j; i, j = i+1, j-1 { 54 | ids[i], ids[j] = ids[j], ids[i] 55 | scores[i], scores[j] = scores[j], scores[i] 56 | } 57 | } 58 | }) 59 | log.Println("id排序耗时:", _t) 60 | 61 | _t = utils.ExecTime(func() { 62 | // 排序,得分越高 排越前 63 | for i := 0; i < len(scores); i++ { 64 | for j := i + 1; j < len(scores); j++ { 65 | if scores[i] < scores[j] { 66 | scores[i], scores[j] = scores[j], scores[i] 67 | ids[i], ids[j] = ids[j], ids[i] 68 | } 69 | } 70 | } 71 | }) 72 | 73 | log.Println("得分排序耗时:", _t) 74 | 75 | return ids 76 | } 77 | -------------------------------------------------------------------------------- /searcher/storage/leveldb_storage.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "log" 5 | "sync" 6 | "time" 7 | 8 | "github.com/syndtr/goleveldb/leveldb" 9 | "github.com/syndtr/goleveldb/leveldb/filter" 10 | "github.com/syndtr/goleveldb/leveldb/opt" 11 | ) 12 | 13 | // LeveldbStorage TODO 要支持事务 14 | type LeveldbStorage struct { 15 | db *leveldb.DB 16 | path string 17 | mu sync.RWMutex //加锁 18 | closed bool 19 | timeout int64 20 | lastTime int64 21 | count int64 22 | } 23 | 24 | func (s *LeveldbStorage) autoOpenDB() { 25 | if s.isClosed() { 26 | s.ReOpen() 27 | } 
28 | s.lastTime = time.Now().Unix() 29 | } 30 | 31 | // NewStorage 打开数据库 32 | func NewStorage(path string, timeout int64) (*LeveldbStorage, error) { 33 | 34 | db := &LeveldbStorage{ 35 | path: path, 36 | closed: true, 37 | timeout: timeout, 38 | lastTime: time.Now().Unix(), 39 | } 40 | 41 | go db.task() 42 | 43 | return db, nil 44 | } 45 | 46 | func (s *LeveldbStorage) task() { 47 | if s.timeout == -1 { 48 | //不检查 49 | return 50 | } 51 | for { 52 | 53 | if !s.isClosed() && time.Now().Unix()-s.lastTime > s.timeout { 54 | s.Close() 55 | //log.Println("leveldb storage timeout", s.path) 56 | } 57 | 58 | time.Sleep(time.Duration(5) * time.Second) 59 | 60 | } 61 | } 62 | 63 | func openDB(path string) (*leveldb.DB, error) { 64 | 65 | ////使用布隆过滤器 66 | o := &opt.Options{ 67 | Filter: filter.NewBloomFilter(10), 68 | } 69 | 70 | db, err := leveldb.OpenFile(path, o) 71 | return db, err 72 | } 73 | func (s *LeveldbStorage) ReOpen() { 74 | if !s.isClosed() { 75 | log.Println("db is not closed") 76 | return 77 | } 78 | s.mu.Lock() 79 | db, err := openDB(s.path) 80 | if err != nil { 81 | panic(err) 82 | } 83 | s.db = db 84 | s.closed = false 85 | s.mu.Unlock() 86 | //计算总条数 87 | go s.compute() 88 | } 89 | 90 | func (s *LeveldbStorage) Get(key []byte) ([]byte, bool) { 91 | s.autoOpenDB() 92 | buffer, err := s.db.Get(key, nil) 93 | if err != nil { 94 | return nil, false 95 | } 96 | return buffer, true 97 | } 98 | 99 | func (s *LeveldbStorage) Has(key []byte) bool { 100 | s.autoOpenDB() 101 | has, err := s.db.Has(key, nil) 102 | if err != nil { 103 | panic(err) 104 | } 105 | return has 106 | } 107 | 108 | func (s *LeveldbStorage) Set(key []byte, value []byte) { 109 | s.autoOpenDB() 110 | err := s.db.Put(key, value, nil) 111 | if err != nil { 112 | panic(err) 113 | } 114 | } 115 | 116 | // Delete 删除 117 | func (s *LeveldbStorage) Delete(key []byte) error { 118 | s.autoOpenDB() 119 | return s.db.Delete(key, nil) 120 | } 121 | 122 | // Close 关闭 123 | func (s *LeveldbStorage) Close() 
error { 124 | if s.isClosed() { 125 | return nil 126 | } 127 | s.mu.Lock() 128 | err := s.db.Close() 129 | if err != nil { 130 | return err 131 | } 132 | s.closed = true 133 | s.mu.Unlock() 134 | return nil 135 | } 136 | 137 | func (s *LeveldbStorage) isClosed() bool { 138 | s.mu.RLock() 139 | defer s.mu.RUnlock() 140 | return s.closed 141 | } 142 | 143 | func (s *LeveldbStorage) compute() { 144 | var count int64 145 | iter := s.db.NewIterator(nil, nil) 146 | for iter.Next() { 147 | count++ 148 | } 149 | iter.Release() 150 | s.count = count 151 | } 152 | 153 | func (s *LeveldbStorage) GetCount() int64 { 154 | if s.count == 0 && s.isClosed() { 155 | s.ReOpen() 156 | s.compute() 157 | } 158 | return s.count 159 | } 160 | -------------------------------------------------------------------------------- /searcher/storage/leveldb_test.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "fmt" 5 | "github.com/syndtr/goleveldb/leveldb" 6 | "testing" 7 | ) 8 | 9 | func TestLeveldb(t *testing.T) { 10 | db, err := leveldb.OpenFile("/Users/panjing/GolandProjects/github.com/sea-team/gofound/cache/doc_6.db", nil) 11 | if err != nil { 12 | t.Fatal(err) 13 | } 14 | defer db.Close() 15 | 16 | //_time := utils.ExecTime(func() { 17 | // 18 | // for i := 0; i < 10000; i++ { 19 | // db.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)), nil) 20 | // } 21 | //}) 22 | //fmt.Println("leveldb put 1000:", _time) 23 | db.Put([]byte("1"), []byte("1"), nil) 24 | value, err := db.Get([]byte("1"), nil) 25 | fmt.Println(string(value), err) 26 | } 27 | -------------------------------------------------------------------------------- /searcher/system/cpu.go: -------------------------------------------------------------------------------- 1 | package system 2 | 3 | import ( 4 | "github.com/shirou/gopsutil/v3/cpu" 5 | "runtime" 6 | "time" 7 | ) 8 | 9 | type CPUStatus struct { 10 | Cores int `json:"cores"` 11 | UsedPercent 
float64 `json:"usedPercent"` 12 | ModelName string `json:"modelName"` 13 | } 14 | 15 | func GetCPUStatus() CPUStatus { 16 | percent, _ := cpu.Percent(time.Second, false) 17 | info, _ := cpu.Info() 18 | c := CPUStatus{ 19 | UsedPercent: GetPercent(percent[0]), 20 | Cores: runtime.NumCPU(), 21 | ModelName: info[0].ModelName, 22 | } 23 | 24 | return c 25 | } 26 | -------------------------------------------------------------------------------- /searcher/system/cpu_test.go: -------------------------------------------------------------------------------- 1 | package system 2 | 3 | import ( 4 | "fmt" 5 | "github.com/shirou/gopsutil/v3/cpu" 6 | "testing" 7 | ) 8 | 9 | func TestCPU(t *testing.T) { 10 | fmt.Println(GetCPUStatus()) 11 | c, _ := cpu.Info() 12 | fmt.Println(c) 13 | } 14 | -------------------------------------------------------------------------------- /searcher/system/disk.go: -------------------------------------------------------------------------------- 1 | package system 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/shirou/gopsutil/v3/disk" 6 | ) 7 | 8 | type DiskStatus struct { 9 | Total float64 `json:"total"` 10 | Used float64 `json:"used"` 11 | Free float64 `json:"free"` 12 | FsType string `json:"fsType"` 13 | UsedPercent float64 `json:"usedPercent"` 14 | Path string `json:"path"` 15 | } 16 | 17 | func (d *DiskStatus) String() string { 18 | buf, _ := json.Marshal(d) 19 | return string(buf) 20 | } 21 | 22 | func GetDiskStat() DiskStatus { 23 | parts, _ := disk.Partitions(true) 24 | diskInfo, _ := disk.Usage(parts[0].Mountpoint) 25 | 26 | d := DiskStatus{ 27 | Path: diskInfo.Path, 28 | Total: GetUint64GB(diskInfo.Total), 29 | Free: GetUint64GB(diskInfo.Free), 30 | Used: GetUint64GB(diskInfo.Used), 31 | UsedPercent: GetPercent(diskInfo.UsedPercent), 32 | FsType: diskInfo.Fstype, 33 | } 34 | return d 35 | } 36 | -------------------------------------------------------------------------------- /searcher/system/disk_test.go: 
-------------------------------------------------------------------------------- 1 | package system 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestDisk(t *testing.T) { 9 | 10 | fmt.Println(GetDiskStat()) 11 | } 12 | -------------------------------------------------------------------------------- /searcher/system/mem.go: -------------------------------------------------------------------------------- 1 | package system 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/shirou/gopsutil/v3/mem" 6 | "runtime" 7 | ) 8 | 9 | type MemStatus struct { 10 | Total float64 `json:"total"` 11 | Used float64 `json:"used"` 12 | Free float64 `json:"free"` 13 | Self float64 `json:"self"` 14 | UsedPercent float64 `json:"usedPercent"` 15 | } 16 | 17 | func (m *MemStatus) String() string { 18 | buf, _ := json.Marshal(m) 19 | return string(buf) 20 | } 21 | 22 | func GetMemStat() MemStatus { 23 | 24 | //内存信息 25 | info, _ := mem.VirtualMemory() 26 | m := MemStatus{ 27 | Total: GetUint64GB(info.Total), 28 | Used: GetUint64GB(info.Used), 29 | Free: GetUint64GB(info.Free), 30 | UsedPercent: GetPercent(info.UsedPercent), 31 | } 32 | 33 | //自身占用 34 | memStat := new(runtime.MemStats) 35 | runtime.ReadMemStats(memStat) 36 | m.Self = GetUint64GB(memStat.Alloc) 37 | 38 | return m 39 | } 40 | -------------------------------------------------------------------------------- /searcher/system/mem_test.go: -------------------------------------------------------------------------------- 1 | package system 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestMem(t *testing.T) { 9 | 10 | m := GetMemStat() 11 | fmt.Println(m) 12 | } 13 | -------------------------------------------------------------------------------- /searcher/system/utils.go: -------------------------------------------------------------------------------- 1 | package system 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | func GetFloat64MB(size int64) float64 { 9 | val, _ := 
// Decoder gob-decodes data into v, which must be a pointer to the destination
// value.
//
// Empty input (nil or zero-length) means "nothing to decode" and leaves v
// untouched; this matches Encoder, which returns nil for a nil value.
// It panics if data is non-empty but cannot be decoded into v.
func Decoder(data []byte, v interface{}) {
	if len(data) == 0 {
		return
	}
	if err := gob.NewDecoder(bytes.NewReader(data)).Decode(v); err != nil {
		panic(err)
	}
}
// Uint32Comparator orders two uint32 values held in interfaces.
// It returns -1 when a < b, 1 when a > b and 0 when they are equal.
// Both arguments must hold a uint32; any other dynamic type panics on the
// type assertion.
func Uint32Comparator(a, b interface{}) int {
	lhs, rhs := a.(uint32), b.(uint32)
	if lhs < rhs {
		return -1
	}
	if lhs > rhs {
		return 1
	}
	return 0
}
// DirSizeB returns the combined size in bytes of every non-directory entry
// found while walking the tree rooted at path.
//
// The result is best-effort: the walk stops at the first error and the bytes
// counted so far are returned, so a path that does not exist yields 0.
func DirSizeB(path string) int64 {
	var size int64
	// The walk error is intentionally discarded; this function has no error
	// return and callers only need an approximate figure.
	_ = filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
		if err != nil {
			// info can be nil when err is set (e.g. the root is missing),
			// so it must not be dereferenced here.
			return err
		}
		if !info.IsDir() {
			size += info.Size()
		}
		return nil
	})

	return size
}
-------------------------------------------------------------------------------- /searcher/words/tokenizer.go: -------------------------------------------------------------------------------- 1 | package words 2 | 3 | import ( 4 | "embed" 5 | "github.com/sea-team/gofound/searcher/utils" 6 | "strings" 7 | 8 | "github.com/wangbin/jiebago" 9 | ) 10 | 11 | var ( 12 | //go:embed data/*.txt 13 | dictionaryFS embed.FS 14 | ) 15 | 16 | type Tokenizer struct { 17 | seg jiebago.Segmenter 18 | } 19 | 20 | func NewTokenizer(dictionaryPath string) *Tokenizer { 21 | file, err := dictionaryFS.Open("data/dictionary.txt") 22 | if err != nil { 23 | panic(err) 24 | } 25 | utils.ReleaseAssets(file, dictionaryPath) 26 | 27 | tokenizer := &Tokenizer{} 28 | 29 | err = tokenizer.seg.LoadDictionary(dictionaryPath) 30 | if err != nil { 31 | panic(err) 32 | } 33 | 34 | return tokenizer 35 | } 36 | 37 | func (t *Tokenizer) Cut(text string) []string { 38 | //不区分大小写 39 | text = strings.ToLower(text) 40 | //移除所有的标点符号 41 | text = utils.RemovePunctuation(text) 42 | //移除所有的空格 43 | text = utils.RemoveSpace(text) 44 | 45 | var wordMap = make(map[string]struct{}) 46 | 47 | resultChan := t.seg.CutForSearch(text, true) 48 | var wordsSlice []string 49 | for { 50 | w, ok := <-resultChan 51 | if !ok { 52 | break 53 | } 54 | _, found := wordMap[w] 55 | if !found { 56 | //去除重复的词 57 | wordMap[w] = struct{}{} 58 | wordsSlice = append(wordsSlice, w) 59 | } 60 | } 61 | 62 | return wordsSlice 63 | } 64 | -------------------------------------------------------------------------------- /tests/array_test.go: -------------------------------------------------------------------------------- 1 | package tests 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func DeleteArray(array []uint32, index int) []uint32 { 9 | return append(array[:index], array[index+1:]...) 
10 | } 11 | 12 | func TestArray(t *testing.T) { 13 | array := []uint32{1} 14 | fmt.Println(DeleteArray(array, 0)) 15 | } 16 | -------------------------------------------------------------------------------- /tests/benchmark/array_test.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "github.com/sea-team/gofound/searcher/arrays" 5 | "testing" 6 | ) 7 | 8 | func Benchmark(b *testing.B) { 9 | 10 | //测试两种方法的性能 11 | size := 100 12 | arrayList := make([][]uint32, size) 13 | for i := 0; i < size; i++ { 14 | arrayList[i] = GetRandomUint32(1000) 15 | } 16 | 17 | b.Run("array", func(b *testing.B) { 18 | for i := 0; i < b.N; i++ { 19 | var temp []uint32 20 | for _, nums := range arrayList { 21 | 22 | for _, num := range nums { 23 | if !arrays.BinarySearch(temp, num) { 24 | temp = append(temp, num) 25 | } 26 | } 27 | } 28 | } 29 | }) 30 | 31 | b.Run("sort", func(b *testing.B) { 32 | for i := 0; i < b.N; i++ { 33 | var temp []uint32 34 | for _, v := range arrayList { 35 | temp = append(temp, v...) 
// GetRandomUint32 returns a slice of length n in which every element is the
// result of a separate rand.Uint32 call.
func GetRandomUint32(n int) []uint32 {
	values := make([]uint32, n)
	for idx := range values {
		values[idx] = rand.Uint32()
	}
	return values
}
| data := make(chan int) 13 | 14 | go func() { 15 | for { 16 | time.Sleep(time.Second * 1) 17 | data <- rand.Intn(100) 18 | break 19 | } 20 | }() 21 | 22 | r := <-data 23 | fmt.Println(r) 24 | 25 | } 26 | -------------------------------------------------------------------------------- /tests/func_test.go: -------------------------------------------------------------------------------- 1 | package tests 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | type FuncTest struct { 9 | name string 10 | } 11 | 12 | func aa(a int, b *int, d *FuncTest) int { 13 | a = 111 14 | *b = 3 15 | fmt.Printf("b=%p\n", &b) 16 | fmt.Printf("d=%p\n", d) 17 | d.name = "aa" 18 | return a + *b 19 | } 20 | 21 | func TestA(t *testing.T) { 22 | 23 | var a int = 1 24 | var b int = 2 25 | d := &FuncTest{name: "test"} 26 | 27 | fmt.Printf("b=%p\n", &b) 28 | fmt.Printf("d=%p\n", d) 29 | 30 | fmt.Println(aa(a, &b, d)) 31 | fmt.Println(d) 32 | } 33 | -------------------------------------------------------------------------------- /tests/http/cut.http: -------------------------------------------------------------------------------- 1 | GET localhost:5678/api/word/cut?q=开发者程序员是什么职业 2 | Content-Type: application/json 3 | 4 | {} 5 | 6 | ### 7 | -------------------------------------------------------------------------------- /tests/http/dump.http: -------------------------------------------------------------------------------- 1 | GET localhost:5678/api/dump 2 | Accept: application/json 3 | 4 | ### 5 | -------------------------------------------------------------------------------- /tests/http/index.http: -------------------------------------------------------------------------------- 1 | POST localhost:5678/api/index 2 | Content-Type: application/json 3 | 4 | { 5 | "id": 1, 6 | "text": "上海哪里好玩", 7 | "document": { 8 | "title": "阿森松岛所445", 9 | "number": 223 10 | } 11 | } 12 | 13 | ### 14 | 15 | POST localhost:5678/api/index 16 | Content-Type: application/json 17 | 18 | { 19 | "id": 2, 20 | "text": 
"深圳哪里好玩", 21 | "document": { 22 | "title": "阿森松岛所445", 23 | "number": 223 24 | } 25 | } 26 | 27 | ### 28 | 29 | POST localhost:5678/api/index 30 | Content-Type: application/json 31 | 32 | { 33 | "id": 3, 34 | "text": "哪里是人最多的地方", 35 | "document": { 36 | "title": "阿森松岛所445", 37 | "number": 223 38 | } 39 | } 40 | 41 | ### 42 | 43 | POST localhost:5678/api/index 44 | Content-Type: application/json 45 | 46 | { 47 | "id": 4, 48 | "text": "哪个城市人是最多的", 49 | "document": { 50 | "title": "阿森松岛所445", 51 | "number": 223 52 | } 53 | } 54 | 55 | ### 56 | 57 | POST localhost:5678/api/index 58 | Content-Type: application/json 59 | 60 | { 61 | "id": 5, 62 | "text": "上海的景点哪里好玩", 63 | "document": { 64 | "title": "阿森松岛所445", 65 | "number": 223 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /tests/http/index2.http: -------------------------------------------------------------------------------- 1 | POST localhost:5678/api/index/batch 2 | Content-Type: application/json 3 | 4 | [ 5 | { 6 | "id": 4194561, 7 | "text": "人一生必须培养的好习惯有哪些?", 8 | "document": { 9 | "category": "news_culture", 10 | "cid": "101", 11 | "id": "6422885744473276673", 12 | "title": "人一生必须培养的好习惯有哪些?" 
13 | } 14 | }, 15 | { 16 | "id": 29360647, 17 | "text": "去茶店喝茶,这些坏习惯一定要改", 18 | "document": { 19 | "category": "news_culture", 20 | "cid": "101", 21 | "id": "6526026244490789383", 22 | "title": "去茶店喝茶,这些坏习惯一定要改" 23 | } 24 | }, 25 | { 26 | "id": 482345479, 27 | "text": "终于等到你 大连一方迎中超首胜", 28 | "document": { 29 | "category": "news_sports", 30 | "cid": "103", 31 | "id": "6552361064552464903", 32 | "title": "终于等到你 大连一方迎中超首胜" 33 | } 34 | }, 35 | { 36 | "id": 2273313284, 37 | "text": "范丞丞人气有多高?付费自拍睡一觉起来就躺赚了480万", 38 | "document": { 39 | "category": "news_entertainment", 40 | "cid": "102", 41 | "id": "6552353988237328900", 42 | "title": "范丞丞人气有多高?付费自拍睡一觉起来就躺赚了480万" 43 | } 44 | }, 45 | { 46 | "id": 2688549390, 47 | "text": "比特币交易平台官网", 48 | "document": { 49 | "category": "news_finance", 50 | "cid": "104", 51 | "id": "6552399416521654798", 52 | "title": "比特币交易平台官网" 53 | } 54 | }, 55 | { 56 | "id": 3162505486, 57 | "text": "有哪些优秀的阅读习惯和技巧?", 58 | "document": { 59 | "category": "news_culture", 60 | "cid": "101", 61 | "id": "6552470670503051534", 62 | "title": "有哪些优秀的阅读习惯和技巧?" 63 | } 64 | }, 65 | { 66 | "id": 3602907656, 67 | "text": "大学英语四级考试500个高频词汇", 68 | "document": { 69 | "category": "news_edu", 70 | "cid": "108", 71 | "id": "6552332699414037000", 72 | "title": "大学英语四级考试500个高频词汇" 73 | } 74 | }, 75 | { 76 | "id": 3921674756, 77 | "text": "农村深山里的这种植物,看似其貌不扬,它竟还有这样的功效!", 78 | "document": { 79 | "category": "news_agriculture", 80 | "cid": "115", 81 | "id": "6552380653042663940", 82 | "title": "农村深山里的这种植物,看似其貌不扬,它竟还有这样的功效!" 
83 | } 84 | }, 85 | { 86 | "id": 4034920967, 87 | "text": "北大百廿燕归来", 88 | "document": { 89 | "category": "news_edu", 90 | "cid": "108", 91 | "id": "6552368906420355591", 92 | "title": "北大百廿燕归来" 93 | } 94 | } 95 | ] 96 | 97 | -------------------------------------------------------------------------------- /tests/http/query.http: -------------------------------------------------------------------------------- 1 | POST http://localhost:5678/api/query 2 | Content-Type: application/json 3 | 4 | { 5 | "query":"手机", 6 | "page":1, 7 | "limit":10, 8 | "order": "desc" 9 | } 10 | 11 | ### 12 | -------------------------------------------------------------------------------- /tests/http/status.http: -------------------------------------------------------------------------------- 1 | GET http://localhost:5678/api/status 2 | Accept: application/json 3 | 4 | ### 5 | -------------------------------------------------------------------------------- /tests/index_test.go: -------------------------------------------------------------------------------- 1 | package tests 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "github.com/sea-team/gofound/searcher" 7 | "github.com/sea-team/gofound/searcher/model" 8 | "github.com/sea-team/gofound/searcher/utils" 9 | "github.com/sea-team/gofound/searcher/words" 10 | "os" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | func TestIndex(t *testing.T) { 16 | 17 | tokenizer := words.NewTokenizer("../searcher/words/data/dictionary.txt") 18 | 19 | var engine = &searcher.Engine{ 20 | IndexPath: "./index/db2", 21 | Tokenizer: tokenizer, 22 | } 23 | option := engine.GetOptions() 24 | 25 | engine.InitOption(option) 26 | 27 | f, err := os.Open("index/toutiao_cat_data.txt") 28 | if err != nil { 29 | t.Errorf("open file: %v", err) 30 | } 31 | 32 | id := uint32(0) 33 | rd := bufio.NewReader(f) 34 | index := 0 35 | for { 36 | line, isPrefix, err := rd.ReadLine() 37 | if err != nil { 38 | return 39 | } 40 | if isPrefix { 41 | t.Errorf("A long line has been cut, %s", line) 
42 | } 43 | 44 | if len(line) == 0 { 45 | break 46 | } 47 | 48 | lineString := string(line) 49 | //fmt.Println(lineString) 50 | array := strings.Split(lineString, "_!_") 51 | if index%1000 == 0 { 52 | fmt.Println(index) 53 | } 54 | index++ 55 | //if index == 6000 { 56 | // break 57 | //} 58 | data := make(map[string]interface{}) 59 | id++ 60 | 61 | data["id"] = id 62 | data["title"] = array[3] 63 | data["category"] = array[2] 64 | data["cid"] = array[1] 65 | 66 | doc := model.IndexDoc{ 67 | Id: id, 68 | Text: array[3], 69 | Document: data, 70 | } 71 | engine.IndexDocument(&doc) 72 | } 73 | for engine.GetQueue() > 0 { 74 | } 75 | fmt.Println("index finish") 76 | } 77 | 78 | func TestRepeat(t *testing.T) { 79 | //判断是否重复 80 | 81 | tokenizer := words.NewTokenizer("../searcher/words/data/dictionary.txt") 82 | var engine = &searcher.Engine{ 83 | IndexPath: "./index", 84 | Tokenizer: tokenizer, 85 | } 86 | option := engine.GetOptions() 87 | 88 | engine.InitOption(option) 89 | 90 | f, err := os.Open("index/toutiao_cat_data.txt") 91 | if err != nil { 92 | t.Errorf("open file: %v", err) 93 | } 94 | 95 | container := make(map[uint32][]string) 96 | 97 | rd := bufio.NewReader(f) 98 | index := 0 99 | for { 100 | 101 | line, _, err := rd.ReadLine() 102 | if err != nil { 103 | break 104 | } 105 | 106 | lineString := string(line) 107 | array := strings.Split(lineString, "_!_") 108 | if index%10000 == 0 { 109 | fmt.Println(index) 110 | } 111 | index++ 112 | 113 | data := struct { 114 | Id string 115 | Title string 116 | Category string 117 | Cid string 118 | }{ 119 | Id: array[0], 120 | Title: array[3], 121 | Category: array[2], 122 | Cid: array[1], 123 | } 124 | 125 | //分词 126 | words := engine.Tokenizer.Cut(data.Title) 127 | for _, word := range words { 128 | key := Murmur3([]byte(word)) 129 | val := container[key] 130 | if val == nil { 131 | val = make([]string, 0) 132 | } 133 | if !exists(val, word) { 134 | val = append(val, word) 135 | } 136 | container[key] = val 137 | } 138 | 
// exists reports whether value is one of the elements of values.
// An empty or nil slice never contains anything.
func exists(values []string, value string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] != value {
			continue
		}
		return true
	}
	return false
}
// SelectSort sorts arr in place in descending order using selection sort:
// for each position it finds the largest remaining element and swaps it in.
// Among equal maxima the earliest one is chosen.
func SelectSort(arr []int) {
	n := len(arr)
	for pos := 0; pos+1 < n; pos++ {
		// Index of the largest element in arr[pos:].
		best := pos
		for k := pos + 1; k < n; k++ {
			if arr[k] > arr[best] {
				best = k
			}
		}
		// Swap only when a larger element was found further right.
		if best != pos {
			arr[pos], arr[best] = arr[best], arr[pos]
		}
	}
}
// getMaxInArr returns the largest element of arr, or 0 when arr is empty.
func getMaxInArr(arr []int) int {
	if len(arr) == 0 {
		return 0
	}
	max := arr[0]
	for i := 1; i < len(arr); i++ {
		if arr[i] > max {
			max = arr[i]
		}
	}
	return max
}

// sortInBucket sorts bucket in ascending order, in place, using insertion
// sort. Any other sorting algorithm would work equally well here.
func sortInBucket(bucket []int) {
	length := len(bucket)
	if length <= 1 {
		return
	}
	for i := 1; i < length; i++ {
		backup := bucket[i]
		j := i - 1
		// Shift larger elements of the sorted prefix one slot to the right.
		// The j >= 0 check must come first to avoid indexing with -1.
		for j >= 0 && backup < bucket[j] {
			bucket[j+1] = bucket[j]
			j--
		}
		bucket[j+1] = backup // drop the element into the freed slot
	}
}

// BucketSort sorts arr in ascending order, in place, and returns it.
//
// Elements are distributed over len(arr) buckets according to their position
// inside the [min, max] value range, each bucket is sorted on its own, and the
// buckets are copied back in order. Empty input, single-element input, inputs
// whose elements are all equal (including all zeros) and negative values are
// handled; value ranges wide enough to overflow int arithmetic are not.
func BucketSort(arr []int) []int {
	num := len(arr)
	if num < 2 {
		return arr
	}

	max := getMaxInArr(arr)
	min := arr[0]
	for _, v := range arr[1:] {
		if v < min {
			min = v
		}
	}
	// All elements are equal: already sorted, and the range would be zero.
	if max == min {
		return arr
	}
	span := max - min

	// Distribute: index = (value - min) * (n - 1) / (max - min), so the
	// smallest value lands in bucket 0 and the largest in bucket n-1.
	buckets := make([][]int, num)
	for _, v := range arr {
		index := (v - min) * (num - 1) / span
		buckets[index] = append(buckets[index], v)
	}

	// Sort each bucket and write the buckets back in order.
	tmpPos := 0
	for i := 0; i < num; i++ {
		bucketLen := len(buckets[i])
		if bucketLen > 0 {
			sortInBucket(buckets[i])
			copy(arr[tmpPos:], buckets[i])
			tmpPos += bucketLen
		}
	}
	return arr
}
data2 := make([]int, 0) 226 | for i := 0; i < 100000; i++ { 227 | val := rand.Intn(100000) 228 | data = append(data, val) 229 | data2 = append(data2, val) 230 | } 231 | 232 | t1 := utils.ExecTime(func() { 233 | sort.Sort(sort.IntSlice(data)) 234 | }) 235 | fmt.Println("快排用时", t1) 236 | 237 | //fmt.Println(find(data, 1)) 238 | t2 := utils.ExecTime(func() { 239 | BucketSort(data2) 240 | for i, j := 0, len(data2)-1; i < j; i, j = i+1, j-1 { 241 | data2[i], data2[j] = data2[j], data2[i] 242 | } 243 | }) 244 | fmt.Println("捅排", t2) 245 | //fmt.Println("捅排", sort.Reverse(sort.IntSlice(data2))) 246 | 247 | //查找优化,桶排序+map去重 248 | 249 | } 250 | func find(data []uint32, target uint32) (bool, int) { 251 | low := 0 252 | high := len(data) - 1 253 | for low <= high { 254 | mid := (low + high) / 2 255 | if data[mid] == target { 256 | return true, mid 257 | } else if data[mid] < target { 258 | high = mid - 1 259 | } else { 260 | low = mid + 1 261 | } 262 | } 263 | return false, -1 264 | } 265 | func TestMerge(t *testing.T) { 266 | 267 | data1 := make([]uint32, 0) 268 | data2 := make([]uint32, 0) 269 | for i := 0; i < 10000; i++ { 270 | v := rand.Intn(10) 271 | data1 = append(data1, uint32(v)) 272 | data2 = append(data2, uint32(v)) 273 | } 274 | 275 | t1 := utils.ExecTime(func() { 276 | temp := make([]uint32, 0) 277 | for _, v := range data1 { 278 | if found, _ := find(temp, v); found { 279 | temp = append(temp, v) 280 | } 281 | } 282 | fmt.Println(temp) 283 | }) 284 | 285 | fmt.Println("二分法去重", t1) 286 | 287 | t2 := utils.ExecTime(func() { 288 | temp := make(map[uint32]bool, len(data2)) 289 | d := make([]uint32, 0) 290 | for _, val := range data2 { 291 | if _, ok := temp[val]; !ok { 292 | temp[val] = true 293 | d = append(d, val) 294 | } 295 | } 296 | fmt.Println(d) 297 | }) 298 | fmt.Println("map去重", t2) 299 | } 300 | -------------------------------------------------------------------------------- /tests/thread_test.go: 
-------------------------------------------------------------------------------- 1 | package tests 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | type ThreadTest struct { 11 | sync.Mutex 12 | } 13 | 14 | var wg sync.WaitGroup 15 | 16 | func (t *ThreadTest) Test(name int) { 17 | defer t.Unlock() 18 | t.Lock() 19 | time.Sleep(time.Second * 1) 20 | fmt.Println("我是线程", name, "执行结束") 21 | wg.Done() 22 | } 23 | 24 | func TestThread(t *testing.T) { 25 | 26 | //sync.Mutex 27 | test := new(ThreadTest) 28 | for i := 0; i < 10; i++ { 29 | wg.Add(1) 30 | go test.Test(i) 31 | } 32 | wg.Wait() 33 | fmt.Println("完成了") 34 | 35 | } 36 | -------------------------------------------------------------------------------- /tests/time_test.go: -------------------------------------------------------------------------------- 1 | package tests 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestExecTime(t *testing.T) { 10 | startT := time.Now() 11 | time.Sleep(time.Millisecond * 10) 12 | 13 | tc := time.Since(startT) 14 | fmt.Println(tc) 15 | } 16 | -------------------------------------------------------------------------------- /tests/word_test.go: -------------------------------------------------------------------------------- 1 | package tests 2 | 3 | import ( 4 | "fmt" 5 | "github.com/wangbin/jiebago" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | func TestWord(t *testing.T) { 11 | var seg jiebago.Segmenter 12 | 13 | seg.LoadDictionary("/Users/panjing/GolandProjects/github.com/sea-team/gofound/data/dictionary.txt") 14 | r := seg.CutForSearch("想在西安买房投资,哪个区域比较好,最好有具体楼盘?", true) 15 | words := make([]string, 0) 16 | for { 17 | w, ok := <-r 18 | if !ok { 19 | break 20 | } 21 | words = append(words, w) 22 | } 23 | for _, w := range words { 24 | f := int(seg.SuggestFrequency(w)) 25 | if len([]rune(w)) <= 1 { 26 | f = 0 27 | } else { 28 | f = f % len(words) 29 | } 30 | 31 | fmt.Printf("%s\t%d\n", w, f) 32 | } 33 | } 34 | func contains(s []string, 
e string, skipIndex int) bool { 35 | for index, a := range s { 36 | if index != skipIndex && strings.Contains(a, e) { 37 | return true 38 | } 39 | } 40 | return false 41 | } 42 | func getLongWords(words []string) []string { 43 | 44 | var newWords = make([]string, 0) 45 | for index, w := range words { 46 | if !contains(words, w, index) { 47 | newWords = append(newWords, w) 48 | } 49 | } 50 | return newWords 51 | } 52 | 53 | func TestLongWord(t *testing.T) { 54 | words := []string{"博物", "博物馆", "深圳北", "深圳", "深圳东"} 55 | r := getLongWords(words) 56 | fmt.Println(r) 57 | } 58 | 59 | func BenchmarkTest(b *testing.B) { 60 | var r []string 61 | for i := 0; i < b.N; i++ { 62 | words := []string{"博物", "博物馆", "深圳北", "深圳", "深圳东"} 63 | r = getLongWords(words) 64 | } 65 | fmt.Println(r) 66 | } 67 | -------------------------------------------------------------------------------- /web/admin/admin.go: -------------------------------------------------------------------------------- 1 | package admin 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "github.com/sea-team/gofound/web/admin/assets" 6 | "net/http" 7 | "net/url" 8 | "os" 9 | ) 10 | 11 | func adminIndex(ctx *gin.Context) { 12 | file, err := assets.Static.ReadFile("web/dist/index.html") 13 | if err != nil && os.IsNotExist(err) { 14 | ctx.String(http.StatusNotFound, "not found") 15 | return 16 | } 17 | ctx.Data(http.StatusOK, "text/html", file) 18 | } 19 | 20 | func handlerStatic(c *gin.Context) { 21 | staticServer := http.FileServer(http.FS(assets.Static)) 22 | c.Request.URL = &url.URL{Path: "web/dist" + c.Request.RequestURI} 23 | staticServer.ServeHTTP(c.Writer, c.Request) 24 | } 25 | 26 | func Register(router *gin.Engine, handlers ...gin.HandlerFunc) { 27 | //注册路由 28 | r := router.Group("/admin", handlers...) 
29 | r.GET("/", adminIndex) 30 | router.GET("/assets/*filepath", handlerStatic) 31 | } 32 | -------------------------------------------------------------------------------- /web/admin/assets/assets.go: -------------------------------------------------------------------------------- 1 | package assets 2 | 3 | import "embed" 4 | 5 | var ( 6 | //go:embed web/dist/* 7 | Static embed.FS 8 | ) 9 | -------------------------------------------------------------------------------- /web/admin/assets/web/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist-ssr 12 | *.local 13 | 14 | # Editor directories and files 15 | .vscode/* 16 | !.vscode/extensions.json 17 | .idea 18 | .DS_Store 19 | *.suo 20 | *.ntvs* 21 | *.njsproj 22 | *.sln 23 | *.sw? 24 | -------------------------------------------------------------------------------- /web/admin/assets/web/README.md: -------------------------------------------------------------------------------- 1 | # Vue 3 + Vite 2 | 3 | This template should help get you started developing with Vue 3 in Vite. The template uses Vue 3 ` 10 | 11 | 12 | 13 | 53 |
54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /web/admin/assets/web/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | GoFound Admin 8 | 9 | 10 | 11 | 51 |
52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /web/admin/assets/web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "web", 3 | "private": true, 4 | "version": "0.0.0", 5 | "scripts": { 6 | "dev": "vite", 7 | "build": "vite build", 8 | "preview": "vite preview" 9 | }, 10 | "dependencies": { 11 | "@element-plus/icons-vue": "^1.1.4", 12 | "@vueuse/core": "^8.4.2", 13 | "axios": "^0.27.2", 14 | "element-plus": "^2.1.11", 15 | "sass": "^1.51.0", 16 | "vue": "^3.2.25", 17 | "vue-json-viewer": "3", 18 | "vue-router": "^4.0.15" 19 | }, 20 | "devDependencies": { 21 | "@vitejs/plugin-vue": "^2.2.0", 22 | "vite": "^2.8.0" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /web/admin/assets/web/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sea-team/gofound/eec89a008c64a278978db27ef40e9e248f2c6aac/web/admin/assets/web/public/favicon.ico -------------------------------------------------------------------------------- /web/admin/assets/web/src/App.vue: -------------------------------------------------------------------------------- 1 | 44 | 45 | 77 | 78 | 81 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/api.js: -------------------------------------------------------------------------------- 1 | import axios from 'axios' 2 | 3 | const BASE_URL = process.env.NODE_ENV === 'production' ? 
'/api' : 'http://127.0.0.1:5678/api' 4 | 5 | function request(url, method = 'get', data) { 6 | return axios({ 7 | baseURL: BASE_URL, 8 | url: url, 9 | method: method, 10 | data: data, 11 | }) 12 | } 13 | 14 | export default { 15 | getDatabase() { 16 | return request('/db/list') 17 | }, 18 | query(db, params) { 19 | 20 | return request(`/query?database=${db}`, 'post', { 21 | ...params, 22 | highlight: params.highlight ? { 23 | preTag: '', 24 | postTag: '', 25 | } : null, 26 | }) 27 | }, 28 | remove(db, id) { 29 | return request(`/index/remove?database=${db}`, 'post', { id }) 30 | }, 31 | gc() { 32 | return request('/gc') 33 | }, 34 | getStatus() { 35 | return request('/status') 36 | }, 37 | addIndex(db, index) { 38 | return request(`/index?database=${db}`, 'post', index ) 39 | }, 40 | drop(db){ 41 | return request(`/db/drop?database=${db}`) 42 | }, 43 | create(db){ 44 | return request(`/db/create?database=${db}`) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sea-team/gofound/eec89a008c64a278978db27ef40e9e248f2c6aac/web/admin/assets/web/src/assets/logo.png -------------------------------------------------------------------------------- /web/admin/assets/web/src/components/CPU.vue: -------------------------------------------------------------------------------- 1 | 16 | 17 | 32 | 33 | 36 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/components/Disk.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 38 | 39 | 42 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/components/GC.vue: -------------------------------------------------------------------------------- 1 | 4 | 5 | 23 | 24 | 27 | 
-------------------------------------------------------------------------------- /web/admin/assets/web/src/components/Icon.vue: -------------------------------------------------------------------------------- 1 | 21 | 22 | 25 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/components/IndexDialog.vue: -------------------------------------------------------------------------------- 1 | 26 | 27 | 118 | 119 | 122 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/components/Memory.vue: -------------------------------------------------------------------------------- 1 | 20 | 21 | 35 | 36 | 39 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/components/Menu.vue: -------------------------------------------------------------------------------- 1 | 31 | 32 | 69 | 70 | 75 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/components/ProgressChat.vue: -------------------------------------------------------------------------------- 1 | 4 | 5 | 25 | 26 | 29 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/components/Runtime.vue: -------------------------------------------------------------------------------- 1 | 69 | 70 | 81 | 82 | 85 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/main.js: -------------------------------------------------------------------------------- 1 | import { createApp } from 'vue' 2 | import App from './App.vue' 3 | import 'element-plus/dist/index.css' 4 | import ElementPlus from 'element-plus' 5 | import zhCn from 'element-plus/es/locale/lang/zh-cn' 6 | 7 | import * as ElementPlusIconsVue from '@element-plus/icons-vue' 8 | import 'element-plus/theme-chalk/src/dark/css-vars.scss' 9 | 10 | import router 
from './router' 11 | let app = createApp(App) 12 | app.use(ElementPlus,{ 13 | locale: zhCn, 14 | }) 15 | app.use(router) 16 | for (const [key, component] of Object.entries(ElementPlusIconsVue)) { 17 | app.component(key, component) 18 | app.use(component) 19 | } 20 | 21 | app.mount('#app') 22 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/menus.js: -------------------------------------------------------------------------------- 1 | import { Coin, DataLine, Document } from '@element-plus/icons-vue' 2 | 3 | const menus = [ 4 | { 5 | path: '/', 6 | name: 'dashboard', 7 | icon: Coin, 8 | label: '数据库', 9 | color: 'rgb(105, 192, 255)', 10 | component: () => import('./views/dashboard.vue'), 11 | }, { 12 | path: '/status', 13 | name: 'status', 14 | label: '服务器状态', 15 | color: 'rgb(149, 222, 100)', 16 | icon: DataLine, 17 | component: () => import('./views/status.vue'), 18 | }, { 19 | path: '/document', 20 | name: 'document', 21 | label: '帮助文档', 22 | icon: Document, 23 | color: 'rgb(255, 156, 110)', 24 | component: () => import('./views/document.vue'), 25 | }, 26 | ] 27 | export default menus 28 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/router.js: -------------------------------------------------------------------------------- 1 | import { createRouter, createWebHashHistory } from 'vue-router' 2 | 3 | import menus from './menus' 4 | 5 | const router = createRouter({ 6 | history: createWebHashHistory(), 7 | routes: menus, 8 | }) 9 | 10 | export default router 11 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/views/dashboard.vue: -------------------------------------------------------------------------------- 1 | 143 | 144 | 347 | 348 | 376 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/views/document.vue: 
-------------------------------------------------------------------------------- 1 | 51 | 52 | 57 | 58 | 63 | -------------------------------------------------------------------------------- /web/admin/assets/web/src/views/status.vue: -------------------------------------------------------------------------------- 1 | 47 | 48 | 85 | 86 | 91 | -------------------------------------------------------------------------------- /web/admin/assets/web/vite.config.js: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import vue from '@vitejs/plugin-vue' 3 | 4 | // https://vitejs.dev/config/ 5 | export default defineConfig({ 6 | plugins: [vue()] 7 | }) 8 | -------------------------------------------------------------------------------- /web/controller/base.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "github.com/sea-team/gofound/searcher/model" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | func Welcome(c *gin.Context) { 10 | ResponseSuccessWithData(c, "Welcome to GoFound") 11 | } 12 | 13 | // Query 查询 14 | func Query(c *gin.Context) { 15 | var request = &model.SearchRequest{ 16 | Database: c.Query("database"), 17 | } 18 | if err := c.ShouldBind(&request); err != nil { 19 | ResponseErrorWithMsg(c, err.Error()) 20 | return 21 | } 22 | //调用搜索 23 | r, err := srv.Base.Query(request) 24 | if err != nil { 25 | ResponseErrorWithMsg(c, err.Error()) 26 | } else { 27 | ResponseSuccessWithData(c, r) 28 | } 29 | } 30 | 31 | // GC 释放GC 32 | func GC(c *gin.Context) { 33 | srv.Base.GC() 34 | ResponseSuccess(c) 35 | } 36 | 37 | // Status 获取服务器状态 38 | func Status(c *gin.Context) { 39 | r := srv.Base.Status() 40 | ResponseSuccessWithData(c, r) 41 | } 42 | -------------------------------------------------------------------------------- /web/controller/database.go: 
-------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | // DatabaseDrop 删除数据库 6 | func DatabaseDrop(c *gin.Context) { 7 | dbName := c.Query("database") 8 | if dbName == "" { 9 | ResponseErrorWithMsg(c, "database is empty") 10 | return 11 | } 12 | 13 | if err := srv.Database.Drop(dbName); err != nil { 14 | ResponseErrorWithMsg(c, err.Error()) 15 | return 16 | } 17 | 18 | ResponseSuccessWithData(c, "删除成功") 19 | } 20 | 21 | // DatabaseCreate 创建数据库 22 | func DatabaseCreate(c *gin.Context) { 23 | dbName := c.Query("database") 24 | if dbName == "" { 25 | ResponseErrorWithMsg(c, "database is empty") 26 | return 27 | } 28 | 29 | srv.Database.Create(dbName) 30 | ResponseSuccessWithData(c, "创建成功") 31 | } 32 | 33 | // DBS 查询数据库 34 | func DBS(c *gin.Context) { 35 | ResponseSuccessWithData(c, srv.Database.Show()) 36 | } 37 | -------------------------------------------------------------------------------- /web/controller/index.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "github.com/sea-team/gofound/searcher/model" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | // AddIndex 添加索引 10 | func AddIndex(c *gin.Context) { 11 | document := &model.IndexDoc{} 12 | if err := c.ShouldBindJSON(&document); err != nil { 13 | ResponseErrorWithMsg(c, err.Error()) 14 | return 15 | } 16 | dbName := c.Query("database") 17 | if dbName == "" { 18 | ResponseErrorWithMsg(c, "database is empty") 19 | return 20 | } 21 | err := srv.Index.AddIndex(dbName, document) 22 | if err != nil { 23 | ResponseErrorWithMsg(c, err.Error()) 24 | return 25 | } 26 | 27 | ResponseSuccessWithData(c, nil) 28 | } 29 | 30 | // BatchAddIndex 批量添加索引 31 | func BatchAddIndex(c *gin.Context) { 32 | documents := make([]*model.IndexDoc, 0) 33 | if err := c.BindJSON(&documents); err != nil { 34 | ResponseErrorWithMsg(c, err.Error()) 35 | return 36 | } 37 
| 38 | dbName := c.Query("database") 39 | if dbName == "" { 40 | ResponseErrorWithMsg(c, "database is empty") 41 | return 42 | } 43 | 44 | err := srv.Index.BatchAddIndex(dbName, documents) 45 | if err != nil { 46 | ResponseErrorWithMsg(c, err.Error()) 47 | return 48 | } 49 | 50 | ResponseSuccess(c) 51 | } 52 | 53 | // RemoveIndex 删除索引 54 | func RemoveIndex(c *gin.Context) { 55 | removeIndexModel := &model.RemoveIndexModel{} 56 | if err := c.BindJSON(&removeIndexModel); err != nil { 57 | ResponseErrorWithMsg(c, err.Error()) 58 | return 59 | } 60 | 61 | dbName := c.Query("database") 62 | if dbName == "" { 63 | ResponseErrorWithMsg(c, "database is empty") 64 | return 65 | } 66 | 67 | if err := srv.Index.RemoveIndex(dbName, removeIndexModel); err != nil { 68 | ResponseErrorWithMsg(c, err.Error()) 69 | return 70 | } 71 | 72 | ResponseSuccess(c) 73 | } 74 | -------------------------------------------------------------------------------- /web/controller/response.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | type ResponseData struct { 10 | State bool `json:"state"` 11 | Message string `json:"message,omitempty"` 12 | Data interface{} `json:"data,omitempty"` 13 | } 14 | 15 | // ResponseSuccessWithData 携带数据成功返回 16 | func ResponseSuccessWithData(c *gin.Context, data interface{}) { 17 | c.JSON(http.StatusOK, &ResponseData{ 18 | State: true, 19 | Message: "success", 20 | Data: data, 21 | }) 22 | } 23 | 24 | // ResponseErrorWithMsg 返回错误 25 | func ResponseErrorWithMsg(c *gin.Context, message string) { 26 | c.JSON(http.StatusOK, &ResponseData{ 27 | State: false, 28 | Message: message, 29 | Data: nil, 30 | }) 31 | } 32 | 33 | // ResponseSuccess 返回成功 34 | func ResponseSuccess(c *gin.Context) { 35 | c.JSON(http.StatusOK, &ResponseData{ 36 | State: true, 37 | Message: "success", 38 | Data: nil, 39 | }) 40 | } 41 | 
-------------------------------------------------------------------------------- /web/controller/services.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | service2 "github.com/sea-team/gofound/web/service" 5 | ) 6 | 7 | var srv *Services 8 | 9 | type Services struct { 10 | Base *service2.Base 11 | Index *service2.Index 12 | Database *service2.Database 13 | Word *service2.Word 14 | } 15 | 16 | func NewServices() { 17 | srv = &Services{ 18 | Base: service2.NewBase(), 19 | Index: service2.NewIndex(), 20 | Database: service2.NewDatabase(), 21 | Word: service2.NewWord(), 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /web/controller/word.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import "github.com/gin-gonic/gin" 4 | 5 | // WordCut 分词 6 | func WordCut(c *gin.Context) { 7 | q := c.Query("q") 8 | if q == "" { 9 | ResponseErrorWithMsg(c, "请输入关键字") 10 | return 11 | } 12 | r := srv.Word.WordCut(q) 13 | ResponseSuccessWithData(c, r) 14 | } 15 | -------------------------------------------------------------------------------- /web/middleware/cors.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | // Cors 处理跨域请求,支持options访问 10 | func Cors() gin.HandlerFunc { 11 | return func(c *gin.Context) { 12 | method := c.Request.Method 13 | 14 | c.Header("Access-Control-Allow-Origin", "*") 15 | c.Header("Access-Control-Allow-Methods", "POST, GET, OPTIONS, PUT, DELETE, UPDATE") 16 | c.Header("Access-Control-Allow-Headers", "*") 17 | c.Header("Access-Control-Expose-Headers", "Content-Length, Access-Control-Allow-Origin, Access-Control-Allow-Headers, Cache-Control, Content-Language, Content-Type") 18 | c.Header("Access-Control-Allow-Credentials", "true") 
19 | 20 | //放行所有OPTIONS方法 21 | if method == "OPTIONS" { 22 | c.AbortWithStatus(http.StatusNoContent) 23 | } 24 | // 处理请求 25 | c.Next() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /web/middleware/exception.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import ( 4 | "github.com/sea-team/gofound/web" 5 | "runtime/debug" 6 | 7 | "github.com/gin-gonic/gin" 8 | ) 9 | 10 | // Exception 处理异常 11 | func Exception() gin.HandlerFunc { 12 | return func(c *gin.Context) { 13 | defer func() { 14 | if err := recover(); err != nil { 15 | debug.PrintStack() 16 | c.JSON(200, web.Error(err.(error).Error())) 17 | } 18 | c.Abort() 19 | }() 20 | c.Next() 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /web/result.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | type Result struct { 4 | State bool `json:"state"` 5 | 6 | Message string `json:"message,omitempty"` 7 | 8 | Data interface{} `json:"data,omitempty"` 9 | } 10 | 11 | func NewResult(state bool, message string, data interface{}) *Result { 12 | return &Result{ 13 | State: state, 14 | Message: message, 15 | Data: data, 16 | } 17 | } 18 | 19 | // Success 返回成功 20 | func Success(data interface{}) *Result { 21 | return NewResult(true, "success", data) 22 | } 23 | 24 | // Error 返回错误 25 | func Error(message string) *Result { 26 | return NewResult(false, message, nil) 27 | } 28 | -------------------------------------------------------------------------------- /web/router/base.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/sea-team/gofound/web/controller" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | // InitBaseRouter 基础管理路由 10 | func InitBaseRouter(Router *gin.RouterGroup) { 11 | 12 | BaseRouter := Router.Group("") 13 
| { 14 | BaseRouter.GET("/", controller.Welcome) 15 | BaseRouter.POST("query", controller.Query) 16 | BaseRouter.GET("status", controller.Status) 17 | BaseRouter.GET("gc", controller.GC) 18 | 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /web/router/database.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/sea-team/gofound/web/controller" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | // InitDatabaseRouter 数据库路由 10 | func InitDatabaseRouter(Router *gin.RouterGroup) { 11 | 12 | databaseRouter := Router.Group("db") 13 | { 14 | databaseRouter.GET("list", controller.DBS) // 查看数据库 15 | databaseRouter.GET("drop", controller.DatabaseDrop) // 删除数据库 16 | databaseRouter.GET("create", controller.DatabaseCreate) // 添加数据库 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /web/router/index.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/sea-team/gofound/web/controller" 5 | 6 | "github.com/gin-gonic/gin" 7 | ) 8 | 9 | // InitIndexRouter 索引路由 10 | func InitIndexRouter(Router *gin.RouterGroup) { 11 | 12 | indexRouter := Router.Group("index") 13 | { 14 | indexRouter.POST("", controller.AddIndex) // 添加单条索引 15 | indexRouter.POST("batch", controller.BatchAddIndex) // 批量添加索引 16 | indexRouter.POST("remove", controller.RemoveIndex) // 删除索引 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /web/router/router.go: -------------------------------------------------------------------------------- 1 | package router 2 | 3 | import ( 4 | "github.com/sea-team/gofound/global" 5 | "github.com/sea-team/gofound/web/admin" 6 | "github.com/sea-team/gofound/web/middleware" 7 | "io" 8 | "log" 9 | "mime" 10 | "strings" 11 | 12 | "github.com/gin-contrib/gzip" 13 | 
"github.com/gin-gonic/gin" 14 | ) 15 | 16 | // SetupRouter 路由管理 17 | func SetupRouter() *gin.Engine { 18 | if global.CONFIG.Debug { 19 | gin.SetMode(gin.DebugMode) 20 | } else { 21 | gin.SetMode(gin.ReleaseMode) 22 | gin.DefaultWriter = io.Discard //禁止Gin的控制台输出 23 | } 24 | 25 | router := gin.Default() 26 | // 启用GZIP压缩 27 | if global.CONFIG.EnableGzip { 28 | router.Use(gzip.Gzip(gzip.DefaultCompression)) 29 | } 30 | 31 | var handlers []gin.HandlerFunc 32 | //认证 33 | if global.CONFIG.Auth != "" { 34 | auths := strings.Split(global.CONFIG.Auth, ":") 35 | handlers = append(handlers, gin.BasicAuth( 36 | gin.Accounts{ 37 | auths[0]: auths[1], 38 | }, 39 | ), 40 | ) 41 | log.Println("Enable Auth:", global.CONFIG.Auth) 42 | } 43 | 44 | // 告诉服务.js文件的MIME类型 45 | err := mime.AddExtensionType(".js", "application/javascript") 46 | // 如果存在错误则需要马上抛出 47 | if err != nil { 48 | panic("添加扩展类型 mime 错误,错误原因:" + err.Error()) 49 | } 50 | 51 | //注册admin 52 | if global.CONFIG.EnableAdmin { 53 | admin.Register(router, handlers...) 54 | log.Printf("Admin Url: \t http://%v/admin", global.CONFIG.Addr) 55 | } 56 | 57 | // 分组管理 中间件管理 58 | router.Use(middleware.Cors(), middleware.Exception()) 59 | group := router.Group("/api", handlers...) 
60 | 	{
61 | 		InitBaseRouter(group)     // base management
62 | 		InitIndexRouter(group)    // index management
63 | 		InitDatabaseRouter(group) // database management
64 | 		InitWordRouter(group)     // word segmentation management
65 | 	}
66 | 	log.Printf("API Url: \t http://%v/api", global.CONFIG.Addr)
67 | 	return router
68 | }
69 |
--------------------------------------------------------------------------------
/web/router/word.go:
--------------------------------------------------------------------------------
1 | package router
2 |
3 | import (
4 | 	"github.com/sea-team/gofound/web/controller"
5 |
6 | 	"github.com/gin-gonic/gin"
7 | )
8 |
9 | // InitWordRouter registers the word-segmentation routes on the "word"
10 | // sub-group of Router: GET "cut" is handled by controller.WordCut.
11 | func InitWordRouter(Router *gin.RouterGroup) {
12 | 	wordRouter := Router.Group("word")
13 | 	{
14 | 		wordRouter.GET("cut", controller.WordCut)
15 | 	}
16 | }
17 |
--------------------------------------------------------------------------------
/web/service/base.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | 	"github.com/sea-team/gofound/global"
5 | 	"github.com/sea-team/gofound/searcher"
6 | 	"github.com/sea-team/gofound/searcher/model"
7 | 	"github.com/sea-team/gofound/searcher/system"
8 | 	"os"
9 | 	"runtime"
10 | )
11 |
12 | // Base groups the base-management operations: query, GC, status and restart.
13 | type Base struct {
14 | 	// Container is the engine container the queries are served from.
15 | 	Container *searcher.Container
16 | 	// Callback supplies the "system" section of the Status report.
17 | 	Callback func() map[string]interface{}
18 | }
19 |
20 | // NewBase returns a Base wired to global.Container and the package-level
21 | // Callback function.
22 | func NewBase() *Base {
23 | 	return &Base{
24 | 		Container: global.Container,
25 | 		Callback:  Callback,
26 | 	}
27 | }
28 |
29 | // Query runs request against the database named by request.Database and
30 | // returns whatever that engine's MultiSearch returns.
31 | func (b *Base) Query(request *model.SearchRequest) (*model.SearchResult, error) {
32 | 	return b.Container.GetDataBase(request.Database).MultiSearch(request)
33 | }
34 |
35 | // GC forces a garbage collection via runtime.GC.
36 | func (b *Base) GC() {
37 | 	runtime.GC()
38 | }
39 |
40 | // Status reports the server status as a map with four sections: "memory",
41 | // "cpu" and "disk" come from the system package, "system" from b.Callback.
42 | func (b *Base) Status() map[string]interface{} {
43 | 	// The previous runtime.ReadMemStats call was dropped: its result was never
44 | 	// used, yet it stops the world on every status request.
45 | 	s := b.Callback()
46 |
47 | 	r := map[string]interface{}{
48 | 		"memory": system.GetMemStat(),
49 | 		"cpu":    system.GetCPUStatus(),
50 | 		"disk":   system.GetDiskStat(),
51 | 		"system": s,
52 | 	}
53 | 	return r
54 | }
55 |
56 | // Restart is meant to restart the service but is not implemented yet: it only
57 | // terminates the process with exit status 0. os.Exit does not run deferred
58 | // functions, and nothing in this method starts the process again.
59 | func (b *Base) Restart() {
60 | 	// TODO: not implemented
61 | 	os.Exit(0)
62 | }
63 |
--------------------------------------------------------------------------------
/web/service/database.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | 	"github.com/sea-team/gofound/global"
5 | 	"github.com/sea-team/gofound/searcher"
6 | )
7 |
8 | // Database groups the database-management operations.
9 | type Database struct {
10 | 	// Container is the engine container that owns the databases.
11 | 	Container *searcher.Container
12 | }
13 |
14 | // NewDatabase returns a Database wired to global.Container.
15 | func NewDatabase() *Database {
16 | 	return &Database{
17 | 		Container: global.Container,
18 | 	}
19 | }
20 |
21 | // Show returns the engines reported by Container.GetDataBases.
22 | func (d *Database) Show() map[string]*searcher.Engine {
23 | 	return d.Container.GetDataBases()
24 | }
25 |
26 | // Drop deletes the database dbName and returns the error, if any, reported
27 | // by Container.DropDataBase.
28 | func (d *Database) Drop(dbName string) error {
29 | 	return d.Container.DropDataBase(dbName)
30 | }
31 |
32 | // Create returns the engine for dbName as obtained from Container.GetDataBase.
33 | // NOTE(review): presumably GetDataBase creates the database when it does not
34 | // exist yet - confirm against searcher.Container.
35 | func (d *Database) Create(dbName string) *searcher.Engine {
36 | 	return d.Container.GetDataBase(dbName)
37 | }
38 |
--------------------------------------------------------------------------------
/web/service/index.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | 	"github.com/sea-team/gofound/global"
5 | 	"github.com/sea-team/gofound/searcher"
6 | 	"github.com/sea-team/gofound/searcher/model"
7 | )
8 |
9 | // Index groups the index-management operations.
10 | type Index struct {
11 | 	// Container is the engine container the documents are indexed into.
12 | 	Container *searcher.Container
13 | }
14 |
15 | // NewIndex returns an Index wired to global.Container.
16 | func NewIndex() *Index {
17 | 	return &Index{
18 | 		Container: global.Container,
19 | 	}
20 | }
21 |
22 | // AddIndex indexes the single document request into the database dbName.
23 | func (i *Index) AddIndex(dbName string, request *model.IndexDoc) error {
24 | 	return i.Container.GetDataBase(dbName).IndexDocument(request)
25 | }
26 |
27 | // BatchAddIndex indexes documents into the database dbName one by one, in
28 | // order. It stops at the first failure and returns that error; this method
29 | // does not undo the IndexDocument calls that already succeeded.
30 | func (i *Index) BatchAddIndex(dbName string, documents []*model.IndexDoc) error {
31 | 	db := i.Container.GetDataBase(dbName)
32 | 	for _, doc := range documents {
33 | 		if err := db.IndexDocument(doc); err != nil {
34 | 			return err
35 | 		}
36 | 	}
37 | 	return nil
38 | }
39 |
40 | // RemoveIndex removes the entry identified by data.Id from the database
41 | // dbName and returns the error, if any, reported by the engine.
42 | func (i *Index) RemoveIndex(dbName string, data *model.RemoveIndexModel) error {
43 | 	return i.Container.GetDataBase(dbName).RemoveIndex(data.Id)
44 | }
45 |
--------------------------------------------------------------------------------
/web/service/service.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | 	"github.com/sea-team/gofound/global"
5 | 	"github.com/sea-team/gofound/searcher/system"
6 | 	"github.com/sea-team/gofound/searcher/utils"
7 | 	"os"
8 | 	"runtime"
9 | )
10 |
11 | // Callback collects runtime and configuration facts about the running
12 | // process; Base.Status exposes the result as its "system" section.
13 | func Callback() map[string]interface{} {
14 | 	return map[string]interface{}{
15 | 		"os":             runtime.GOOS,
16 | 		"arch":           runtime.GOARCH,
17 | 		"cores":          runtime.NumCPU(),
18 | 		"version":        runtime.Version(),
19 | 		"goroutines":     runtime.NumGoroutine(),
20 | 		"dataPath":       global.CONFIG.Data,
21 | 		"dictionaryPath": global.CONFIG.Dictionary,
22 | 		// GOMAXPROCS(0) only queries the current setting, so this reports the
23 | 		// value actually in effect instead of assuming NumCPU()*2.
24 | 		"gomaxprocs": runtime.GOMAXPROCS(0),
25 | 		"debug":      global.CONFIG.Debug,
26 | 		"shard":      global.CONFIG.Shard,
27 | 		// NOTE(review): presumably the size of the data directory in MB - confirm
28 | 		// against system.GetFloat64MB and utils.DirSizeB.
29 | 		"dataSize":   system.GetFloat64MB(utils.DirSizeB(global.CONFIG.Data)),
30 | 		"executable": os.Args[0],
31 | 		"dbs":        global.Container.GetDataBaseNumber(),
32 | 		//"indexCount": global.container.GetIndexCount(),
33 | 		//"documentCount": global.container.GetDocumentCount(),
34 | 		"pid":        os.Getpid(),
35 | 		"enableAuth": global.CONFIG.Auth != "",
36 | 		"enableGzip": global.CONFIG.EnableGzip,
37 | 		"bufferNum":  global.CONFIG.BufferNum,
38 | 	}
39 | }
40 |
--------------------------------------------------------------------------------
/web/service/word.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | 	"github.com/sea-team/gofound/global"
5 | 	"github.com/sea-team/gofound/searcher"
6 | )
7 |
8 | // Word groups the word-segmentation operations.
9 | type Word struct {
10 | 	// Container provides the Tokenizer used by WordCut.
11 | 	Container *searcher.Container
12 | }
13 |
14 | // NewWord returns a Word wired to global.Container.
15 | func NewWord() *Word {
16 | 	return &Word{
17 | 		Container: global.Container,
18 | 	}
19 | }
20 |
21 | // WordCut splits keyword with the container's tokenizer and returns the
22 | // resulting pieces.
23 | func (w *Word) WordCut(keyword string) []string {
24 | 	return w.Container.Tokenizer.Cut(keyword)
25 | }
26 |
--------------------------------------------------------------------------------