├── .github ├── release.yml └── workflows │ └── release.yml ├── .gitignore ├── .goreleaser.yaml ├── README.assets ├── image-20221221224536047.png ├── image-20221221224723254.png ├── image-20221221224936530.png ├── image-20221221225135745.png ├── image-20221221225220822.png ├── image-20231019121834019.png ├── image-20231019123028213.png ├── image-20231019130808020.png ├── image-20231019130928736.png ├── image-20231019131015426.png ├── image-20231019131641587.png ├── image-20231019132813381.png ├── image-20231019133102160.png ├── image-20231019133117388.png ├── image-20231019133133696.png ├── image-20231019133502798.png ├── image-20231019133518701.png ├── image-20231019133533504.png ├── image-20231019140852430.png ├── image-20231019140857250.png ├── image-20231023031640316.png ├── image-20231031085701682.png ├── image-20231031090513302.png ├── image-20231031090600695.png ├── image-20231031091824133.png ├── image-20231031133301924.png ├── image-20240902135717430.png ├── image-20240902140053666.png └── image-20240902140112091.png ├── README.md ├── cmd ├── cmd.go ├── cobra.go ├── output.go ├── patch.go └── root.go ├── core ├── diff.go ├── duplicate.go ├── duplicate_test.go ├── extract │ ├── simple_url.go │ └── simple_url_test.go ├── ip.go ├── ip_test.go ├── matcher.go ├── myBuffer.go ├── replace.go ├── similarity.go ├── similarity_test.go ├── utils.go ├── utils │ └── uniqueutil │ │ └── uniqueutil.go ├── xlsx.go └── xlsx_test.go ├── errx └── errors.go ├── go.mod ├── gotest ├── function_test.go └── patch_test.go ├── gr_test.sh ├── main.go ├── makefile ├── push.sh ├── test.go ├── update ├── gh.go ├── type.go ├── update.go └── update_test.go └── vars ├── common.go ├── help.go └── version.go /.github/release.yml: -------------------------------------------------------------------------------- 1 | #changelog: 2 | # exclude: 3 | # authors: 4 | # - dependabot 5 | # categories: 6 | # - title: 🎉 New Features 7 | # labels: 8 | # - "Type: Enhancement" 9 | # - title: 🐞 Bug Fixes 10 
| # labels: 11 | # - "Type: Bug" 12 | # - title: 🔨 Maintenance 13 | # labels: 14 | # - "Type: Maintenance" 15 | # - title: Other Changes 16 | # labels: 17 | # - "*" -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: goreleaser 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | jobs: 8 | goreleaser: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - 12 | name: Checkout 13 | uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | - 17 | name: Set up Go 18 | uses: actions/setup-go@v4 19 | - 20 | name: Run GoReleaser 21 | uses: goreleaser/goreleaser-action@v4 22 | with: 23 | # either 'goreleaser' (default) or 'goreleaser-pro' 24 | distribution: goreleaser 25 | version: latest 26 | args: release --clean 27 | env: 28 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 29 | # Your GoReleaser Pro key, if you are using the 'gorel -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Go template 2 | # Binaries for programs and plugins 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | *.xlsx 9 | *.xls 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | ### Example user template template 21 | ### Example user template 22 | 23 | # IntelliJ project files 24 | .idea 25 | *.iml 26 | out 27 | gen 28 | 29 | *.txt 30 | dist/ 31 | /go.sum 32 | /go_build_morefind 33 | /result.txt 34 | /target.txt 35 | /MoreFind -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | # This is an example 
.goreleaser.yml file with some sensible defaults. 2 | # Make sure to check the documentation at https://goreleaser.com 3 | before: 4 | hooks: 5 | # You may remove this if you don't use go modules. 6 | - go mod tidy 7 | # you may remove this if you don't need go generate 8 | # - go generate ./... 9 | builds: 10 | - env: 11 | - CGO_ENABLED=0 12 | binary: '{{ .ProjectName }}' 13 | goos: 14 | - linux 15 | - windows 16 | - darwin 17 | - freebsd 18 | goarch: ['amd64','386', 'arm', 'arm64'] 19 | ignore: 20 | - goos: darwin 21 | goarch: '386' 22 | - goos: windows 23 | goarch: arm 24 | - goos: windows 25 | goarch: arm64 26 | ldflags: 27 | - "-s -w" 28 | # - -X /vars.version={{.Version}} 29 | # - -extldflags="-static" # 跨平台 30 | flags: 31 | - -trimpath 32 | 33 | archives: 34 | - format: zip 35 | id: MoreFind 36 | name_template: >- 37 | {{ .ProjectName }}_v{{ .Version }}_ 38 | {{- .Os }}_ 39 | {{- if eq .Arch "amd64" }}x86_64 40 | {{- else if eq .Arch "386" }}i386 41 | {{- else}}{{ .Arch }}{{ end }} 42 | files: 43 | - README.md 44 | format_overrides: 45 | - goos: windows 46 | format: zip 47 | - format: tar.gz 48 | name_template: >- 49 | {{ .ProjectName }}_v{{ .Version }}_ 50 | {{- .Os }}_ 51 | {{- if eq .Arch "amd64" }}x86_64 52 | {{- else if eq .Arch "386" }}i386 53 | {{- else}}{{ .Arch }}{{ end }} 54 | files: 55 | - README.md 56 | 57 | checksum: 58 | name_template: 'checksums.txt' 59 | snapshot: 60 | name_template: "{{ incpatch .Version }}-next" 61 | changelog: 62 | sort: asc 63 | filters: 64 | exclude: 65 | - '^docs:' 66 | - '^test:' -------------------------------------------------------------------------------- /README.assets/image-20221221224536047.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20221221224536047.png -------------------------------------------------------------------------------- 
/README.assets/image-20221221224723254.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20221221224723254.png -------------------------------------------------------------------------------- /README.assets/image-20221221224936530.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20221221224936530.png -------------------------------------------------------------------------------- /README.assets/image-20221221225135745.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20221221225135745.png -------------------------------------------------------------------------------- /README.assets/image-20221221225220822.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20221221225220822.png -------------------------------------------------------------------------------- /README.assets/image-20231019121834019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019121834019.png -------------------------------------------------------------------------------- /README.assets/image-20231019123028213.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019123028213.png 
-------------------------------------------------------------------------------- /README.assets/image-20231019130808020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019130808020.png -------------------------------------------------------------------------------- /README.assets/image-20231019130928736.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019130928736.png -------------------------------------------------------------------------------- /README.assets/image-20231019131015426.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019131015426.png -------------------------------------------------------------------------------- /README.assets/image-20231019131641587.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019131641587.png -------------------------------------------------------------------------------- /README.assets/image-20231019132813381.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019132813381.png -------------------------------------------------------------------------------- /README.assets/image-20231019133102160.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019133102160.png -------------------------------------------------------------------------------- /README.assets/image-20231019133117388.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019133117388.png -------------------------------------------------------------------------------- /README.assets/image-20231019133133696.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019133133696.png -------------------------------------------------------------------------------- /README.assets/image-20231019133502798.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019133502798.png -------------------------------------------------------------------------------- /README.assets/image-20231019133518701.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019133518701.png -------------------------------------------------------------------------------- /README.assets/image-20231019133533504.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019133533504.png -------------------------------------------------------------------------------- /README.assets/image-20231019140852430.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019140852430.png -------------------------------------------------------------------------------- /README.assets/image-20231019140857250.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231019140857250.png -------------------------------------------------------------------------------- /README.assets/image-20231023031640316.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231023031640316.png -------------------------------------------------------------------------------- /README.assets/image-20231031085701682.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231031085701682.png -------------------------------------------------------------------------------- /README.assets/image-20231031090513302.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231031090513302.png -------------------------------------------------------------------------------- /README.assets/image-20231031090600695.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231031090600695.png -------------------------------------------------------------------------------- 
/README.assets/image-20231031091824133.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231031091824133.png -------------------------------------------------------------------------------- /README.assets/image-20231031133301924.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20231031133301924.png -------------------------------------------------------------------------------- /README.assets/image-20240902135717430.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20240902135717430.png -------------------------------------------------------------------------------- /README.assets/image-20240902140053666.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20240902140053666.png -------------------------------------------------------------------------------- /README.assets/image-20240902140112091.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mstxq17/MoreFind/ba6f45ca2e3d1a0ea5f29cc1d11b1f31860d0d5d/README.assets/image-20240902140112091.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MoreFind 2 | ![GitHub Repo stars](https://img.shields.io/github/stars/mstxq17/morefind?style=social) [![Go Report 
Card](https://goreportcard.com/badge/github.com/mstxq17/MoreFind)](https://goreportcard.com/report/github.com/mstxq17/MoreFind) ![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/mstxq17/MoreFind) ![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/mstxq17/MoreFind) 3 | 4 | 一款用于快速导出URL、Domain和IP的**魔法**小工具 5 | 6 | ![image-20231019140857250](README.assets/image-20231019140857250.png) 7 | 8 | ## 欢迎使用 :zap: 9 | 10 | 本工具开发崇尚第一原则: 小、简、美 11 | 12 | 如果有简单的需求可以提交ISSUE,不支持复杂需求,目前该工具已经能满足作者工作常用的大部分操作。 13 | 14 | ## 快速安装 :zap: 15 | 16 | 方式一: 通过Go包管理安装 17 | ```bash 18 | go install github.com/mstxq17/MoreFind@latest 19 | ``` 20 | 方式二: 直接安装二进制文件 21 | ```bash 22 | wget --no-check-certificate https://ghproxy.nday.pw/https://github.com/mstxq17/MoreFind/releases/download/v1.5.7/MoreFind_v1.5.7_`uname -s`_`uname -m`.tar.gz 23 | tar -xzvf MoreFind_v1.5.7_`uname -s`_`uname -m`.tar.gz 24 | sudo mv ./MoreFind /usr/bin/MoreFind && chmod +x /usr/bin/MoreFind 25 | ``` 26 | 27 | 方式三: 本地编译 28 | ```bash 29 | git clone https://github.com/mstxq17/MoreFind.git 30 | chmod +x ./build.sh && ./build.sh 31 | ``` 32 | 33 | 方式四:Release 页面已经发布各大系统的二进制执行文件直接下载执行 34 | 35 | https://github.com/mstxq17/MoreFind/releases/ 36 | 37 | ![image-20240902135717430](README.assets/image-20240902135717430.png) 38 | 39 | 40 | 41 | ## 用法说明 **:fire:** 42 | 43 | 显示帮助信息 44 | ```bash 45 | MoreFind -h 46 | ``` 47 | ```bash 48 | MoreFind is a very rapid script for extracting URL、Domain and Ip from data stream 49 | 50 | Usage: 51 | morefind [flags] 52 | morefind [command] 53 | 54 | Available Commands: 55 | doc Covert xlsx/xls data to plain text 56 | dedu De-duplicated lines 57 | diff File Comparator between two files 58 | grep If no grep , use this 59 | version Print the semantic version number of MoreFind 60 | help Help about any command 61 | completion Generate the autocompletion script for the specified shell 62 | 63 | Flags: 64 | -f, --file string Specifies the input file path. 
65 | -o, --output string Specifies the output file path. 66 | -k, --link Matches schema://host from the input pipe or file. 67 | --schema string When use with -t, the default is set to the specified protocol 68 | -i, --ip Matches IPs from the input pipe or file. 69 | --exclude Excludes internal/private IP segments when using -i/--ip. 70 | -d, --domain Matches domains from the input pipe or file. 71 | --root Outputs only the primary domain when using -d/--domain. 72 | -p, --port Filters only domain & IP:port combinations. 73 | -r, --rule string Utilizes a custom replacement rule (custom output replacement rule: https://{}/). 74 | --flag string Specifies the replacement identification. (default "{}") 75 | -u, --url Matches URLs from the input pipe or file. 76 | --filter string[="js,css,json,png,jpg,html,xml,zip,rar"] Filters URLs with specific extensions. 77 | -c, --cidr string[="__pipe__"] Outputs the specified CIDR IP list. 78 | -a, --alter strings IP Alters (0,1,2,3,4,5,6,7,8) 79 | -l, --len string Matches input specified length string, e.g., "-l 35" == "-l 0-35". 80 | -s, --show Displays the length of each line and provides summaries. 81 | -m, --metric Outputs execution progress metrics. 82 | -q, --quiet Enable quit mode,keep silence. 83 | -U, --update Updates the tool engine to the latest released version. 84 | -h, --help help for morefind 85 | 86 | Use "morefind [command] --help" for more information about a command. 87 | ``` 88 | 89 | 1)导出URL 90 | 91 | ```bash 92 | MoreFind -u 93 | # append --filter(default:js,css,json,png,jpg,html,xml,zip,rar) or --filter="png,jpg,xls,custom..." 
94 | # 通过添加参数 --filter(默认排除常见静态文件) 或者 通过 --filter="png,jpg" 自定义需要排除的后缀 95 | MoreFind -u --filter="png" 96 | ``` 97 | 98 | 1.1) 导出schema://host的格式,如 http://baidu.com:80/123.jpg -> http://baidu.com:80 99 | 100 | ``` 101 | MoreFind -k 102 | # append --schema "http" 103 | # 通过添加 --schema "http" 来为不存在协议的url添加上自定义协议 104 | MoreFind -k --schema "http" 105 | ``` 106 | 107 | 108 | 109 | 2)导出域名 110 | 111 | ```bash 112 | MoreFind -d 113 | # append the --root param to output only the root domain 114 | # 通过加上 --root 参数能够只显示根域名 115 | MoreFind -d --root 116 | # append --port param can retain domain:port format 117 | # 通过加上 --port 参数保留域名:端口的格式,搭配grep 118 | MoreFind -d --root --port 119 | ``` 120 | 121 | 122 | 123 | 3)导出ip 124 | 125 | ```bash 126 | # 默认会搜索全部ipv4地址 127 | MoreFind -i 128 | # 加上--exclude 排除属于内网的ip, 存在bug,比如localhost, 127.0.0.1 这些回环ip没排除 129 | MoreFind -i --exclude 130 | # 加上 --port 参数保留ip:端口的格式 131 | MoreFind -i --exclude --port 132 | ``` 133 | 134 | 135 | 136 | 4)输出统计信息 137 | 138 | ```bash 139 | MoreFind -s 140 | ``` 141 | 142 | 143 | 144 | 5)筛选指定长度字符串 145 | 146 | ```bash 147 | MoreFind -l 35 148 | MoreFind -l 0-35 149 | ``` 150 | 151 | 152 | 153 | 6)解析CIDR生成指定ip列表 154 | 155 | ```bash 156 | MoreFind -c="192.168.0.1/24" 157 | MoreFind -c="192.168.0.1-192.168.2.254" 158 | echo -e "192.168.4.1/24\n192.168.1.1/24"|./MoreFind --cidr 159 | ``` 160 | 161 | 162 | 163 | 7) 支持输出IP的8种格式 164 | 165 | ```bash 166 | MoreFind --cidr="127.0.0.1/32" -a 1 167 | MoreFind --cidr="127.0.0.1/32" -a 2 168 | MoreFind --cidr="127.0.0.1/32" -a 1 -a 2 169 | MoreFind --cidr="127.0.0.1/32" --alter 3 170 | ...
171 | ``` 172 | 173 | | Format | Index | Example | 174 | | -------------------------- | ----- | ---------------------------------- | 175 | | Dotted decimal | 1 | `127.0.0.1` | 176 | | 0 Optimized dotted decimal | 2 | `127.1` | 177 | | Octal | 3 | `0177.0.0.01` | 178 | | Hexadecimal | 4 | `0x7f.0x0.0x0.0x1` | 179 | | Decimal (Dword) | 5 | `2130706433` | 180 | | Binary | 6 | `01111111000000000000000000000001` | 181 | | Mixed | 7 | `127.0x1` | 182 | | URL encoded | 9 | `%31%32%37%2E%30%2E%30%2E%31` | 183 | 184 | 8)支持自定义输出规则 185 | 186 | ```bash 187 | # 最终会将结果替换到 {} 188 | MoreFind -i --exclude -r "http://{}/" 189 | ``` 190 | 191 | 9)支持输出执行进度,读取大文件的时候让你心里有数 (默认不开启) 192 | 193 | ```bash 194 | MoreFind -f 1.txt -m 195 | MoreFind -f 1.txt --metric 196 | ``` 197 | 198 | ![image-20231023031640316](README.assets/image-20231023031640316.png) 199 | 200 | a)支持检测最新版本并自动更新 201 | 202 | ```bash 203 | MoreFind -U 204 | MoreFind --update 205 | ``` 206 | 207 | ![image-20231031085701682](README.assets/image-20231031085701682.png) 208 | 209 | 210 | 211 | *)支持导出结果 212 | 213 | ```bash 214 | MoreFind -u -d -i -o result.txt 215 | ``` 216 | 217 | 218 | 219 | *)联动使用 220 | 221 | ```bash 222 | echo -e 'baidu.com ccccxxxx 1.com'|MoreFind -d |MoreFind -l 5 223 | ``` 224 | 225 | 226 | 227 | ## 命令增强 **:boom:** 228 | 229 | 目前新增三个子命令: 230 | 231 | > 子命令相当于 MoreFind 独立出来的一个子模块,起到补充增强的作用,目前代码结构存在多种可能性,整体处于灰度测试状态 232 | 233 | 1) 启发式去重 234 | 235 | ```bash 236 | MoreFind dedu -h 237 | # 基础去重 238 | cat baidu.txt| MoreFind dedu 239 | # 智能去重复,去重相似的,目前只有一个规则 240 | cat baidu.txt| MoreFind dedu --smart 241 | # -t / --threshold 是相似的阈值,当超过指定值就不会进行输出 242 | cat baidu.txt| MoreFind dedu --smart -t 20 243 | ``` 244 | 245 | ![image-20231031090513302](README.assets/image-20231031090513302.png) 246 | 247 | 248 | 249 | 2)diff 比较 / 严格比较两个文件的行,支持输出三种结果: 1)A文件有的,B文件没有的行 2)B有A没有的 3)AB都有的 250 | 251 | ```bash 252 | MoreFind diff -h 253 | MoreFind diff 1.txt 2.txt -M 1 254 | MoreFind diff 1.txt 2.txt -M 2 255 | MoreFind diff 1.txt
2.txt -M 3 256 | # 严格按照顺序逐行比较 257 | MoreFind diff 1.txt 2.txt -M 1 --strict 258 | ``` 259 | 260 | ![image-20231031090600695](README.assets/image-20231031090600695.png) 261 | 262 | 263 | 264 | 3)grep 功能,抛弃window的find,实现 OSX/Linux 才有的正则匹配功能,统一使用 265 | 266 | ```bash 267 | MoreFind grep -h 268 | # 正则匹配 269 | cat 1.txt|MoreFind grep "baidu.com" == cat 1.txt|MoreFind grep -P "baidu.com" 270 | # 反选 271 | cat 1.txt|MoreFind grep "baidu.com" -v == cat 1.txt|MoreFind grep -P "baidu.com" -v 272 | ``` 273 | 274 | ![image-20231031091824133](README.assets/image-20231031091824133.png) 275 | 276 | 277 | 278 | ## 使用场景 **:fire:** 279 | 280 | 假设当前你有这样一个文本文件 281 | 282 | ``` 283 | baidu.com 127.0.0.2 baidu.com http://baidu.com:9090 http://www.baidu.com:9091 284 | http://baidu.com/hi.jpg?a=1 285 | http://baidu.com/xxx/hi.jpg http://111.230.198.123:9090 http:/111.230.198.123:9091/ 286 | kk.baidu.com 287 | http://x.kk.baidu.com/1.png 288 | http://123.0.0.1/kkk/%2311 safafs 289 | 127.0.0.1/kkkk 290 | 111.230.198.123/2222/hellophp 291 | 111.230.198.123/2222/hello.php 292 | 192.168.1.1 293 | ``` 294 | 295 | 如果你想提取里面的URL 296 | 297 | ```bash 298 | # linux/osx 299 | cat 1.txt|MoreFind 300 | # window 301 | type 1.txt|MoreFind 302 | # 通用 303 | MoreFind -f 1.txt 304 | ``` 305 | 306 | ![image-20231019123028213](README.assets/image-20231019123028213.png) 307 | 308 | 如果你想提取 schema://host 格式的数据快速导入扫描器,如http://baidu.com/a13/123 -> http://baidu.com/ 309 | 310 | ``` 311 | MoreFind -f 1.txt -k 312 | cat 1.txt|MoreFind -k 313 | # 如果想为默认没有协议的目标添加默认协议 314 | MoreFind -f 1.txt -k --schema "http" 315 | cat 1.txt|MoreFind -k --schema "https" 316 | ``` 317 | 318 | ![image-20240902140053666](README.assets/image-20240902140053666.png) 319 | 320 | ![image-20240902140112091](README.assets/image-20240902140112091.png) 321 | 322 | 如果你只想提取域名,MoreFind 会自动去重,如果你想提取根域名 323 | 324 | ```bash 325 | cat 1.txt|MoreFind -d 326 | cat 1.txt|MoreFind -d --root 327 | ``` 328 | 329 | 
![image-20231019130808020](README.assets/image-20231019130808020.png) 330 | 331 | ![image-20231019130928736](README.assets/image-20231019130928736.png) 332 | 333 | 同理,ip也是可以这样的,有时候不想要内网域名 334 | 335 | ```bash 336 | cat 1.txt|MoreFind -i 337 | cat 1.txt|MoreFind -i --exclude 338 | ``` 339 | 340 | ![image-20231019131015426](README.assets/image-20231019131015426.png) 341 | 342 | ![image-20231019132813381](README.assets/image-20231019132813381.png) 343 | 344 | 可能你还想提取带端口的ip和域名 345 | 346 | ```bash 347 | MoreFind -f 1.txt -d --port 348 | MoreFind -f 1.txt -i --port 349 | MoreFind -f 1.txt -d -i --port 350 | ``` 351 | 352 | ![image-20231019133102160](README.assets/image-20231019133102160.png) 353 | 354 | ![image-20231019133117388](README.assets/image-20231019133117388.png) 355 | 356 | ![image-20231019133133696](README.assets/image-20231019133133696.png) 357 | 358 | 你可能想扫描https或者构造一些特殊URL,MoreFind也可以帮你实现 359 | 360 | ```bash 361 | cat 1.txt|MoreFind -d --port -r "https://{}/" 362 | cat 1.txt|MoreFind -d --port -r "https://$/" --flag '$' 363 | cat 1.txt|MoreFind -d --port -r "https://redacted.com/{}/?url={}" 364 | ``` 365 | 366 | ![image-20231019133502798](README.assets/image-20231019133502798.png) 367 | 368 | ![image-20231019133518701](README.assets/image-20231019133518701.png) 369 | 370 | ![image-20231019133533504](README.assets/image-20231019133533504.png) 371 | 372 | 373 | 374 | ## 性能方面 :rocket: 375 | 376 | 二进制文件大小约6.4M, 377 | 378 | 在i7 2.8ghz 环境跑完 646423W 条数据所需的时间为51s, 实际上在iterm会更快,不过MoreFind依然在性能上存在非常多的成长空间,TODO见。 379 | 380 | ```bash 381 | time MoreFind -f ok_domain.txt 382 | ``` 383 | 384 | ![image-20231019131641587](README.assets/image-20231019131641587.png) 385 | 386 | v1.5.0 版本增加不少功能,体积去到 8164KB,追求更高的压缩比使用`upx -9 MoreFind ` 压缩到 4052 KB。 387 | 388 | **Warning** 389 | 390 | > 虽然MoreFind 完全满足日常工作使用,但其不适合处理单行过长的文件,后面可能考虑优化,但是这种场景应该有更好的解决方案,我个人不是很愿意集成到MoreFind,不过还是放TODOS。 391 | 392 | 393 | 394 | ## TODO 条目 **:white_check_mark:** 395 | 396 | - [x] 输出结果自动去重复 397 | - [x] 
搜索ip的时候支持排除私有IP地址 398 | - [x] 搜索URL的时候支持排除自定义后缀的URL 399 | - [x] 增加返回结果形式,支持过滤数据返回`域名:port`的格式 400 | - [x] 增加返回结果形式,支持过滤数据返回`ip:port`的格式 401 | - [x] 增加返回结果形式,支持过滤数据返回`协议:域名:port`的格式 402 | - [x] 支持搜索域名的时候根据etld+1的形式只显示根域名 403 | - [x] 读取文件流,输出统计信息,显示每行长度 404 | - [x] 可指定每行长度筛选出符合条件的字符串 405 | - [x] 增加命令行中文版本,防止自己的蹩脚英语影响使用 406 | - [x] 支持生成解析CIDR从而生成指定ip列表 407 | - [x] 支持自定义输出规则,如原输出123 -> http://{}/ -> 转换输出-> http://123/ 408 | - [x] 排除内网IP功能,增加支持对回环本地IP进行过滤 409 | - [x] 增加执行状态参数,实时输出程序的执行过程的进度条 410 | - [x] 完善Log的输出和处理(* 继续需要改进) 411 | - [x] 加入部分URL智能去重代码 412 | - [x] 支持优化读取单行超大文本(* 这一块有点问题) 413 | - [x] 支持简单的grep功能: 正则匹配和反选 414 | - [x] 支持比较两个文件的每一行并输出3种结果 415 | - [x] 支持检测最新版本并实现自更新 416 | - [x] 命令帮助信息规范化,目前默认统一为英文 417 | - [x] 增强cidr生成IP列表功能,支持输出ip的多种格式(thanks for mapcidr) 418 | - [x] 统一结果输出, 将 stdout&file 合并到相同逻辑处理 419 | - [ ] 考虑重构程序,增加对ipv6的支持 420 | - [ ] 支持命令行控制显示中文/英文的帮助提示 421 | - [ ] 重新设计代码结构,并完善脚本异常处理部分 422 | - [ ] 优化项目代码和提高程序执行速度!!! >>>>>>>>>>>>>>>>>>>> -------------------------------------------------------------------------------- /cmd/cmd.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "github.com/mstxq17/MoreFind/core" 7 | "github.com/mstxq17/MoreFind/update" 8 | "github.com/mstxq17/MoreFind/vars" 9 | "github.com/spf13/cobra" 10 | "log" 11 | "os" 12 | "strings" 13 | ) 14 | 15 | var versionCmd = &cobra.Command{ 16 | Use: "version", 17 | Short: "Print the semantic version number of MoreFind", 18 | Long: `All software has versions. 
This is MoreFind's`,
	Run: func(cmd *cobra.Command, args []string) {
		v := fmt.Sprintf("MoreFind %s ", vars.VERSION)
		fmt.Print(v)
		latestVersion, err := update.GetLatestVersion(vars.TOOLNAME, vars.VERSION)
		if latestVersion != "" && err == nil {
			v := fmt.Sprintf(" -> But latest version %s has released, run with -U / --update get it", latestVersion)
			fmt.Println(v)
		}
		fmt.Println("")
	},
}

// Flag storage shared by the sub-commands declared below.
var pattern string
var inverseMatch bool // holds the value of the grep inverse-match flag
var cmpMode int
var strictMode bool
var smart bool
var threshold int

// grepCmd prints every input line accepted by core.MatchLine for the given
// pattern. The pattern comes from --pattern, or from the single positional
// argument when exactly one is supplied.
var grepCmd = &cobra.Command{
	Use:   "grep",
	Short: "If no grep , use this",
	Long:  `The grep command filters and displays lines matching a given pattern within files, akin to the Unix 'grep' command but without the second option.`,
	Run: func(cmd *cobra.Command, args []string) {
		if len(args) == 1 {
			pattern = args[0]
		}
		fileStdin, _ := handleStdin(file)
		defer func() {
			if err := fileStdin.Close(); err != nil {
				log.Fatal(err)
			}
		}()
		reader := bufio.NewReader(fileStdin)
		scanner := bufio.NewScanner(reader)
		buf := make([]byte, 0, 64*1024)
		scanner.Buffer(buf, MaxTokenSize)
		for scanner.Scan() {
			line := strings.TrimSpace(scanner.Text())
			matchLine, err := core.MatchLine(line, pattern, inverseMatch)
			if err == nil && matchLine != "" {
				fmt.Println(matchLine)
			}
		}
		// A line longer than MaxTokenSize (or a read failure) stops the scan;
		// report it instead of silently truncating the output.
		if err := scanner.Err(); err != nil {
			logger.Println(err)
		}
	},
}

// diffCmd compares two files and prints one of three result sets selected by
// --mode: 1 = lines only in the first file, 2 = lines only in the second file,
// 3 = lines present in both. Empty lines are never printed.
var diffCmd = &cobra.Command{
	Use:   "diff",
	Short: "File Comparator between two files",
	Long:  `File Comparator, a robust Golang tool, With options for strict or sorted comparison.`,
	Run: func(cmd *cobra.Command, args []string) {
		if len(args) != 2 {
			fmt.Println("Missing enough params ......")
			// The sub-command is registered as "diff", so the hint must say so.
			fmt.Printf("Usage: %v\t%s diff a.txt b.txt -M [1/2/3]%v", NewLine, vars.TOOLNAME, NewLine)
			return
		}
		// Validate the mode before doing any file I/O.
		if cmpMode < 1 || cmpMode > 3 {
			logger.Fatalf("cmpMode value must between 1-3, you pass: %v", cmpMode)
		}
		linesA, err := core.ReadLines(args[0])
		if err != nil {
			logger.Fatal(err)
		}
		linesB, err := core.ReadLines(args[1])
		if err != nil {
			logger.Fatal(err)
		}
		onlyInA, onlyInB, inBoth := core.CompareFiles(linesA, linesB, strictMode)
		var selected []string
		switch cmpMode {
		case 1:
			selected = onlyInA
		case 2:
			selected = onlyInB
		case 3:
			selected = inBoth
		}
		for _, line := range selected {
			if line != "" {
				fmt.Println(line)
			}
		}
	},
}

// deduCmd streams the input through a core.DuplicateRemover and prints every
// line the remover lets through.
var deduCmd = &cobra.Command{
	Use:   "dedu",
	Short: "De-duplicated lines",
	Long:  `De-duplicated lines Applying multiple heuristics techniques`,
	Run: func(cmd *cobra.Command, args []string) {
		fileStdin, _ := handleStdin(file)
		defer func() {
			if err := fileStdin.Close(); err != nil {
				logger.Fatal(err)
			}
		}()
		reader := bufio.NewReader(fileStdin)
		scanner := bufio.NewScanner(reader)
		buf := make([]byte, 0, 64*1024)
		scanner.Buffer(buf, MaxTokenSize)
		dr := core.NewDuplicateRemover(threshold, smart)
		for scanner.Scan() {
			line := scanner.Text()
			rResult := dr.RemoveDuplicator(line)
			if rResult != "" {
				fmt.Println(rResult)
			}
		}
		if err := scanner.Err(); err != nil {
			logger.Println(err)
		}
	},
}

// docCmd reads a whole spreadsheet file into memory, hands it to
// core.NewReader and prints whatever the reader's Read method returns.
var docCmd = &cobra.Command{
	Use:   "doc",
	Short: "Convert xlsx/xls data to plain text",
	Long:  "Extract plain text from xlsx or xls file quickly then output to stdout or file stream",
	Run: func(cmd *cobra.Command, args []string) {
		if len(args) == 1 {
			filePath := args[0]
			binData, err := os.ReadFile(filePath)
			if err != nil {
				logger.Fatal(err)
			}
			reader, err := core.NewReader(binData)
			if err != nil {
				logger.Fatal(err)
			}
			fmt.Print(reader.Read())
		} else {
			fmt.Println("Missing enough params ......")
			// The sub-command is registered as "doc", so the hint must say so.
			fmt.Printf("Usage: %v\t%s doc 1.xls%v", NewLine, vars.TOOLNAME, NewLine)
			fmt.Printf("\t%s doc 2.xlsx%v", vars.TOOLNAME, NewLine)
		}
	},
}

func init() {
	// try another style of flag parsing for the sub-commands
	grepCmd.Flags().StringVarP(&pattern, "pattern", "P", "", vars.GrepPatternHelpEn)
	grepCmd.Flags().BoolVarP(&inverseMatch, "inverse-match", "v", false, vars.InverseMatchHelpEn)
	grepCmd.SetUsageTemplate(usageTemplate)
	grepCmd.SetHelpTemplate(helpTemplate)
	grepCmd.Flags().SortFlags = false
	// compare two files and select the result set by mode
	diffCmd.Flags().IntVarP(&cmpMode, "mode", "M", 3, vars.DiffCmdHelpEn)
	diffCmd.Flags().BoolVarP(&strictMode, "strict", "", false, vars.StrictModeHelpEn)
	diffCmd.SetUsageTemplate(usageTemplate)
	diffCmd.SetHelpTemplate(helpTemplate)
	diffCmd.Flags().SortFlags = false
	// de-duplicate lines
	deduCmd.Flags().BoolVarP(&smart, "smart", "", false, vars.SmartHelpEn)
	deduCmd.Flags().IntVarP(&threshold, "threshold", "t", 15, vars.ThresholdHelpEn)
	deduCmd.SetUsageTemplate(usageTemplate)
	deduCmd.SetHelpTemplate(deduHelpTemplate)
	deduCmd.Flags().SortFlags = false
	// parse xlsx files
	//xlsxCmd.SetUsageTemplate(usageTemplate)
	docCmd.SetHelpTemplate(helpTemplate)
	docCmd.Flags().SortFlags = false
	// attach everything to the root command
	rootCmd.AddCommand(docCmd)
	rootCmd.AddCommand(deduCmd)
	rootCmd.AddCommand(diffCmd)
	rootCmd.AddCommand(grepCmd)
	rootCmd.AddCommand(versionCmd)
}
--------------------------------------------------------------------------------
/cmd/cobra.go:
-------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | // Help template defines the format of the help message. 4 | var helpTemplate = `{{.Short| trim}} 5 | {{.Long| trim}} 6 | 7 | Usage: 8 | {{.CommandPath}} [params] [flags] 9 | {{if .Runnable}} 10 | Flags: 11 | {{.LocalFlags.FlagUsages | trimTrailingWhitespaces}}{{end}} 12 | 13 | Global Flags: 14 | -o, --output string Specifies the output file path. 15 | ` 16 | 17 | // Usage template defines the format of the usage message. 18 | var usageTemplate = `Usage: {{.CommandPath}} [flags]` 19 | 20 | var deduHelpTemplate = `{{.Short| trim}} 21 | {{.Long| trim}} 22 | 23 | Usage: 24 | {{.CommandPath}} [flags] 25 | {{if .Runnable}} 26 | Flags: 27 | {{.LocalFlags.FlagUsages | trimTrailingWhitespaces}}{{end}} 28 | 29 | Global Flags: 30 | -o, --output string Specifies the output file path. 31 | ` 32 | -------------------------------------------------------------------------------- /cmd/output.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "github.com/mstxq17/MoreFind/core" 6 | "github.com/mstxq17/MoreFind/errx" 7 | "os" 8 | "sync" 9 | ) 10 | 11 | var NewLine = core.NewLine() 12 | 13 | // try for refactor in future 14 | // 为以后统一输出做铺垫 15 | func syncOutput(wg *sync.WaitGroup, outputchan chan string) { 16 | // 任务完成,增加计数 17 | defer wg.Done() 18 | var f *os.File 19 | if output != "" { 20 | var err error 21 | f, err = os.Create(output) 22 | if err != nil { 23 | logger.Fatal(errx.NewWithMsgf(err, "Could not create output file '%s':", file)) 24 | } 25 | defer f.Close() 26 | } 27 | for o := range outputchan { 28 | if o == "" { 29 | continue 30 | } 31 | // output to stdout & file stream 32 | // 输出到 stdout & 文件流 33 | if len(myIPFormats) > 0 { 34 | outputItems(f, core.AlterIP(o, myIPFormats)...) 
35 | } else { 36 | outputItems(f, o) 37 | } 38 | } 39 | } 40 | 41 | func outputItems(f *os.File, items ...string) { 42 | for _, item := range items { 43 | // 增加安静模式 44 | if !myQuiet { 45 | fmt.Print(item + NewLine) 46 | } 47 | if f != nil { 48 | _, _ = f.WriteString(item + NewLine) 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /cmd/patch.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "net/url" 5 | "regexp" 6 | "strings" 7 | ) 8 | 9 | func deepMakeUrl(checkUrl string) string { 10 | _, err := url.PathUnescape(checkUrl) 11 | if err != nil { 12 | errStr := err.Error() 13 | re := regexp.MustCompile(`invalid URL escape "(.+)"`) 14 | subString := re.FindStringSubmatch(errStr)[1] 15 | idx := strings.LastIndex(checkUrl, subString) 16 | if idx >= 0 { 17 | checkUrl = checkUrl[:idx] + "" + checkUrl[idx+len(subString):] 18 | } 19 | return deepMakeUrl(checkUrl) 20 | } 21 | return checkUrl 22 | } 23 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "github.com/cheggaaa/pb/v3" 7 | "github.com/mstxq17/MoreFind/core" 8 | "github.com/mstxq17/MoreFind/core/extract" 9 | "github.com/mstxq17/MoreFind/core/utils/uniqueutil" 10 | "github.com/mstxq17/MoreFind/update" 11 | "github.com/mstxq17/MoreFind/vars" 12 | "github.com/spf13/cobra" 13 | "golang.org/x/net/publicsuffix" 14 | "log" 15 | "mvdan.cc/xurls/v2" 16 | "net" 17 | "net/url" 18 | "os" 19 | "regexp" 20 | "sort" 21 | "strconv" 22 | "strings" 23 | "sync" 24 | ) 25 | 26 | const ( 27 | MaxTokenSize = 512 * 1024 * 1024 28 | ) 29 | 30 | var logger *log.Logger 31 | 32 | // IPAndPort define custom struct 33 | // 自定义一个结构体 34 | type IPAndPort struct { 35 | IP string 36 | Port string 37 | } 38 | 39 | // KeyValue sorted 
structure 40 | // 自定义一个排序结构体 41 | type KeyValue struct { 42 | Key string 43 | Value int 44 | } 45 | 46 | type ErrorCallback func() *log.Logger 47 | 48 | func isIPAddr(domain string) bool { 49 | ipaddr := net.ParseIP(domain) 50 | return ipaddr != nil 51 | } 52 | 53 | func isPrivateIP(line string) bool { 54 | // update regex pattern to match loopback and private ip 55 | // 更新正则表达式模式以匹配环回和私有IP 56 | //var iIRegex = regexp.MustCompile("^(10.\\d{1,3}.\\d{1,3}.((0/([89]|1[0-9]|2\\d|3[012]))|(\\d{1,3})))|(172.(1[6789]|2\\d|3[01]).\\d{1,3}.\\d{1,3}(/(1[6789]|2\\d|3[012]))?)|(192.168.\\d{1,3}.\\d{1,3}(/(1[6789]|2\\d|3[012]))?)$") 57 | var iIRegex = regexp.MustCompile("^(127\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}|10\\.\\d{1,3}\\.\\d{1,3}\\.((0/([89]|1[0-9]|2\\d|3[012]))|(\\d{1,3}))|172\\.(1[6789]|2\\d|3[01])\\.\\d{1,3}\\.\\d{1,3}(/(1[6789]|2\\d|3[012]))?|192\\.168\\.\\d{1,3}\\.\\d{1,3}(/(1[6789]|2\\d|3[012]))?)$") 58 | return iIRegex.MatchString(line) 59 | } 60 | 61 | func searchUrl(line string) []string { 62 | rxRelaxed := xurls.Relaxed() 63 | result := rxRelaxed.FindAllString(line, -1) 64 | return result 65 | } 66 | 67 | func searchDomain(line string, rootDomain bool) (string, string) { 68 | /** 69 | 匹配域名并输出 70 | match domain format and output 71 | */ 72 | line = strings.TrimSpace(line) 73 | if strings.HasPrefix(line, "http") == false { 74 | line = "https://" + line 75 | } 76 | // bug fix #2 77 | // 修复issue #2 78 | _, exists := os.LookupEnv("hiddenDev") 79 | if !exists { 80 | line = deepMakeUrl(line) 81 | } 82 | u, err := url.Parse(line) 83 | if err != nil { 84 | // 直接抛出错误 85 | logger.Println(err) 86 | return "", "" 87 | } 88 | domain := u.Hostname() 89 | port := u.Port() 90 | // match the domain strictly 91 | // 严格匹配域名格式 92 | index := strings.Index(domain, ",") 93 | // 修复存在逗号的bug 94 | // patch the bug it contains comma 95 | if index >= 0 { 96 | domain = domain[:index] 97 | } 98 | if isIPAddr(domain) { 99 | return port, domain 100 | } 101 | if rootDomain { 102 | return port, 
searchRootDomain(domain) 103 | } else { 104 | return port, domain 105 | } 106 | } 107 | 108 | // Reference:https://pkg.go.dev/golang.org/x/net/publicsuffix 109 | /* 110 | Description: search the eTLD + 1(rootDomain) from the completed domain 111 | param domain: completed domain 112 | return: rootDomain 113 | */ 114 | func searchRootDomain(domain string) string { 115 | eTLD, _ := publicsuffix.EffectiveTLDPlusOne(domain) 116 | return eTLD 117 | } 118 | 119 | func searchIp(line string) []IPAndPort { 120 | // only support ipv4, ipv6 will be supported in future 121 | //var ipRegex = regexp.MustCompile("((?:(?:25[0-5]|2[0-4]\\d|((1\\d{2})|([1-9]?\\d)))\\.){3}(?:25[0-5]|2[0-4]\\d|((1\\d{2})|([1-9]?\\d))))") 122 | ipPortRegex := regexp.MustCompile(`((?:(?:25[0-5]|2[0-4]\d|((1\d{2})|([1-9]?\d)))\.){3}(?:25[0-5]|2[0-4]\d|((1\d{2})|([1-9]?\d))))(:\d{1,5})?`) 123 | matches := ipPortRegex.FindAllStringSubmatch(line, -1) 124 | // store entries of result 125 | // 保存多个结果 126 | var result []IPAndPort 127 | for _, match := range matches { 128 | ip := match[1] 129 | port := match[8] 130 | if port != "" { 131 | port = port[1:] 132 | } 133 | entry := IPAndPort{IP: ip, Port: port} 134 | result = append(result, entry) 135 | } 136 | return result 137 | } 138 | 139 | func filterLen(lenRange string) (int, int) { 140 | standardPattern := regexp.MustCompile("^\\d+-\\d+$") 141 | oneIntPattern := regexp.MustCompile("^\\d+$") 142 | if standardPattern.MatchString(lenRange) { 143 | splitRes := strings.Split(lenRange, "-") 144 | minLength, _ := strconv.Atoi(splitRes[0]) 145 | maxLength, _ := strconv.Atoi(splitRes[1]) 146 | return minLength, maxLength 147 | } else if oneIntPattern.MatchString(lenRange) { 148 | maxLength, _ := strconv.Atoi(lenRange) 149 | return 0, maxLength 150 | } else { 151 | logger.Fatal("len Range Invalid, format should be 'min-max', ex 0-100") 152 | return 0, 0 153 | } 154 | } 155 | 156 | // the below two function can be merged and optimized 157 | // 
下面两个函数可以根据运行结构,将url解析那一部分抽象出来统一调用 158 | func filterExt(_url string, filterExts string) bool { 159 | fileExt := fileExt(_url) 160 | _exts := strings.Split(filterExts, ",") 161 | // for improve the filtering speed, reducing the comparative work,use map 162 | // 为了提高速度,减少比较,使用map来判断 163 | extMap := map[string]int{} 164 | for _, suffix := range _exts { 165 | // convert to lowercase uniformly 166 | // 统一小写 167 | suffix = strings.TrimSpace(suffix) 168 | suffix = strings.ToLower(suffix) 169 | extMap[suffix] = 1 170 | } 171 | if _, ok := extMap[fileExt]; ok { 172 | return true 173 | } else { 174 | return false 175 | } 176 | } 177 | 178 | func fileExt(_url string) string { 179 | // bug fix #2 180 | // 修复issue #2 181 | _, exists := os.LookupEnv("hiddenDev") 182 | if !exists { 183 | _url = deepMakeUrl(_url) 184 | } 185 | u, err := url.Parse(_url) 186 | if err != nil { 187 | // ignore the exception for preventing from blocking next line 188 | // 忽略异常防止阻塞下一行的处理 189 | logger.Println(err) 190 | } 191 | part := strings.Split(u.Path, "/") 192 | fileName := part[len(part)-1] 193 | if strings.Contains(fileName, ".") { 194 | filePart := strings.Split(fileName, ".") 195 | // convert to lowercase 196 | // 统一转换为小写 197 | return strings.ToLower(filePart[len(filePart)-1]) 198 | } else { 199 | return "" 200 | } 201 | } 202 | 203 | func handleStdin(file string) (*os.File, os.FileInfo) { 204 | var _file *os.File 205 | if file != "" { 206 | var err error 207 | _file, err = os.Open(file) 208 | if err != nil { 209 | panic(err) 210 | } 211 | } else { 212 | _file = os.Stdin 213 | } 214 | // use features to solve whether has input 215 | // 利用特性解决程序是否有输入的问题 216 | fi, _ := _file.Stat() 217 | if (fi.Mode() & os.ModeCharDevice) != 0 { 218 | logger.Println("No input found, exit ...") 219 | // optimize exit logic 220 | // 优化退出逻辑 221 | os.Exit(0) 222 | } 223 | return _file, fi 224 | } 225 | 226 | func updateCommand(cmd *cobra.Command, args []string) { 227 | callBackError := func() *log.Logger { 228 | 
return logger 229 | } 230 | if myUpdate { 231 | update.GetUpdateToolCallback(vars.TOOLNAME, vars.VERSION, callBackError)() 232 | } 233 | } 234 | 235 | func preCommand(cmd *cobra.Command, args []string) bool { 236 | // 输出 237 | outputchan := make(chan string) 238 | var wg sync.WaitGroup 239 | wg.Add(1) 240 | go syncOutput(&wg, outputchan) 241 | // if cidr flag be selected,deal with it first 242 | // 如果选择 cidr 参数,首先处理它 243 | if myCidr != "" && myCidr != "__pipe__" { 244 | err := core.GenIP(myCidr, outputchan) 245 | if err != nil { 246 | logger.Println(err) 247 | } 248 | close(outputchan) 249 | wg.Wait() 250 | return true 251 | } else { 252 | close(outputchan) 253 | wg.Wait() 254 | return false 255 | } 256 | } 257 | 258 | func runCommand(cmd *cobra.Command, args []string) { 259 | // unified data stream 260 | // 统一数据流 261 | _file, fi := handleStdin(file) 262 | // prevent memory leaking 263 | // 防止内存泄漏 264 | defer func() { 265 | if err := _file.Close(); err != nil { 266 | log.Fatal(err) 267 | } 268 | }() 269 | // define global reader of input 270 | // 定义全局输入读取流 271 | var scanner *bufio.Scanner 272 | if myProgress { 273 | bar := pb.Full.Start64(fi.Size()) 274 | defer func() { 275 | bar.Finish() 276 | }() 277 | reader := bar.NewProxyReader(bufio.NewReader(_file)) 278 | scanner = bufio.NewScanner(reader) 279 | } else { 280 | reader := bufio.NewReader(_file) 281 | scanner = bufio.NewScanner(reader) 282 | } 283 | buf := make([]byte, 0, 64*1024) 284 | // support maximum 512MB buffer every line & support set maximum size through env, unit is MB 285 | // 支持最大读取单行 512MB 大小 & 支持环境变量设置更大值,单位/MB 286 | scanner.Buffer(buf, core.GetEnvOrDefault("MaxTokenSize", MaxTokenSize, 1024*1024)) 287 | // 输出 288 | outputchan := make(chan string) 289 | var wg sync.WaitGroup 290 | wg.Add(1) 291 | go syncOutput(&wg, outputchan) 292 | // todo: current structure may be chaotic, should abstract the handle process 293 | if myCidr == "__pipe__" { 294 | for scanner.Scan() { 295 | line := 
strings.TrimSpace(scanner.Text()) 296 | err := core.GenIP(line, outputchan) 297 | if err != nil { 298 | logger.Println(err) 299 | } 300 | } 301 | } 302 | if myUrl == false && myDomain == false && myIp == false { 303 | if myShow == true { 304 | count := 0 305 | maxLength := 0 306 | minLength := 0 307 | first := true 308 | countDeduItems := make(map[string]int) 309 | // 存储统计的数据 310 | for scanner.Scan() { 311 | line := scanner.Text() 312 | lineLength := strconv.Itoa(len(line)) 313 | if len(line) > maxLength { 314 | maxLength = len(line) 315 | } 316 | if len(line) > 0 && first == true { 317 | minLength = len(line) 318 | first = false 319 | } 320 | if len(line) < minLength && first == false { 321 | minLength = len(line) 322 | } 323 | stripLine := strings.TrimSpace(line) 324 | count++ 325 | countDeduItems[stripLine]++ 326 | outputLine := fmt.Sprintf("%-5d Len:%-6s\t%s", count, lineLength, line) 327 | outputchan <- outputLine 328 | } 329 | splitPadding := "===================================================" 330 | outputchan <- splitPadding 331 | // show duplicate item count 332 | // 统计重复项的次数 333 | var keyValuePairs []KeyValue 334 | for key, value := range countDeduItems { 335 | keyValuePairs = append(keyValuePairs, KeyValue{Key: key, Value: value}) 336 | } 337 | // 使用自定义排序函数对切片进行排序 338 | sort.Slice(keyValuePairs, func(i, j int) bool { 339 | return keyValuePairs[i].Value > keyValuePairs[j].Value 340 | }) 341 | for index, kv := range keyValuePairs { 342 | outputLine := fmt.Sprintf("%-5d Num:%-6d\t%s", index, kv.Value, kv.Key) 343 | outputchan <- outputLine 344 | } 345 | outputchan <- splitPadding 346 | summaryTotal := fmt.Sprintf("CountLine: %d MaxLength: %d, MinLength: %d", count, maxLength, minLength) 347 | outputchan <- summaryTotal 348 | } 349 | if myLimitLen != "" { 350 | minLen, maxLen := filterLen(myLimitLen) 351 | for scanner.Scan() { 352 | line := strings.TrimSpace(scanner.Text()) 353 | if minLen <= len(line) && len(line) <= maxLen { 354 | outputchan <- line 355 | 
} 356 | } 357 | } 358 | } 359 | if myUrl == false && myDomain == false && myIp == false && myLink == false { 360 | myUrl = true 361 | } 362 | var urlList []string 363 | var domainList []string 364 | var ipList []string 365 | // remove duplicated url 366 | // 去除重复的url 367 | found := make(map[string]struct{}) 368 | // define stream myself 369 | // 定义自己的输出流 370 | var outputBuffer *core.MyBuffer 371 | var customStringHandler core.CustomStringHandler 372 | if myRule != "" { 373 | outputBuffer = core.NewMyBuffer(true) 374 | customStringHandler.Strategy = 1 375 | customStringHandler.Rule = myRule 376 | customStringHandler.Flag = myFlag 377 | } else { 378 | outputBuffer = core.NewMyBuffer(false) 379 | customStringHandler.Strategy = 0 380 | } 381 | for scanner.Scan() { 382 | line := strings.TrimSpace(scanner.Text()) 383 | if myUrl == true || myDomain == true || myLink == true { 384 | searchUrl := searchUrl(line) 385 | for _, _url := range searchUrl { 386 | trimUrl := strings.TrimSpace(_url) 387 | if myLink == true { 388 | simpleUrl, hasSchema, err := extract.SimpleUrl(trimUrl) 389 | if err != nil { 390 | logger.Printf("handle: %v Err: %v \n", trimUrl, err) 391 | continue 392 | } 393 | if mySchema != "" && !hasSchema { 394 | // 写入输出 395 | simpleUrl = fmt.Sprintf("%v://%v", mySchema, simpleUrl) 396 | if uniqueutil.IsKeyUniq(simpleUrl, found) { 397 | outputBuffer.WriteString(simpleUrl, &customStringHandler) 398 | found[simpleUrl] = struct{}{} 399 | outputchan <- outputBuffer.TempString 400 | } 401 | } else { 402 | // 写入输出 403 | if uniqueutil.IsKeyUniq(simpleUrl, found) { 404 | outputBuffer.WriteString(simpleUrl, &customStringHandler) 405 | found[simpleUrl] = struct{}{} 406 | outputchan <- outputBuffer.TempString 407 | } 408 | } 409 | } 410 | 411 | if myUrl == true { 412 | if output != "" { 413 | urlList = append(urlList, trimUrl) 414 | } 415 | // remove repeated string 416 | if _, ok := found[trimUrl]; !ok { 417 | if myUrlFilter != "" { 418 | if !filterExt(_url, myUrlFilter) { 
419 | outputBuffer.WriteString(_url, &customStringHandler) 420 | found[_url] = struct{}{} 421 | } 422 | } else { 423 | outputBuffer.WriteString(_url, &customStringHandler) 424 | found[_url] = struct{}{} 425 | } 426 | outputchan <- outputBuffer.TempString 427 | } 428 | } 429 | if myDomain == true { 430 | port, _domain := searchDomain(trimUrl, myRootDomain) 431 | if _domain == "" || isIPAddr(_domain) { 432 | continue 433 | } 434 | if myWithPort { 435 | if port != "" { 436 | _domain = _domain + ":" + port 437 | } 438 | } 439 | if output != "" { 440 | domainList = append(domainList, _domain) 441 | } 442 | // remove repeated string 443 | if _, ok := found[_domain]; !ok { 444 | outputBuffer.WriteString(_domain, &customStringHandler) 445 | found[_domain] = struct{}{} 446 | outputchan <- outputBuffer.TempString 447 | } 448 | } 449 | } 450 | } 451 | if myIp == true { 452 | searchIp := searchIp(line) 453 | for _, ipps := range searchIp { 454 | ipWithPort := ipps.IP 455 | if myWithPort && ipps.Port != "" { 456 | ipWithPort = ipps.IP + ":" + ipps.Port 457 | } 458 | if output != "" { 459 | ipList = append(ipList, ipWithPort) 460 | } 461 | // remove repeated string 462 | // 删除重复的行 463 | if _, ok := found[ipWithPort]; !ok { 464 | if myPrivateIp == true { 465 | if isPrivateIP(ipWithPort) == false { 466 | outputBuffer.WriteString(ipWithPort, &customStringHandler) 467 | found[ipWithPort] = struct{}{} 468 | } 469 | } else { 470 | outputBuffer.WriteString(ipWithPort, &customStringHandler) 471 | found[ipWithPort] = struct{}{} 472 | } 473 | outputchan <- outputBuffer.TempString 474 | } 475 | } 476 | } 477 | outputBuffer.Reset() 478 | } 479 | // maybe exceed maxTokenSize length 480 | if err := scanner.Err(); err != nil { 481 | logger.Println(err) 482 | } 483 | close(outputchan) 484 | wg.Wait() 485 | } 486 | 487 | var ( 488 | file string 489 | output string 490 | myUrl bool 491 | myDomain bool 492 | myRootDomain bool 493 | myWithPort bool 494 | myIp bool 495 | myPrivateIp bool 496 | 
myLimitLen   string
	myShow       bool
	myUrlFilter  string
	myCidr       string
	myRule       string
	myFlag       string
	myProgress   bool
	myUpdate     bool
	myQuiet      bool
	myLink       bool
	mySchema     string
	myXlsx       string
	myIPFormats  []string
	// rootCmd is the top-level command: update first, then explicit CIDR
	// expansion, then the regular extraction pipeline.
	rootCmd = &cobra.Command{
		Use:   "morefind",
		Short: "MoreFind is a very rapid script for extracting URL、Domain and Ip from data stream",
		Long:  "",

		Run: func(cmd *cobra.Command, args []string) {
			// run high priority command first (e.g. the update command)
			updateCommand(cmd, args)
			// preCommand returning true means the request was fully handled
			if preCommand(cmd, args) {
				return
			}
			// otherwise continue with the regular pipeline
			runCommand(cmd, args)
		},
	}
)

// Execute runs the root command. On failure the error is written to stderr
// (best effort) and the process exits with status 1.
func Execute() {
	if err := rootCmd.Execute(); err != nil {
		// A failed write to stderr must not turn a failure into exit status 0.
		_, _ = fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

func init() {
	// create the global logger and set its flags
	logger = log.New(os.Stderr, "", log.Ldate|log.Ltime|log.Lshortfile)
	// cache the line terminator to reduce the amount of function calls
	NewLine = core.NewLine()
	rootCmd.PersistentFlags().StringVarP(&file, "file", "f", "", vars.FileHelpEn)
	rootCmd.PersistentFlags().StringVarP(&output, "output", "o", "", vars.OutputHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myLink, "link", "k", false, vars.TargetHelpEn)
	rootCmd.PersistentFlags().StringVarP(&mySchema, "schema", "", "", vars.SchemaHelpEn)
	rootCmd.PersistentFlags().Lookup("schema").NoOptDefVal = ""
	rootCmd.PersistentFlags().BoolVarP(&myIp, "ip", "i", false, vars.IPHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myPrivateIp, "exclude", "", false, vars.ExcludeHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myDomain, "domain", "d", false, vars.DomainHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myRootDomain, "root", "", false, vars.RootDomainHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myWithPort, "port", "p", false, vars.WithPortHelpEn)
	rootCmd.PersistentFlags().StringVarP(&myRule, "rule", "r", "", vars.RuleHelpEn)
	rootCmd.PersistentFlags().StringVarP(&myFlag, "flag", "", "{}", vars.FlagHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myUrl, "url", "u", false, vars.URLHelpEn)
	rootCmd.PersistentFlags().StringVarP(&myUrlFilter, "filter", "", "", vars.URLFilterHelpEn)
	// this trick comes from https://stackoverflow.com/questions/70182858/how-to-create-flag-with-or-without-argument-in-golang-using-cobra
	// it helped a lot, so it is logged here; google dork: "flag needs an argument: cobra"
	// (it shows how to give --filter a default value when used without an argument)
	rootCmd.PersistentFlags().Lookup("filter").NoOptDefVal = "js,css,json,png,jpg,html,xml,zip,rar"
	rootCmd.PersistentFlags().StringVarP(&myCidr, "cidr", "c", "", vars.CidrHelpEn)
	rootCmd.PersistentFlags().StringSliceVarP(&myIPFormats, "alter", "a", nil, vars.AlterHelpEn)
	rootCmd.PersistentFlags().Lookup("cidr").NoOptDefVal = "__pipe__"
	rootCmd.PersistentFlags().StringVarP(&myLimitLen, "len", "l", "", vars.LimitLenHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myShow, "show", "s", false, vars.ShowHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myProgress, "metric", "m", false, vars.ProgressHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myQuiet, "quiet", "q", false, vars.QuietHelpEn)
	rootCmd.PersistentFlags().BoolVarP(&myUpdate, "update", "U", false, vars.UpdateHelpEn)
	// do not sort flags lexicographically; show them in declaration order
	rootCmd.PersistentFlags().SortFlags = false
	rootCmd.Flags().SortFlags = false
	// do not sort sub-commands lexicographically
	//
cobra.EnableCommandSorting = false 578 | } 579 | -------------------------------------------------------------------------------- /core/diff.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "bufio" 5 | "os" 6 | "sort" 7 | ) 8 | 9 | // 用于比较两个文件a、b每一行,根据需要提取以下情况 10 | // 1: a有的,b没有的行 11 | // 2: a没有的,b有的行 12 | // 3: a、b都有的行 13 | // 严格模式是逐行比较 14 | // 非严格模式是排序后逐行比较,默认 15 | 16 | func ReadLines(filename string) ([]string, error) { 17 | file, err := os.Open(filename) 18 | if err != nil { 19 | return nil, err 20 | } 21 | defer file.Close() 22 | 23 | var lines []string 24 | scanner := bufio.NewScanner(file) 25 | for scanner.Scan() { 26 | lines = append(lines, scanner.Text()) 27 | } 28 | return lines, scanner.Err() 29 | } 30 | 31 | func CompareFiles(a, b []string, strictMode bool) ([]string, []string, []string) { 32 | var onlyInA, onlyInB, inBoth []string 33 | 34 | if strictMode { 35 | // 严格模式:逐行比较 36 | lineACount := len(a) 37 | lineBCount := len(b) 38 | for index, lineA := range a { 39 | if index <= lineBCount-1 { 40 | if lineA != b[index] { 41 | onlyInA = append(onlyInA, lineA) 42 | } else { 43 | if lineACount > lineBCount { 44 | inBoth = append(inBoth, lineA) 45 | } 46 | } 47 | } else { 48 | onlyInA = append(onlyInA, lineA) 49 | } 50 | } 51 | for index, lineB := range b { 52 | if index <= lineACount-1 { 53 | if lineB != a[index] { 54 | onlyInB = append(onlyInA, lineB) 55 | } else { 56 | if lineBCount >= lineACount { 57 | inBoth = append(inBoth, lineB) 58 | } 59 | } 60 | } else { 61 | onlyInB = append(onlyInB, lineB) 62 | } 63 | } 64 | } else { 65 | // 非严格模式:排序后逐行比较 66 | sort.Strings(a) 67 | sort.Strings(b) 68 | tempMap := make(map[string]int8) 69 | for _, item := range a { 70 | tempMap[item] = 1 71 | } 72 | for _, item := range b { 73 | if tempMap[item] == 1 { 74 | tempMap[item] = 3 75 | } else { 76 | tempMap[item] = 2 77 | } 78 | } 79 | for value, flag := range tempMap { 80 | if flag == 1 { 81 | 
onlyInA = append(onlyInA, value) 82 | } 83 | if flag == 2 { 84 | onlyInB = append(onlyInB, value) 85 | } 86 | if flag == 3 { 87 | inBoth = append(inBoth, value) 88 | } 89 | } 90 | } 91 | return onlyInA, onlyInB, inBoth 92 | } 93 | -------------------------------------------------------------------------------- /core/duplicate.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "regexp" 5 | ) 6 | 7 | const ( 8 | AlphanumericOtherMixed = "{ALPHANUMERIC_OTHER_MIXED}" 9 | PureNumber = "{PURE_NUMBER}" 10 | PureChinese = "{PURE_CHINESE}" 11 | ) 12 | 13 | var Filters = map[string]string{ 14 | AlphanumericOtherMixed: `[0-9A-Za-z_-]{8,}`, 15 | PureNumber: `[0-9]{2,7}`, 16 | PureChinese: `[\p{Han}]{2,}`, 17 | } 18 | 19 | // OrderFilters distribute filter order is required because of unordered map 20 | // OrderFilters 组织好过滤顺序是必须的,解决map的无序问题 21 | var OrderFilters = []string{ 22 | AlphanumericOtherMixed, 23 | PureNumber, 24 | PureChinese, 25 | } 26 | 27 | type DuplicateRemover struct { 28 | linesMap map[string]struct{} 29 | linesCount map[string]int 30 | threshold int 31 | smart bool 32 | ANRegexp map[string]*regexp.Regexp 33 | } 34 | 35 | func NewDuplicateRemover(threshold int, smart bool) *DuplicateRemover { 36 | dr := &DuplicateRemover{ 37 | linesMap: make(map[string]struct{}), 38 | linesCount: make(map[string]int), 39 | threshold: threshold, 40 | smart: smart, 41 | } 42 | // some design problems 43 | // 设计存在问题 44 | dr.ANRegexp, _ = func() (map[string]*regexp.Regexp, error) { 45 | ANRegexp := make(map[string]*regexp.Regexp) 46 | for key, value := range Filters { 47 | ANRegexp[key] = regexp.MustCompile(value) 48 | } 49 | return ANRegexp, nil 50 | }() 51 | return dr 52 | } 53 | 54 | func (dr *DuplicateRemover) RemoveDuplicator(line string) string { 55 | if dr.smart { 56 | gResult := dr.generalize(line) 57 | if _, exists := dr.linesMap[line]; !exists { 58 | dr.linesMap[line] = struct{}{} 59 | 
dr.linesCount[gResult] += 1 60 | if dr.linesCount[gResult] <= dr.threshold { 61 | return line 62 | } 63 | } 64 | } else { 65 | if _, exists := dr.linesMap[line]; !exists { 66 | dr.linesMap[line] = struct{}{} 67 | return line 68 | } 69 | } 70 | return "" 71 | } 72 | 73 | // 将正则 [0-9A-Za-z]{10,} 一般化,超过阈值则进行智能过滤 74 | func (dr *DuplicateRemover) generalize(line string) string { 75 | for _, key := range OrderFilters { 76 | line = dr.ANRegexp[key].ReplaceAllString(line, key) 77 | } 78 | return line 79 | } 80 | -------------------------------------------------------------------------------- /core/duplicate_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "github.com/stretchr/testify/require" 5 | "testing" 6 | ) 7 | 8 | func TestRemoveDuplicator(t *testing.T) { 9 | var testCase = []string{ 10 | "http://baidu.com/?a=中国你好呀", 11 | "http://baidu.com/?a=中国你好呀哈哈", 12 | } 13 | dr := NewDuplicateRemover(1, true) 14 | for _, line := range testCase { 15 | afterLine := dr.RemoveDuplicator(line) 16 | require.NotEqualValues(t, "http://baidu.com/?a=中国你好呀哈哈", afterLine) 17 | } 18 | } 19 | 20 | func TestCompareUtf8(t *testing.T) { 21 | sh := &SimHash{} 22 | var testCase = []struct { 23 | s1 string 24 | s2 string 25 | expected int64 26 | }{ 27 | {"w我三个字", "我三个字", 0}, 28 | } 29 | for _, tc := range testCase { 30 | result := sh.compareUtf8(tc.s1, tc.s2) 31 | require.Equal(t, tc.expected, result, "Test Failed") 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /core/extract/simple_url.go: -------------------------------------------------------------------------------- 1 | package extract 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | "strings" 7 | ) 8 | 9 | func SimpleUrl(input string) (string, bool, error) { 10 | var hasSchema bool 11 | if strings.Contains(input, "://") { 12 | hasSchema = true 13 | } else { 14 | hasSchema = false 15 | input = fmt.Sprintf("http://%s", 
input) 16 | } 17 | parsed, err := url.Parse(input) 18 | if err != nil { 19 | return "", false, err 20 | } 21 | if hasSchema == true { 22 | tSimpleUrl := fmt.Sprintf("%s://%s", parsed.Scheme, parsed.Host) 23 | return tSimpleUrl, true, nil 24 | } else { 25 | // 拼接协议进行解析 26 | if err != nil { 27 | return "", false, err 28 | } else { 29 | tSimpleUrl := fmt.Sprintf("%s", parsed.Host) 30 | return tSimpleUrl, false, nil 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /core/extract/simple_url_test.go: -------------------------------------------------------------------------------- 1 | package extract 2 | 3 | import "testing" 4 | 5 | // Test cases for SimpleUrl 6 | func TestSimpleUrl(t *testing.T) { 7 | testCases := []struct { 8 | input string 9 | expected string 10 | hasScheme bool 11 | }{ 12 | {"http://example.com:8080", "http://example.com:8080", true}, 13 | {"http://example.com", "http://example.com", true}, 14 | {"https://example.com/path", "https://example.com", true}, 15 | {"ftp://example.com", "ftp://example.com", true}, 16 | {"example.com/123.php?a=1", "example.com", false}, 17 | {"example.com", "example.com", false}, 18 | {"http://", "http://", true}, // Invalid URL 19 | {"", "", false}, // Empty string 20 | } 21 | 22 | for _, tc := range testCases { 23 | result, hasScheme, err := SimpleUrl(tc.input) 24 | if err != nil { 25 | t.Errorf("input: %s, unexpected error: %v", tc.input, err) 26 | } 27 | if result != tc.expected { 28 | t.Errorf("input: %s, expected: %s, got: %s", tc.input, tc.expected, result) 29 | } 30 | if hasScheme != tc.hasScheme { 31 | t.Errorf("input: %s, expected hasScheme: %v, got: %v", tc.input, tc.hasScheme, hasScheme) 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /core/ip.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "encoding/hex" 5 | "fmt" 6 | 
"github.com/mstxq17/MoreFind/errx" 7 | "math/big" 8 | "net" 9 | "regexp" 10 | "strings" 11 | ) 12 | 13 | // AlterIP make some come true 14 | // AlterIP 只选取部分实现 15 | func AlterIP(ip string, formats []string) []string { 16 | var alteredIPs []string 17 | for _, format := range formats { 18 | standardIP := net.ParseIP(ip) 19 | switch format { 20 | case "1": 21 | alteredIPs = append(alteredIPs, standardIP.String()) 22 | case "2": 23 | // 0-optimized dotted-decimal notation 24 | // the 0 value segments of an IP address can be omitted (eg. 127.0.0.1 => 127.1) 25 | // regex for zeroes with dot 0000. 26 | var reZeroesWithDot = regexp.MustCompile(`(?m)[0]+\.`) 27 | // regex for .0000 28 | var reDotWithZeroes = regexp.MustCompile(`(?m)\.[0^]+$`) 29 | // suppress 0000. 30 | alteredIP := reZeroesWithDot.ReplaceAllString(standardIP.String(), "") 31 | // suppress .0000 32 | alteredIP = reDotWithZeroes.ReplaceAllString(alteredIP, "") 33 | alteredIPs = append(alteredIPs, alteredIP) 34 | case "3": 35 | // Octal notation (leading zeroes are required): 36 | // eg: 127.0.0.1 => 0177.0.0.01 37 | alteredIP := fmt.Sprintf("%#04o.%#o.%#o.%#o", standardIP[12], standardIP[13], standardIP[14], standardIP[15]) 38 | alteredIPs = append(alteredIPs, alteredIP) 39 | case "4": 40 | alteredIPWithDots := fmt.Sprintf("%#x.%#x.%#x.%#x", standardIP[12], standardIP[13], standardIP[14], standardIP[15]) 41 | alteredIPWithZeroX := fmt.Sprintf("0x%s", hex.EncodeToString(standardIP[12:])) 42 | alteredIPWithRandomPrefixHex, _ := RandomHex(5, standardIP[12:]) 43 | alteredIPWithRandomPrefix := fmt.Sprintf("0x%s", alteredIPWithRandomPrefixHex) 44 | alteredIPs = append(alteredIPs, alteredIPWithDots, alteredIPWithZeroX, alteredIPWithRandomPrefix) 45 | case "5": 46 | // Decimal notation a.k.a dword notation 47 | // 127.0.0.1 => 2130706433 48 | bigIP, _, _ := IPToInteger(standardIP) 49 | alteredIPs = append(alteredIPs, bigIP.String()) 50 | case "6": 51 | // Binary notation# 52 | // 127.0.0.1 => 
01111111000000000000000000000001 53 | // converts to int 54 | bigIP, _, _ := IPToInteger(standardIP) 55 | // then to binary 56 | alteredIP := fmt.Sprintf("%b", bigIP) 57 | alteredIPs = append(alteredIPs, alteredIP) 58 | case "7": 59 | // Mixed notation 60 | // Ipv4 only 61 | alteredIP := fmt.Sprintf("%#x.%d.%#o.%#x", standardIP[12], standardIP[13], standardIP[14], standardIP[15]) 62 | alteredIPs = append(alteredIPs, alteredIP) 63 | case "8": 64 | // URL-encoded IP address 65 | // 127.0.0.1 => %31%32%37%2E%30%2E%30%2E%31 66 | // ::1 => %3A%3A%31 67 | alteredIP := Escape(ip) 68 | alteredIPs = append(alteredIPs, alteredIP) 69 | } 70 | } 71 | return alteredIPs 72 | } 73 | 74 | func GenIP(cidr string, outputchan chan string) error { 75 | // fix parse error because of \n in window env 76 | // 修复 window 因为多了换行符导致的错误 77 | cidr = strings.TrimSpace(cidr) 78 | var _error error 79 | if strings.Contains(cidr, "/") { 80 | ip, ipnet, err := net.ParseCIDR(cidr) 81 | if err != nil { 82 | _error = errx.NewMsgf("无法解析CIDR地址: %v", cidr) 83 | return _error 84 | } else { 85 | for sip := ip.Mask(ipnet.Mask); ipnet.Contains(sip); IncNetIP(sip) { 86 | outputchan <- sip.String() 87 | } 88 | } 89 | } 90 | if strings.Contains(cidr, "-") { 91 | var ipRange []string 92 | for _, ipStr := range strings.Split(cidr, "-") { 93 | ipRange = append(ipRange, strings.TrimSpace(ipStr)) 94 | } 95 | if len(ipRange) != 2 { 96 | _error = errx.NewMsgf("无法解析给定的IP段: %v", cidr) 97 | return _error 98 | } 99 | 100 | startIPStr := ipRange[0] 101 | endIPStr := ipRange[1] 102 | errStart := net.ParseIP(startIPStr) 103 | errEnd := net.ParseIP(endIPStr) 104 | if errStart == nil || errEnd == nil { 105 | _error = errx.NewMsgf("无法解析给定的IP段: %v", cidr) 106 | return _error 107 | } 108 | ipList := IPRange(startIPStr, endIPStr) 109 | for _, ip := range ipList { 110 | outputchan <- ip 111 | } 112 | } 113 | if !strings.Contains(cidr, "/") && !strings.Contains(cidr, "-") { 114 | cidr = cidr + "/24" 115 | return GenIP(cidr, 
// IPRange lists every IPv4 address from startIPStr to endIPStr, both
// inclusive, in ascending order, each in dotted-decimal form.
//
// It returns nil when either bound is not a valid IPv4 address (previously an
// unparseable bound caused a nil-pointer panic and an IPv6 bound silently
// collapsed to 0.0.0.0) and an empty result when start is greater than end.
func IPRange(startIPStr, endIPStr string) []string {
	startIP := net.ParseIP(startIPStr).To4()
	endIP := net.ParseIP(endIPStr).To4()
	if startIP == nil || endIP == nil {
		return nil
	}

	// The counter is 64 bits wide so that a range ending at 255.255.255.255
	// terminates instead of wrapping around to zero.
	toNumber := func(b net.IP) uint64 {
		return uint64(b[0])<<24 | uint64(b[1])<<16 | uint64(b[2])<<8 | uint64(b[3])
	}

	var ipList []string
	for cur, last := toNumber(startIP), toNumber(endIP); cur <= last; cur++ {
		addr := net.IPv4(byte(cur>>24), byte(cur>>16), byte(cur>>8), byte(cur))
		ipList = append(ipList, addr.String())
	}
	return ipList
}
// MatchLine filters a single line against a regular expression.
//
// With inverse false the line is returned when it matches pattern; with
// inverse true it is returned when it does not match. In every other case the
// empty string is returned. An invalid pattern yields the compile error.
func MatchLine(line, pattern string, inverse bool) (string, error) {
	re, compileErr := regexp.Compile(pattern)
	if compileErr != nil {
		return "", compileErr
	}
	// Keep the line exactly when "matched" and "inverse" disagree.
	if re.MatchString(line) != inverse {
		return line, nil
	}
	return "", nil
}
// ReplaceMore applies a rewrite rule to input.
//
// Every occurrence of flag inside rule is substituted with input. When flag is
// empty or does not occur in rule, the rule is treated as a plain prefix and
// the result is rule followed by input.
func ReplaceMore(rule string, input string, flag string) string {
	if flag != "" && strings.Contains(rule, flag) {
		return strings.ReplaceAll(rule, flag, input)
	}
	return rule + input
}
// NewLine returns the line terminator to use when writing text output:
// "\n" when the OS path separator is '/', and "\r\n" otherwise.
func NewLine() string {
	// os.PathSeparator is a rune constant. It must be formatted with %c:
	// %v prints its numeric code point ("47"), which never equals "/" and
	// made this function return "\r\n" on every platform.
	if fmt.Sprintf("%c", os.PathSeparator) == "/" {
		return "\n"
	}
	return "\r\n"
}
NewXLSReaderFromFile 是一个工厂函数,根据参数创建 xlsxReader 实例 58 | func NewXLSReaderFromFile(filePath string) *XlsReader { 59 | return &XlsReader{ 60 | FilePath: filePath, 61 | } 62 | } 63 | 64 | // NewXLSReaderFromBinData 是一个工厂函数,根据参数创建 xlsxReader 实例 65 | func NewXLSReaderFromBinData(binData []byte) *XlsReader { 66 | return &XlsReader{ 67 | BinData: binData, 68 | } 69 | } 70 | 71 | func (xr *XlsxReader) Read() (io.Reader, error) { 72 | var xlsxFile *xlsx.File 73 | var err error 74 | if xr.FilePath != "" { 75 | xlsxFile, err = xlsx.OpenFile(xr.FilePath) 76 | if err != nil { 77 | return nil, err 78 | } 79 | 80 | } else if len(xr.BinData) > 0 { 81 | xlsxFile, err = xlsx.OpenBinary(xr.BinData) 82 | if err != nil { 83 | return nil, err 84 | } 85 | } else { 86 | return nil, errx.NewMsg("Value Error, Read xlsx file must provide input") 87 | } 88 | var buffer bytes.Buffer 89 | for _, sheet := range xlsxFile.Sheets { 90 | for _, row := range sheet.Rows { 91 | var cellsText []string 92 | for _, cell := range row.Cells { 93 | cellsText = append(cellsText, cell.String()) 94 | } 95 | buffer.WriteString(strings.Join(cellsText, "\t") + NewLine()) 96 | } 97 | } 98 | return &buffer, nil 99 | } 100 | 101 | func (xr *XlsReader) Read() (io.Reader, error) { 102 | var wb grate.Source 103 | var err error 104 | if xr.FilePath != "" { 105 | wb, err = grate.Open(xr.FilePath) 106 | if err != nil { 107 | return nil, err 108 | } 109 | 110 | } else if len(xr.BinData) > 0 { 111 | // create temp file as transfer station 112 | // 创建一个临时文件做中转站 113 | tempFile, err := os.CreateTemp("", "temp.*.xls") 114 | if err != nil { 115 | return nil, err 116 | } 117 | // delete temp file 118 | // 删除临时文件 119 | defer os.Remove(tempFile.Name()) 120 | if _, err := tempFile.Write(xr.BinData); err != nil { 121 | return nil, err 122 | } 123 | wb, err = grate.Open(tempFile.Name()) 124 | if err != nil { 125 | return nil, err 126 | } 127 | } else { 128 | return nil, errx.NewMsg("Value Error, Read xls file must provide input") 129 | 
} 130 | var buffer bytes.Buffer 131 | sheets, _ := wb.List() // list available sheets 132 | for _, s := range sheets { // enumerate each sheet name 133 | sheet, _ := wb.Get(s) // open the sheet 134 | for sheet.Next() { // enumerate each row of data 135 | row := sheet.Strings() // get the row's content as []string 136 | if len(row) > 0 { 137 | buffer.WriteString(strings.Join(row, "\t") + NewLine()) 138 | } 139 | 140 | } 141 | } 142 | wb.Close() 143 | return &buffer, nil 144 | } 145 | -------------------------------------------------------------------------------- /core/xlsx_test.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "fmt" 5 | "github.com/stretchr/testify/require" 6 | "os" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | func TestOfficeReader(t *testing.T) { 12 | testCases := []struct { 13 | input string 14 | expect interface{} 15 | }{ 16 | {input: "Sample.xlsx", expect: nil}, 17 | {input: "Sample.xls", expect: nil}, 18 | } 19 | for _, testCase := range testCases { 20 | input := testCase.input 21 | expect := testCase.expect 22 | if strings.HasSuffix(input, "xlsx") { 23 | var reader OfficeReader = NewXLSXReaderFromFile(input) 24 | _, err := reader.Read() 25 | //fmt.Println(content) 26 | require.Equal(t, expect, err) 27 | binData, _ := os.ReadFile(input) 28 | reader = NewXLSXReaderFromBinData(binData) 29 | content, err := reader.Read() 30 | fmt.Println(content) 31 | require.Equal(t, expect, err) 32 | reader, err = NewReader(binData) 33 | require.Equal(t, expect, err) 34 | } 35 | if strings.HasSuffix(input, "xls") { 36 | var reader OfficeReader = NewXLSReaderFromFile(input) 37 | _, err := reader.Read() 38 | require.Equal(t, expect, err) 39 | binData, _ := os.ReadFile(input) 40 | reader = NewXLSReaderFromBinData(binData) 41 | _, err = reader.Read() 42 | //fmt.Println(content) 43 | require.Equal(t, expect, err) 44 | reader, err = NewReader(binData) 45 | require.Equal(t, expect, err) 46 | } 47 
| } 48 | } 49 | -------------------------------------------------------------------------------- /errx/errors.go: -------------------------------------------------------------------------------- 1 | package errx 2 | 3 | import ( 4 | "github.com/pkg/errors" 5 | ) 6 | 7 | // NewWrapError 自定义错误处理函数 8 | func NewWrapError(err error, message string) error { 9 | if err == nil { 10 | return nil 11 | } 12 | return errors.Wrap(err, message) 13 | 14 | } 15 | 16 | func NewWithMsgf(err error, format string, args ...any) error { 17 | if err == nil { 18 | return nil 19 | } 20 | return errors.WithMessagef(err, format, args...) 21 | } 22 | 23 | func NewMsgf(format string, args ...any) error { 24 | return errors.Errorf(format, args...) 25 | } 26 | 27 | func NewMsg(message string) error { 28 | return errors.New(message) 29 | } 30 | 31 | func As(err error, target interface{}) bool { 32 | return errors.As(err, target) 33 | } 34 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mstxq17/MoreFind 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/inconshreveable/mousetrap v1.0.0 // indirect 7 | github.com/spf13/cobra v1.4.0 8 | golang.org/x/net v0.15.0 9 | mvdan.cc/xurls/v2 v2.4.0 10 | ) 11 | 12 | require ( 13 | github.com/Masterminds/semver/v3 v3.2.1 14 | github.com/cheggaaa/pb/v3 v3.1.4 15 | github.com/google/go-github/v30 v30.1.0 16 | github.com/minio/selfupdate v0.6.0 17 | github.com/pbnjay/grate v0.0.0-20231006022435-3f8e65d74a14 18 | github.com/pkg/errors v0.9.1 19 | github.com/stretchr/testify v1.8.0 20 | github.com/tealeg/xlsx v1.0.5 21 | ) 22 | 23 | require ( 24 | aead.dev/minisign v0.2.0 // indirect 25 | github.com/VividCortex/ewma v1.2.0 // indirect 26 | github.com/davecgh/go-spew v1.1.1 // indirect 27 | github.com/fatih/color v1.15.0 // indirect 28 | github.com/google/go-querystring v1.0.0 // indirect 29 | github.com/mattn/go-colorable 
// isPrivateIP reports whether line is, in its entirety, an address in one of
// the private IPv4 ranges 10.x.x.x, 172.16-31.x.x or 192.168.x.x. For the
// 172 and 192.168 ranges an optional /16-/32 prefix length is accepted; for
// the 10 range the form 10.x.x.0/8-32 is accepted.
//
// Compared with the previous pattern, the dots are escaped and the three
// alternatives are grouped, so that ^ and $ anchor every alternative. Before,
// inputs such as "1192.168.1.1", "10a1b1c1" or "10.1.1.1.evil.com" matched.
// Octet values are still only checked for being 1-3 digits, not for <= 255.
func isPrivateIP(line string) bool {
	privateIPRegex := regexp.MustCompile(`^(?:` +
		`10\.\d{1,3}\.\d{1,3}\.(?:0/(?:[89]|1[0-9]|2\d|3[012])|\d{1,3})` +
		`|172\.(?:1[6789]|2\d|3[01])\.\d{1,3}\.\d{1,3}(?:/(?:1[6789]|2\d|3[012]))?` +
		`|192\.168\.\d{1,3}\.\d{1,3}(?:/(?:1[6789]|2\d|3[012]))?` +
		`)$`)
	return privateIPRegex.MatchString(line)
}
// fileExt returns the lower-cased extension of the last path segment of _url
// (the text after its final dot), or "" when that segment contains no dot.
// A URL that cannot be parsed terminates the process via log.Fatal.
func fileExt(_url string) string {
	parsed, err := url.Parse(_url)
	if err != nil {
		log.Fatal(err)
	}
	// Reduce the path to its last segment.
	name := parsed.Path
	if slash := strings.LastIndex(name, "/"); slash >= 0 {
		name = name[slash+1:]
	}
	dot := strings.LastIndex(name, ".")
	if dot < 0 {
		return ""
	}
	// convert to lowercase uniformly
	return strings.ToLower(name[dot+1:])
}
// deepMakeUrl repairs checkUrl until url.PathUnescape accepts it: on each
// round the escape sequence reported as invalid is cut out at its last
// occurrence in the string. The repaired URL is returned; if the offending
// text cannot be located, the string is returned as it stands.
//
// The previous recursive version panicked when the error message did not
// match the pattern and recursed without end when the quoted text from the
// message was not present verbatim in the URL (for example when the message
// quoting escaped a character).
func deepMakeUrl(checkUrl string) string {
	// Compiled once instead of once per repair round.
	escapeMsg := regexp.MustCompile(`invalid URL escape "(.+)"`)
	for {
		_, err := url.PathUnescape(checkUrl)
		if err == nil {
			return checkUrl
		}

		var bad string
		if escErr, ok := err.(url.EscapeError); ok {
			// The typed error carries the raw offending text, free of the
			// quoting applied in the message.
			bad = string(escErr)
		} else if m := escapeMsg.FindStringSubmatch(err.Error()); len(m) == 2 {
			// Fallback: recover the text from the message, as before.
			bad = m[1]
		}

		idx := -1
		if bad != "" {
			idx = strings.LastIndex(checkUrl, bad)
		}
		if idx < 0 {
			// Nothing to remove: stop instead of looping forever.
			return checkUrl
		}
		// Each round removes at least one byte, so the loop terminates.
		checkUrl = checkUrl[:idx] + checkUrl[idx+len(bad):]
	}
}
# Go parameters
GOCMD=go
GOBUILD=$(GOCMD) build
# -s -w omit the symbol table and debug information from the binary.
LDFLAGS := -s -w

# On every GOOS except darwin, additionally request static external linking.
# The flag is appended (+=): assigning with := here discarded -s -w above.
ifneq ($(shell go env GOOS),darwin)
LDFLAGS += -extldflags "-static"
endif

.PHONY: all build
all: build
build:
	$(GOBUILD) $(GOFLAGS) -ldflags '$(LDFLAGS)' -trimpath -o "MoreFind" main.go
tag -d "$version" 2>/dev/null; then 14 | echo "本地标签 $version 已删除" 15 | else 16 | echo "本地标签 $version 不存在或无法删除" 17 | fi 18 | 19 | # 删除远程标签 20 | if git push origin --delete "$version" 2>/dev/null; then 21 | echo "远程标签 $version 已删除" 22 | else 23 | echo "远程标签 $version 不存在或无法删除" 24 | fi 25 | 26 | # 推送新的标签 27 | if git tag "$version" && git push origin "$version"; then 28 | echo "标签 $version 已成功推送到远程仓库" 29 | else 30 | echo "标签 $version 推送失败" 31 | exit 1 32 | fi -------------------------------------------------------------------------------- /test.go: -------------------------------------------------------------------------------- 1 | package main 2 | -------------------------------------------------------------------------------- /update/gh.go: -------------------------------------------------------------------------------- 1 | package update 2 | 3 | import ( 4 | "archive/tar" 5 | "archive/zip" 6 | "bytes" 7 | "compress/gzip" 8 | "crypto/tls" 9 | "github.com/cheggaaa/pb/v3" 10 | "github.com/google/go-github/v30/github" 11 | "github.com/mstxq17/MoreFind/errx" 12 | "golang.org/x/net/context" 13 | "io" 14 | "io/fs" 15 | "net/http" 16 | "runtime" 17 | "strings" 18 | "time" 19 | ) 20 | 21 | const ( 22 | Owner = "mstxq17" 23 | ToolName = "MoreFind" 24 | ) 25 | 26 | var ( 27 | extIfFound = ".exe" 28 | // 下载时间应该设置大一些防止网速不好的情况 29 | GlobalTimeout = time.Duration(60) * time.Second 30 | DefaultHttpClient *http.Client 31 | ) 32 | 33 | type AssetFileCallback func(path string, fileInfo fs.FileInfo, data io.Reader) error 34 | 35 | type GHReleaseDownloader struct { 36 | owner string 37 | repoName string // by default repoName is toolName 38 | assetName string 39 | AssetID int 40 | Format AssetFormat 41 | Latest *github.RepositoryRelease 42 | ghClient *github.Client 43 | httpClient *http.Client 44 | } 45 | 46 | // 获取最新发新版信息并报告错误 47 | func (d *GHReleaseDownloader) getLatestRelease() error { 48 | release, resp, err := d.ghClient.Repositories.GetLatestRelease(context.Background(), d.owner, 
d.repoName) 49 | var rateLimitErr *github.RateLimitError 50 | if err != nil { 51 | if resp != nil && resp.StatusCode == http.StatusNotFound { 52 | return errx.NewMsgf("updater -> repo %v/%v not found got %v", d.owner, d.repoName) 53 | } else if errx.As(err, &rateLimitErr) { 54 | return errx.NewMsg("hit github ratelimit while downloading latest release") 55 | } 56 | if resp == nil { 57 | return errx.NewWrapError(err, "updater -> network connect error") 58 | } 59 | return errx.NewWrapError(err, "updater -> unknown error that not be handled") 60 | } 61 | d.Latest = release 62 | return nil 63 | } 64 | 65 | func NewghReleaseDownloader(RepoName string) (*GHReleaseDownloader, error) { 66 | var owner, repoName string 67 | if strings.Contains(RepoName, "/") { 68 | // if it has diagonal that means mstxq17/MoreFind 69 | // 如果/存在,则说明是 mstxq17/MoreFind 的形式 70 | arr := strings.Split(RepoName, "/") 71 | if len(arr) != 2 { 72 | return nil, errx.NewMsgf("update RepoName: %v cannot be parsed", RepoName) 73 | } 74 | owner = arr[0] 75 | repoName = arr[1] 76 | } else { 77 | owner = Owner 78 | repoName = RepoName 79 | } 80 | if repoName == "" { 81 | repoName = ToolName 82 | //return nil, errx.NewMsg("update RepoName: repoName name cannot be empty") 83 | } 84 | // 全局超时配置client 85 | httpClient := DefaultHttpClient 86 | ghrd := GHReleaseDownloader{ghClient: github.NewClient(httpClient), repoName: repoName, owner: owner, httpClient: httpClient} 87 | err := ghrd.getLatestRelease() 88 | return &ghrd, err 89 | } 90 | 91 | func (d *GHReleaseDownloader) getToolAssetID(latest *github.RepositoryRelease) error { 92 | // MoreFind_1.4.6_darwin_arm64.tar.gz 93 | builder := &strings.Builder{} 94 | builder.WriteString(d.repoName) 95 | builder.WriteString("_") 96 | builder.WriteString(latest.GetTagName()) 97 | builder.WriteString("_") 98 | builder.WriteString(runtime.GOOS) 99 | builder.WriteString("_") 100 | if strings.EqualFold(runtime.GOARCH, "amd64") { 101 | builder.WriteString("x86_64") 102 | } else 
{ 103 | builder.WriteString(runtime.GOARCH) 104 | } 105 | loop: 106 | for _, v := range latest.Assets { 107 | asset := v.GetName() 108 | switch { 109 | case strings.Contains(asset, Tar.FileExtension()): 110 | if strings.EqualFold(asset, builder.String()+Tar.FileExtension()) { 111 | d.AssetID = int(v.GetID()) 112 | d.Format = Tar 113 | d.assetName = asset 114 | break loop 115 | } 116 | case strings.Contains(asset, Zip.FileExtension()): 117 | if strings.EqualFold(asset, builder.String()+Zip.FileExtension()) { 118 | d.AssetID = int(v.GetID()) 119 | d.Format = Zip 120 | d.assetName = asset 121 | break loop 122 | } 123 | } 124 | } 125 | builder.Reset() 126 | // handle if id is zero (no asset found) 127 | if d.AssetID == 0 { 128 | return errx.NewMsgf("updater: could not find release asset for your platform (%s/%s)", runtime.GOOS, runtime.GOARCH) 129 | } 130 | return nil 131 | } 132 | 133 | // downloadAssetwithID 134 | func (d *GHReleaseDownloader) downloadAssetwithID(id int64) (*http.Response, error) { 135 | _, rdurl, err := d.ghClient.Repositories.DownloadReleaseAsset(context.Background(), d.owner, d.repoName, id, nil) 136 | if err != nil { 137 | return nil, err 138 | } 139 | resp, err := d.httpClient.Get(rdurl) 140 | if err != nil { 141 | return nil, errx.NewMsg("failed to download release asset") 142 | } 143 | if resp.StatusCode != http.StatusOK { 144 | return nil, errx.NewMsgf("something went wrong got %v while downloading asset, expected status 200", resp.StatusCode) 145 | } 146 | if resp.Body == nil { 147 | return nil, errx.NewMsg("something went wrong got response without body") 148 | } 149 | return resp, nil 150 | } 151 | 152 | func (d *GHReleaseDownloader) DownloadTool() (*bytes.Buffer, error) { 153 | if err := d.getToolAssetID(d.Latest); err != nil { 154 | return nil, err 155 | } 156 | resp, err := d.downloadAssetwithID(int64(d.AssetID)) 157 | if err != nil { 158 | return nil, err 159 | } 160 | defer resp.Body.Close() 161 | if !HideProgressBar { 162 | bar := 
pb.New64(resp.ContentLength).SetMaxWidth(100) 163 | bar.Start() 164 | resp.Body = bar.NewProxyReader(resp.Body) 165 | defer bar.Finish() 166 | } 167 | bin, err := io.ReadAll(resp.Body) 168 | if err != nil { 169 | return nil, errx.NewMsg("failed to read response body") 170 | } 171 | return bytes.NewBuffer(bin), nil 172 | } 173 | 174 | func (d *GHReleaseDownloader) GetExecutableFromAsset() ([]byte, error) { 175 | var bin []byte 176 | var err error 177 | getToolCallback := func(path string, fileInfo fs.FileInfo, data io.Reader) error { 178 | if !strings.EqualFold(strings.TrimSuffix(fileInfo.Name(), extIfFound), ToolName) { 179 | return nil 180 | } 181 | bin, err = io.ReadAll(data) 182 | return err 183 | } 184 | buff, err := d.DownloadTool() 185 | if err != nil { 186 | return nil, err 187 | } 188 | _ = UnpackAssetWithCallback(d.Format, bytes.NewReader(buff.Bytes()), getToolCallback) 189 | return bin, errx.NewWrapError(err, "executable not found in archive") // Note: WrapfWithNil wraps msg if err != nil 190 | } 191 | 192 | // UnpackAssetWithCallback unpacks asset and executes callback function on every file in data 193 | func UnpackAssetWithCallback(format AssetFormat, data *bytes.Reader, callback AssetFileCallback) error { 194 | if format != Zip && format != Tar { 195 | return errx.NewMsg("unpack -> github asset format not supported. 
only zip and tar are supported") 196 | } 197 | if format == Zip { 198 | zipReader, err := zip.NewReader(data, data.Size()) 199 | if err != nil { 200 | return err 201 | } 202 | for _, f := range zipReader.File { 203 | data, err := f.Open() 204 | if err != nil { 205 | return err 206 | } 207 | if err := callback(f.Name, f.FileInfo(), data); err != nil { 208 | return err 209 | } 210 | _ = data.Close() 211 | } 212 | } else if format == Tar { 213 | gzipReader, err := gzip.NewReader(data) 214 | if err != nil { 215 | return err 216 | } 217 | tarReader := tar.NewReader(gzipReader) 218 | // iterate through the files in the archive 219 | for { 220 | header, err := tarReader.Next() 221 | if err == io.EOF { 222 | break 223 | } 224 | if err != nil { 225 | return err 226 | } 227 | if err := callback(header.Name, header.FileInfo(), tarReader); err != nil { 228 | return err 229 | } 230 | } 231 | } 232 | return nil 233 | } 234 | 235 | func init() { 236 | DefaultHttpClient = &http.Client{ 237 | Timeout: GlobalTimeout, 238 | Transport: &http.Transport{ 239 | Proxy: http.ProxyFromEnvironment, 240 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 241 | }, 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /update/type.go: -------------------------------------------------------------------------------- 1 | package update 2 | 3 | type AssetFormat uint 4 | 5 | const ( 6 | Zip AssetFormat = iota 7 | Tar 8 | Unknown 9 | ) 10 | 11 | // FileExtension of this asset format 12 | func (a AssetFormat) FileExtension() string { 13 | if a == Zip { 14 | return ".zip" 15 | } else if a == Tar { 16 | return ".tar.gz" 17 | } 18 | return "" 19 | } 20 | -------------------------------------------------------------------------------- /update/update.go: -------------------------------------------------------------------------------- 1 | package update 2 | 3 | import ( 4 | "bytes" 5 | "github.com/Masterminds/semver/v3" 6 | "github.com/minio/selfupdate" 
7 | "github.com/mstxq17/MoreFind/errx" 8 | "log" 9 | "os" 10 | ) 11 | 12 | var ( 13 | HideProgressBar = true 14 | HideReleaseNotes = true 15 | ) 16 | 17 | type ErrorCallback func() *log.Logger 18 | 19 | // GetUpdateToolCallback reserve over-design for study 20 | // 保留一定的冗余的"SB"设计用来学习 21 | func GetUpdateToolCallback(toolName, version string, errorCallback ErrorCallback) func() { 22 | return GetUpdateToolFromRepoCallback(toolName, version, "", errorCallback) 23 | } 24 | 25 | func GetLatestVersion(repoName string, version string) (string, error) { 26 | gh, err := NewghReleaseDownloader(repoName) 27 | if err != nil { 28 | return "", err 29 | } 30 | latestVersion, err := semver.NewVersion(gh.Latest.GetTagName()) 31 | currentVersion, err := semver.NewVersion(version) 32 | if IsOutdated(currentVersion.String(), latestVersion.String()) { 33 | return gh.Latest.GetTagName(), err 34 | } else { 35 | return "", errx.NewMsg("no need update") 36 | } 37 | } 38 | 39 | func GetUpdateToolFromRepoCallback(toolName, version, repoName string, errorCallback ErrorCallback) func() { 40 | return func() { 41 | logger := errorCallback() 42 | if repoName == "" { 43 | repoName = toolName 44 | } 45 | gh, err := NewghReleaseDownloader(repoName) 46 | if err != nil { 47 | logger.Fatal(err) 48 | } 49 | latestVersion, err := semver.NewVersion(gh.Latest.GetTagName()) 50 | currentVersion, err := semver.NewVersion(version) 51 | if err != nil { 52 | logger.Fatal(errx.NewWithMsgf(err, "failed to parse semversion from tagname `%v` got %v", gh.Latest.GetTagName())) 53 | } 54 | logger.Printf("Get Latest Version: v%v", latestVersion.String()) 55 | if !IsOutdated(currentVersion.String(), latestVersion.String()) { 56 | logger.Printf("%v is already updated to the latest version: v%v", toolName, latestVersion.String()) 57 | os.Exit(0) 58 | } 59 | // check permissions before downloading release 60 | updateOpts := selfupdate.Options{} 61 | if err := updateOpts.CheckPermissions(); err != nil { 62 | 
logger.Fatal(errx.NewWithMsgf(err, "update of %v %v -> %v failed , insufficient permission detected got: %v", toolName, currentVersion.String(), latestVersion.String())) 63 | } 64 | HideProgressBar = false 65 | bin, err := gh.GetExecutableFromAsset() 66 | if err != nil { 67 | logger.Fatal(errx.NewWithMsgf(err, "executable %v not found in release assetID `%v` got", toolName, gh.AssetID)) 68 | } 69 | if err = selfupdate.Apply(bytes.NewBuffer(bin), updateOpts); err != nil { 70 | logger.Printf("update of %v %v -> %v failed, rolling back update", toolName, currentVersion.String(), latestVersion.String()) 71 | if err := selfupdate.RollbackError(err); err != nil { 72 | logger.Println("") 73 | logger.Printf("updater -> rollback of update of %v failed got %v,pls reinstall %v", toolName, err, toolName) 74 | } 75 | os.Exit(1) 76 | } 77 | logger.Printf("%v successfully updated %v -> %v (latest)", toolName, currentVersion.String(), latestVersion.String()) 78 | if !HideReleaseNotes { 79 | output := gh.Latest.GetBody() 80 | logger.Printf("\n\n%v\n", output) 81 | } 82 | os.Exit(0) 83 | } 84 | } 85 | 86 | // IsOutdated returns true if current version is outdated 87 | func IsOutdated(current, latest string) bool { 88 | currentVer, _ := semver.NewVersion(current) 89 | latestVer, _ := semver.NewVersion(latest) 90 | if currentVer == nil || latestVer == nil { 91 | return current != latest 92 | } 93 | return latestVer.GreaterThan(currentVer) 94 | } 95 | -------------------------------------------------------------------------------- /update/update_test.go: -------------------------------------------------------------------------------- 1 | package update 2 | 3 | import ( 4 | "github.com/stretchr/testify/require" 5 | "testing" 6 | ) 7 | 8 | func TestUpdateVariable(t *testing.T) { 9 | _, err := NewghReleaseDownloader("morefind") 10 | require.Nil(t, err) 11 | } 12 | -------------------------------------------------------------------------------- /vars/common.go: 
-------------------------------------------------------------------------------- 1 | package vars 2 | 3 | const TOOLNAME string = "MoreFind" 4 | -------------------------------------------------------------------------------- /vars/help.go: -------------------------------------------------------------------------------- 1 | package vars 2 | 3 | const ( 4 | FileHelpEn = "Specifies the input file path." 5 | FileHelpZh = "指定输入文件路径。" 6 | 7 | XlsxHelpEn = "Specifies the output file path." 8 | XlsxHelpZh = "指定输出文件路径。" 9 | 10 | OutputHelpEn = "Specifies the output file path." 11 | OutputHelpZh = "指定输出文件路径。" 12 | 13 | IPHelpEn = "Matches IPs from the input pipe or file." 14 | IPHelpZh = "从输入管道或文件中匹配 IP。" 15 | 16 | TargetHelpEn = "Matches schema://host from the input pipe or file." 17 | TargetHelpZh = "从输入管道或文件中匹配 schema://host。" 18 | 19 | SchemaHelpEn = "When use with -t, the default is set to the specified protocol" 20 | SchemaHelpZh = "与 -t 一起使用,默认设置为指定协议" 21 | 22 | ExcludeHelpEn = "Excludes internal/private IP segments when using -i/--ip." 23 | ExcludeHelpZh = "在使用 -i/--ip 时排除内部/私有 IP 段。" 24 | 25 | DomainHelpEn = "Matches domains from the input pipe or file." 26 | DomainHelpZh = "从输入管道或文件中匹配域名。" 27 | 28 | RootDomainHelpEn = "Outputs only the primary domain when using -d/--domain." 29 | RootDomainHelpZh = "在使用 -d/--domain 时仅输出主要域名。" 30 | 31 | WithPortHelpEn = "Filters only domain & IP:port combinations." 32 | WithPortHelpZh = "仅筛选域名和 IP:端口 组合。" 33 | 34 | RuleHelpEn = "Utilizes a custom replacement rule (custom output replacement rule: https://{}/)." 35 | RuleHelpZh = "使用自定义输出替换规则(自定义输出替换规则:https://{}/)。" 36 | 37 | FlagHelpEn = "Specifies the replacement identification." 38 | FlagHelpZh = "指定替换标识。" 39 | 40 | URLHelpEn = "Matches URLs from the input pipe or file." 41 | URLHelpZh = "从输入管道或文件中匹配 URL。" 42 | 43 | URLFilterHelpEn = "Filters URLs with specific extensions." 44 | URLFilterHelpZh = "使用特定扩展名过滤 URL。" 45 | 46 | CidrHelpEn = "Outputs the specified CIDR IP list." 
47 | CidrHelpZh = "输出指定 CIDR 范围内的所有 IP。" 48 | 49 | LimitLenHelpEn = "Matches input specified length string, e.g., \"-l 35\" == \"-l 0-35\"." 50 | LimitLenHelpZh = "匹配每行指定长度的字符串,例如,\"-l 35\" == \"-l 0-35\"。" 51 | 52 | ShowHelpEn = "Displays the length of each line and provides summaries." 53 | ShowHelpZh = "显示每行的长度并提供摘要。" 54 | 55 | ProgressHelpEn = "Outputs execution progress metrics." 56 | ProgressHelpZh = "读取大量行时输出执行进度指标状态。" 57 | 58 | QuietHelpEn = "Enable quit mode,keep silence." 59 | QuietHelpZh = "安装模式" 60 | 61 | UpdateHelpEn = "Updates the tool engine to the latest released version." 62 | UpdateHelpZh = "将工具引擎更新到最新版本。" 63 | 64 | GrepPatternHelpEn = "Pattern for regex." 65 | GrepPatternHelpZh = "正则表达式模式。" 66 | 67 | InverseMatchHelpEn = "Invert the match pattern." 68 | InverseMatchHelpZh = "反转匹配模式。" 69 | 70 | DiffCmdHelpEn = "Compares files using different modes:\n1: A-B\n2: B-A\n3: A&B" 71 | DiffCmdHelpZh = "使用不同模式比较文件:\n1:A-B\n2:B-A\n3:A&B" 72 | 73 | StrictModeHelpEn = "Match lines strictly one by one (non-default)." 74 | StrictModeHelpZh = "严格逐行匹配(非默认)。" 75 | 76 | SmartHelpEn = "Use heuristic technique to remove duplicated lines." 77 | SmartHelpZh = "使用启发式技术去除重复行。" 78 | 79 | ThresholdHelpEn = "Set threshold for smart strategy." 80 | ThresholdHelpZh = "设置智能策略的阈值。" 81 | 82 | AlterHelpEn = "IP Alters (0,1,2,3,4,5,6,7,8)" 83 | AlterHelpZh = "IP 变换 (0,1,2,3,4,5,6,7,8)" 84 | ) 85 | -------------------------------------------------------------------------------- /vars/version.go: -------------------------------------------------------------------------------- 1 | package vars 2 | 3 | // VERSION number 4 | // 版本号 5 | const VERSION string = "1.5.7" 6 | --------------------------------------------------------------------------------