├── .gitignore ├── License ├── README.md ├── app ├── bootstrap │ └── bootstrap.go ├── handler │ ├── article.go │ ├── common.go │ ├── index.go │ ├── install.go │ └── setting.go ├── provider │ └── article.go ├── request │ ├── articleSource.go │ ├── install.go │ └── setting.go └── route │ └── base.go ├── config.dist.json ├── config ├── collector.go ├── config.go ├── constant.go ├── content.go ├── mysql.go └── server.go ├── core ├── article.go └── collector.go ├── dictionary.txt ├── doc.md ├── go.mod ├── go.sum ├── library └── content.go ├── main.go ├── public ├── echarts.js ├── echartsTheme.js ├── index.png ├── install.png ├── list.png ├── publish.png ├── remote.png ├── setting.png └── source.png ├── services └── mysql.go └── template ├── article ├── keywords.html ├── list.html └── source.html ├── errors └── 404.html ├── index.html ├── install └── index.html ├── partial ├── footer.html └── header.html └── setting ├── index.html └── publish.html /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | /vendor 4 | /config.json 5 | /collector* 6 | /test -------------------------------------------------------------------------------- /License: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020-NOW Fesion 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 万能文章采集器(collector) 2 | 这是一个由golang编写的采集器,可以自动识别文章列表和文章内容。使用它来采集文章并不需要编写正则表达式,你只需要提供文章列表页的链接即可。 3 | 4 | ## 为什么会有这个万能文章采集器 5 | * 市面上有几种采集工具,大多都需要针对不同的网站定制不同的采集规则,才能最终采集到想要的结果。本采集器内置了常用的采集规则,只要添加文章列表链接,就能将内容采集回来。 6 | * 本采集器采用多线程并行采集,可在同一时间采集更多的内容。 7 | * 本采集器只专注于采集文章这一件事,不用来定制采集其他内容,只专心做一件事。 8 | 9 | ## 万能文章采集器能采集哪些内容 10 | 本采集器可以采集到的内容有:文章标题、文章关键词、文章描述、文章详情内容、文章作者、文章发布时间、文章浏览量。 11 | 12 | ## 什么时候需要使用到万能文章采集器 13 | 当我们需要给网站采集文章的时候,本采集器就可以派上用场了,本采集器不需要有人值守,24小时不间断运行,每隔10分钟就会自动遍历一遍采集列表,抓取包含有文章的链接,随时将文字抓取回来,还可以设置自动发布,自动发布到指定文章表中。 14 | 15 | ## 万能文章采集器可以在哪里运行 16 | 本采集器可以运行在 Windows系统、Mac 系统、Linux系统(CentOS、Ubuntu等),可以下载编译好的程序直接执行,也可以下载源码自己编译。 17 | 18 | ## 万能文章采集器可以伪原创吗 19 | 本采集器暂时还不支持伪原创功能,后期会增加适当的伪原创选项。 20 | 21 | ## 如何安装使用 22 | * 下载可执行文件 23 | 请从Releases 中根据你的操作系统下载最新版的可执行文件,解压后,然后双击运行可执行文件,在打开的浏览器中的可视化界面,填写数据库信息,完成初始化配置,添加采集源,即可开始采集之旅。 24 | 如果你是在服务器端运行,或者程序没有自动打开浏览器,请按命令界面提示,在浏览器输入访问地址,默认的访问地址是 http://127.0.0.1:8088 25 | * 自助编译 26 | 先clone代码到本地,本地安装go运行环境,在collector目录下打开cmd/Terminal命令行窗口,执行命令。如果你没配置代理的话,还需要先设置go的代理 27 | ```shell script 28 | go env -w GOPROXY=https://goproxy.cn,direct 29 | ``` 30 | 最后执行下面命令 31 | ```shell script 32 | go mod tidy 33 | go mod vendor 34 | go build 35 | # to linux 36 | # CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o collector 37 | # to windows 38 | # CGO_ENABLED=0 GOOS=windows 
GOARCH=amd64 go build -o collector.exe 39 | # to mac 40 | # CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -o collector 41 | ``` 42 | 编译结束后,运行编译出来的文件,然后双击运行可执行文件,在打开的浏览器中的可视化界面,填写数据库信息,完成初始化配置,添加采集源,即可开始采集之旅。 43 | 44 | ## 开发计划 45 | * 增加可视化添加采集列表连接、查看修改已采集内容操作界面 ✅ 46 | * 增加自动发布到远程服务器网站功能 ✅ 47 | * 增加关键词自动替换(伪原创的一部分) 48 | * 增加内容自动分段重组功能(待定) 49 | 50 | ## 协助完善 51 | 欢迎有能力有贡献精神的个人或团体参与到本采集器的开发完善工作中来,共同完善采集功能。请fork一个分支,然后在上面修改,修改完了提交pull request合并请求。 52 | 53 | ## 版权声明 54 | © Sinclair,tpyzlxy@gmail.com 55 | 56 | Released under the [MIT License](https://github.com/fesiong/collector/blob/master/License) 57 | -------------------------------------------------------------------------------- /app/bootstrap/bootstrap.go: -------------------------------------------------------------------------------- 1 | package bootstrap 2 | 3 | import ( 4 | "collector/app/route" 5 | "collector/config" 6 | "collector/core" 7 | "context" 8 | "fmt" 9 | "github.com/kataras/iris/v12" 10 | "github.com/kataras/iris/v12/middleware/recover" 11 | "os/exec" 12 | "runtime" 13 | "time" 14 | ) 15 | 16 | type Bootstrap struct { 17 | Application *iris.Application 18 | Port int 19 | LoggerLevel string 20 | } 21 | 22 | func New(port int, loggerLevel string) *Bootstrap { 23 | var bootstrap Bootstrap 24 | bootstrap.Application = iris.New() 25 | bootstrap.Port = port 26 | bootstrap.LoggerLevel = loggerLevel 27 | 28 | //crond 29 | core.Crond() 30 | 31 | return &bootstrap 32 | } 33 | 34 | func (bootstrap *Bootstrap) loadGlobalMiddleware() { 35 | bootstrap.Application.Use(recover.New()) 36 | } 37 | 38 | func (bootstrap *Bootstrap) loadRoutes() { 39 | route.Register(bootstrap.Application) 40 | } 41 | 42 | func (bootstrap *Bootstrap) Serve() { 43 | bootstrap.Application.Logger().SetLevel(bootstrap.LoggerLevel) 44 | bootstrap.loadGlobalMiddleware() 45 | bootstrap.loadRoutes() 46 | 47 | //AutoMigrateDB 48 | core.AutoMigrateDB() 49 | 50 | pugEngine := iris.Django(fmt.Sprintf("%stemplate", config.ExecPath), ".html") 51 | 52 
// Open waits one second and then asks the operating system to open uri in the
// default browser. Serve starts it in a goroutine right before the HTTP server
// begins to run, so the delay presumably gives the listener time to come up.
//
// The launcher is selected from runtime.GOOS. If the platform has no known
// launcher, or the launcher cannot be started, a hint asking the user to open
// the address manually is printed instead. Open never reports an error.
func Open(uri string) {
	time.Sleep(1 * time.Second)

	// exec.Command takes the program and its arguments separately and does not
	// go through a shell, so every launcher is stored as an argv slice.
	// Passing "cmd /c start" as one program name makes Windows look for an
	// executable literally named "cmd /c start", which always fails.
	launchers := map[string][]string{
		"windows": {"cmd", "/c", "start"},
		"darwin":  {"open"},
		"linux":   {"xdg-open"},
	}

	argv, ok := launchers[runtime.GOOS]
	if !ok {
		fmt.Printf("请手动在浏览器中打开网址: %s\n", uri)
		return
	}

	// Copy the launcher arguments before appending so the table entry is never
	// mutated.
	args := make([]string, 0, len(argv))
	args = append(args, argv[1:]...)
	args = append(args, uri)

	cmd := exec.Command(argv[0], args...)
	if err := cmd.Start(); err != nil {
		fmt.Printf("请手动在浏览器中打开网址: %s\n", uri)
		return
	}

	// A started command must be waited on to release its resources. The exit
	// status of the launcher is irrelevant here, so it is reaped in the
	// background and its error deliberately discarded.
	go func() { _ = cmd.Wait() }()
}
pageSize := ctx.URLParamIntDefault("limit", 20) 26 | 27 | articleList, total, err := provider.GetArticleList(currentPage, pageSize) 28 | if err != nil { 29 | ctx.JSON(iris.Map{ 30 | "code": config.StatusFailed, 31 | "msg": err.Error(), 32 | }) 33 | return 34 | } 35 | 36 | ctx.JSON(iris.Map{ 37 | "code": config.StatusOK, 38 | "msg": "", 39 | "data": articleList, 40 | "count": total, 41 | }) 42 | } 43 | 44 | func ArticleDeleteApi(ctx iris.Context) { 45 | var req request.Article 46 | if err := ctx.ReadForm(&req); err != nil { 47 | ctx.JSON(iris.Map{ 48 | "code": config.StatusFailed, 49 | "msg": err.Error(), 50 | }) 51 | return 52 | } 53 | 54 | article, err := provider.GetArticleById(req.ID) 55 | if err != nil { 56 | ctx.JSON(iris.Map{ 57 | "code": config.StatusFailed, 58 | "msg": err.Error(), 59 | }) 60 | return 61 | } 62 | 63 | err = article.Delete() 64 | if err != nil { 65 | ctx.JSON(iris.Map{ 66 | "code": config.StatusFailed, 67 | "msg": err.Error(), 68 | }) 69 | return 70 | } 71 | 72 | ctx.JSON(iris.Map{ 73 | "code": config.StatusOK, 74 | "msg": "删除成功", 75 | }) 76 | } 77 | 78 | func ArticleSourceListApi(ctx iris.Context) { 79 | currentPage := ctx.URLParamIntDefault("page", 1) 80 | pageSize := ctx.URLParamIntDefault("limit", 20) 81 | 82 | sourceList, total, err := provider.GetArticleSourceList(currentPage, pageSize) 83 | if err != nil { 84 | ctx.JSON(iris.Map{ 85 | "code": config.StatusFailed, 86 | "msg": err.Error(), 87 | }) 88 | return 89 | } 90 | 91 | ctx.JSON(iris.Map{ 92 | "code": config.StatusOK, 93 | "msg": "", 94 | "data": sourceList, 95 | "count": total, 96 | }) 97 | } 98 | 99 | func ArticleSourceDeleteApi(ctx iris.Context) { 100 | var req request.ArticleSource 101 | if err := ctx.ReadForm(&req); err != nil { 102 | ctx.JSON(iris.Map{ 103 | "code": config.StatusFailed, 104 | "msg": err.Error(), 105 | }) 106 | return 107 | } 108 | 109 | source, err := provider.GetArticleSourceById(req.ID) 110 | if err != nil { 111 | ctx.JSON(iris.Map{ 112 | "code": 
config.StatusFailed, 113 | "msg": err.Error(), 114 | }) 115 | return 116 | } 117 | 118 | err = source.Delete() 119 | if err != nil { 120 | ctx.JSON(iris.Map{ 121 | "code": config.StatusFailed, 122 | "msg": err.Error(), 123 | }) 124 | return 125 | } 126 | 127 | ctx.JSON(iris.Map{ 128 | "code": config.StatusOK, 129 | "msg": "删除成功", 130 | }) 131 | } 132 | 133 | func ArticleSourceSaveApi(ctx iris.Context) { 134 | var req request.ArticleSource 135 | err := ctx.ReadForm(&req) 136 | if err != nil { 137 | ctx.JSON(iris.Map{ 138 | "code": config.StatusFailed, 139 | "msg": err.Error(), 140 | }) 141 | return 142 | } 143 | var source *core.ArticleSource 144 | if req.ID > 0 { 145 | source, err = provider.GetArticleSourceById(req.ID) 146 | if err != nil { 147 | ctx.JSON(iris.Map{ 148 | "code": config.StatusFailed, 149 | "msg": err.Error(), 150 | }) 151 | return 152 | } 153 | } else { 154 | source, err = provider.GetArticleSourceByUrl(req.Url) 155 | if err == nil { 156 | ctx.JSON(iris.Map{ 157 | "code": config.StatusFailed, 158 | "msg": "该数据源已存在,不用重复添加", 159 | }) 160 | return 161 | } 162 | source = &core.ArticleSource{} 163 | source.Url = req.Url 164 | } 165 | 166 | if req.Url != "" { 167 | source.Url = req.Url 168 | } 169 | source.ErrorTimes = req.ErrorTimes 170 | 171 | err = source.Save() 172 | if err != nil { 173 | ctx.JSON(iris.Map{ 174 | "code": config.StatusFailed, 175 | "msg": err.Error(), 176 | }) 177 | return 178 | } 179 | //添加完,马上抓取 180 | core.GetArticleLinks(source) 181 | 182 | ctx.JSON(iris.Map{ 183 | "code": config.StatusOK, 184 | "msg": "添加/修改成功", 185 | "data": source, 186 | }) 187 | } 188 | 189 | func ArticlePublishApi(ctx iris.Context) { 190 | var req request.Article 191 | if err := ctx.ReadForm(&req); err != nil { 192 | ctx.JSON(iris.Map{ 193 | "code": config.StatusFailed, 194 | "msg": err.Error(), 195 | }) 196 | return 197 | } 198 | 199 | article, err := provider.GetArticleById(req.ID) 200 | if err != nil { 201 | ctx.JSON(iris.Map{ 202 | "code": 
config.StatusFailed, 203 | "msg": err.Error(), 204 | }) 205 | return 206 | } 207 | 208 | core.AutoPublish(article) 209 | 210 | ctx.JSON(iris.Map{ 211 | "code": config.StatusOK, 212 | "msg": "发布成功", 213 | }) 214 | } 215 | 216 | func ArticleCatchApi(ctx iris.Context) { 217 | var req request.Article 218 | if err := ctx.ReadForm(&req); err != nil { 219 | ctx.JSON(iris.Map{ 220 | "code": config.StatusFailed, 221 | "msg": err.Error(), 222 | }) 223 | return 224 | } 225 | 226 | article, err := provider.GetArticleById(req.ID) 227 | if err != nil { 228 | ctx.JSON(iris.Map{ 229 | "code": config.StatusFailed, 230 | "msg": err.Error(), 231 | }) 232 | return 233 | } 234 | 235 | go core.GetArticleDetail(article) 236 | 237 | ctx.JSON(iris.Map{ 238 | "code": config.StatusOK, 239 | "msg": "抓取任务已执行", 240 | }) 241 | } 242 | 243 | func ArticleSourceCatchApi(ctx iris.Context) { 244 | var req request.ArticleSource 245 | if err := ctx.ReadForm(&req); err != nil { 246 | ctx.JSON(iris.Map{ 247 | "code": config.StatusFailed, 248 | "msg": err.Error(), 249 | }) 250 | return 251 | } 252 | 253 | source, err := provider.GetArticleSourceById(req.ID) 254 | if err != nil { 255 | ctx.JSON(iris.Map{ 256 | "code": config.StatusFailed, 257 | "msg": err.Error(), 258 | }) 259 | return 260 | } 261 | 262 | go core.GetArticleLinks(source) 263 | 264 | ctx.JSON(iris.Map{ 265 | "code": config.StatusOK, 266 | "msg": "抓取任务执行", 267 | }) 268 | } -------------------------------------------------------------------------------- /app/handler/common.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "collector/config" 5 | "collector/services" 6 | "github.com/kataras/iris/v12" 7 | ) 8 | 9 | func NotFound(ctx iris.Context) { 10 | ctx.View("errors/404.html") 11 | } 12 | 13 | func Inspect(ctx iris.Context) { 14 | if services.DB == nil { 15 | ctx.Redirect("/install") 16 | return 17 | } 18 | 19 | ctx.Next() 20 | } 21 | 22 | func InspectJson(ctx 
iris.Context) { 23 | if services.DB == nil { 24 | ctx.JSON(iris.Map{ 25 | "code": config.StatusFailed, 26 | "msg": "请先完成初始化操作", 27 | }) 28 | return 29 | } 30 | 31 | ctx.Next() 32 | } 33 | -------------------------------------------------------------------------------- /app/handler/index.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "collector/config" 5 | "collector/core" 6 | "collector/services" 7 | "github.com/kataras/iris/v12" 8 | "time" 9 | ) 10 | 11 | func Index(ctx iris.Context) { 12 | ctx.View("index.html") 13 | } 14 | 15 | func IndexEchartsApi(ctx iris.Context) { 16 | //今天数据 17 | hours := []string{ 18 | "01:00", "02:00", "03:00", "04:00", "05:00", "06:00", "07:00", "08:00", "09:00", "10:00", "11:00", "12:00", "13:00", "14:00", "15:00", "16:00", "17:00", "18:00", "19:00", "20:00", "21:00", "22:00", "23:00", "23:59", 19 | } 20 | loc, _ := time.LoadLocation("Local") 21 | nowTime := time.Now() 22 | days := []string{ 23 | nowTime.Format("01-02"), 24 | nowTime.AddDate(0, 0, -1).Format("01-02"), 25 | nowTime.AddDate(0, 0, -2).Format("01-02"), 26 | nowTime.AddDate(0, 0, -3).Format("01-02"), 27 | nowTime.AddDate(0, 0, -4).Format("01-02"), 28 | nowTime.AddDate(0, 0, -5).Format("01-02"), 29 | nowTime.AddDate(0, 0, -6).Format("01-02"), 30 | } 31 | var articleHourCounts []int 32 | var articleDayCounts []int 33 | var sourceHourCounts []int 34 | var sourceDayCounts []int 35 | for _, v := range hours { 36 | var articleCount int 37 | var sourceCount int 38 | countTime, _ := time.ParseInLocation("2006-01-02 15:04", time.Now().Format("2006-01-02")+" "+v, loc) 39 | endTime := countTime.Unix() 40 | startTime := endTime - 3600 41 | services.DB.Model(&core.Article{}).Where("`status` = 1").Where("`created_time` >= ?", startTime).Where("`created_time` < ?", endTime).Count(&articleCount) 42 | articleHourCounts = append(articleHourCounts, articleCount) 43 | 44 | 
services.DB.Model(&core.Article{}).Where("`status` = 1").Where("`created_time` >= ?", startTime).Where("`created_time` < ?", endTime).Group("source_id").Count(&sourceCount) 45 | sourceHourCounts = append(sourceHourCounts, sourceCount) 46 | } 47 | for i, _ := range days { 48 | var articleCount int 49 | var sourceCount int 50 | 51 | countDay, _ := time.ParseInLocation("2006-01-02 15:04", nowTime.AddDate(0, 0, -i).Format("2006-01-02 00:00"), loc) 52 | startTime := countDay.Unix() 53 | endTime := startTime + 86400 54 | services.DB.Model(&core.Article{}).Where("`status` = 1").Where("`created_time` >= ?", startTime).Where("`created_time` < ?", endTime).Count(&articleCount) 55 | articleDayCounts = append(articleDayCounts, articleCount) 56 | 57 | services.DB.Model(&core.Article{}).Where("`status` = 1").Where("`created_time` >= ?", startTime).Where("`created_time` < ?", endTime).Group("source_id").Count(&sourceCount) 58 | 59 | sourceDayCounts = append(sourceDayCounts, sourceCount) 60 | } 61 | 62 | ctx.JSON(iris.Map{ 63 | "code": config.StatusOK, 64 | "msg": "", 65 | "data": iris.Map{ 66 | "articleHourCounts": articleHourCounts, 67 | "articleDayCounts": articleDayCounts, 68 | "sourceHourCounts": sourceHourCounts, 69 | "sourceDayCounts": sourceDayCounts, 70 | "hours": hours, 71 | "days": days, 72 | }, 73 | }) 74 | } 75 | -------------------------------------------------------------------------------- /app/handler/install.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "collector/app/request" 5 | "collector/config" 6 | "collector/core" 7 | "collector/services" 8 | "fmt" 9 | "github.com/jinzhu/gorm" 10 | "github.com/kataras/iris/v12" 11 | ) 12 | 13 | func Install(ctx iris.Context) { 14 | if services.DB != nil { 15 | ctx.Redirect("/") 16 | return 17 | } 18 | 19 | ctx.View("install/index.html") 20 | } 21 | 22 | func InstallForm(ctx iris.Context) { 23 | if services.DB != nil { 24 | ctx.Redirect("/") 25 | 
return 26 | } 27 | var req request.Install 28 | if err := ctx.ReadForm(&req); err != nil { 29 | ctx.JSON(iris.Map{ 30 | "code": config.StatusFailed, 31 | "msg": err.Error(), 32 | }) 33 | return 34 | } 35 | 36 | mysqlUrl := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=%s&parseTime=True&loc=Local", 37 | req.User, req.Password, req.Host, req.Port, req.Database, config.MySQLConfig.Charset) 38 | _, err := gorm.Open("mysql", mysqlUrl) 39 | if err != nil { 40 | ctx.JSON(iris.Map{ 41 | "code": config.StatusFailed, 42 | "msg": err.Error(), 43 | }) 44 | return 45 | } 46 | 47 | config.JsonData.MySQL.Database = req.Database 48 | config.JsonData.MySQL.User = req.User 49 | config.JsonData.MySQL.Password = req.Password 50 | config.JsonData.MySQL.Host = req.Host 51 | config.JsonData.MySQL.Port = req.Port 52 | config.JsonData.MySQL.Url = mysqlUrl 53 | err = config.WriteConfig() 54 | if err != nil { 55 | ctx.JSON(iris.Map{ 56 | "code": config.StatusFailed, 57 | "msg": err.Error(), 58 | }) 59 | return 60 | } 61 | 62 | config.InitJSON() 63 | services.InitDB() 64 | core.AutoMigrateDB() 65 | 66 | ctx.JSON(iris.Map{ 67 | "code": config.StatusOK, 68 | "msg": "采集工具初始化成功", 69 | }) 70 | } 71 | -------------------------------------------------------------------------------- /app/handler/setting.go: -------------------------------------------------------------------------------- 1 | package handler 2 | 3 | import ( 4 | "collector/app/request" 5 | "collector/config" 6 | "github.com/kataras/iris/v12" 7 | "strings" 8 | ) 9 | 10 | func DefaultSetting(ctx iris.Context) { 11 | ctx.View("setting/index.html") 12 | } 13 | 14 | func PublishSetting(ctx iris.Context) { 15 | 16 | ctx.View("setting/publish.html") 17 | } 18 | 19 | func DefaultSettingApi(ctx iris.Context) { 20 | ctx.JSON(iris.Map{ 21 | "code": config.StatusOK, 22 | "msg": "", 23 | "data": config.JsonData.Collector, 24 | }) 25 | } 26 | 27 | func PublishSettingApi(ctx iris.Context) { 28 | ctx.JSON(iris.Map{ 29 | "code": config.StatusOK, 30 | "msg": 
"", 31 | "data": config.JsonData.Content, 32 | }) 33 | } 34 | 35 | func DefaultSettingForm(ctx iris.Context) { 36 | var req request.DefaultSetting 37 | if err := ctx.ReadForm(&req); err != nil { 38 | ctx.JSON(iris.Map{ 39 | "code": config.StatusFailed, 40 | "msg": err.Error(), 41 | }) 42 | return 43 | } 44 | 45 | config.JsonData.Collector.ErrorTimes = req.ErrorTimes 46 | config.JsonData.Collector.Channels = req.Channels 47 | config.JsonData.Collector.TitleMinLength = req.TitleMinLength 48 | config.JsonData.Collector.ContentMinLength = req.ContentMinLength 49 | config.JsonData.Collector.TitleExclude = req.TitleExclude 50 | config.JsonData.Collector.TitleExcludePrefix = req.TitleExcludePrefix 51 | config.JsonData.Collector.TitleExcludeSuffix = req.TitleExcludeSuffix 52 | config.JsonData.Collector.ContentExclude = req.ContentExclude 53 | config.JsonData.Collector.ContentExcludeLine = req.ContentExcludeLine 54 | 55 | err := config.WriteConfig() 56 | if err != nil { 57 | ctx.JSON(iris.Map{ 58 | "code": config.StatusFailed, 59 | "msg": err.Error(), 60 | }) 61 | return 62 | } 63 | 64 | config.InitJSON() 65 | 66 | ctx.JSON(iris.Map{ 67 | "code": config.StatusOK, 68 | "msg": "配置成功", 69 | }) 70 | } 71 | 72 | func PublishSettingForm(ctx iris.Context) { 73 | var req request.ContentSetting 74 | if err := ctx.ReadForm(&req); err != nil { 75 | ctx.JSON(iris.Map{ 76 | "code": config.StatusFailed, 77 | "msg": err.Error(), 78 | }) 79 | return 80 | } 81 | 82 | config.JsonData.Content.AutoPublish = req.AutoPublish 83 | config.JsonData.Content.TableName = req.TableName 84 | config.JsonData.Content.IdField = req.IdField 85 | config.JsonData.Content.TitleField = req.TitleField 86 | config.JsonData.Content.CreatedTimeField = req.CreatedTimeField 87 | config.JsonData.Content.KeywordsField = req.KeywordsField 88 | config.JsonData.Content.DescriptionField = req.DescriptionField 89 | config.JsonData.Content.ContentTableName = req.ContentTableName 90 | config.JsonData.Content.ContentIdField = 
req.ContentIdField 91 | config.JsonData.Content.ContentField = req.ContentField 92 | config.JsonData.Content.RemoteUrl = req.RemoteUrl 93 | config.JsonData.Content.ContentType = req.ContentType 94 | 95 | var headers []config.KeyValue 96 | for _, v := range req.Headers { 97 | vv := strings.Split(v, ":") 98 | if len(vv) >= 2 { 99 | headers = append(headers, config.KeyValue{ 100 | Key: vv[0], 101 | Value: vv[1], 102 | }) 103 | } 104 | } 105 | var cookies []config.KeyValue 106 | for _, v := range req.Cookies { 107 | vv := strings.Split(v, ":") 108 | if len(vv) >= 2 { 109 | cookies = append(cookies, config.KeyValue{ 110 | Key: vv[0], 111 | Value: vv[1], 112 | }) 113 | } 114 | } 115 | var extraFields []config.KeyValue 116 | for _, v := range req.ExtraFields { 117 | vv := strings.Split(v, ":") 118 | if len(vv) >= 2 { 119 | extraFields = append(extraFields, config.KeyValue{ 120 | Key: vv[0], 121 | Value: vv[1], 122 | }) 123 | } 124 | } 125 | 126 | config.JsonData.Content.Headers = headers 127 | config.JsonData.Content.Cookies = cookies 128 | config.JsonData.Content.ExtraFields = extraFields 129 | 130 | err := config.WriteConfig() 131 | if err != nil { 132 | ctx.JSON(iris.Map{ 133 | "code": config.StatusFailed, 134 | "msg": err.Error(), 135 | }) 136 | return 137 | } 138 | 139 | config.InitJSON() 140 | 141 | ctx.JSON(iris.Map{ 142 | "code": config.StatusOK, 143 | "msg": "配置成功", 144 | }) 145 | } 146 | -------------------------------------------------------------------------------- /app/provider/article.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "collector/core" 5 | "collector/services" 6 | ) 7 | 8 | func GetArticleSourceList(currentPage int, pageSize int) ([]core.ArticleSource, int, error) { 9 | var sources []core.ArticleSource 10 | offset := (currentPage - 1) * pageSize 11 | var total int 12 | 13 | builder := services.DB.Model(core.ArticleSource{}).Order("id desc") 14 | if err := 
builder.Count(&total).Limit(pageSize).Offset(offset).Find(&sources).Error; err != nil { 15 | return nil, 0, err 16 | } 17 | 18 | return sources, total, nil 19 | } 20 | 21 | func GetArticleList(currentPage int, pageSize int) ([]core.Article, int, error) { 22 | var articles []core.Article 23 | offset := (currentPage - 1) * pageSize 24 | var total int 25 | 26 | builder := services.DB.Model(core.Article{}).Order("id desc") 27 | if err := builder.Count(&total).Limit(pageSize).Offset(offset).Find(&articles).Error; err != nil { 28 | return nil, 0, err 29 | } 30 | if len(articles) > 0 { 31 | for i, v := range articles { 32 | var articleData core.ArticleData 33 | if err := services.DB.Model(core.ArticleData{}).Where("`id` = ?", v.Id).First(&articleData).Error; err == nil { 34 | articles[i].Content = articleData.Content 35 | } 36 | } 37 | } 38 | return articles, total, nil 39 | } 40 | 41 | func GetArticleById(id int) (*core.Article, error) { 42 | var article core.Article 43 | if err := services.DB.Model(core.Article{}).Where("`id` = ?", id).First(&article).Error; err != nil { 44 | return nil, err 45 | } 46 | var articleData core.ArticleData 47 | if err := services.DB.Model(core.ArticleData{}).Where("`id` = ?", id).First(&articleData).Error; err != nil { 48 | return nil, err 49 | } 50 | article.Content = articleData.Content 51 | 52 | return &article, nil 53 | } 54 | 55 | func GetArticleSourceById(id int) (*core.ArticleSource, error) { 56 | var source core.ArticleSource 57 | if err := services.DB.Model(core.ArticleSource{}).Where("`id` = ?", id).First(&source).Error; err != nil { 58 | return nil, err 59 | } 60 | 61 | return &source, nil 62 | } 63 | 64 | func GetArticleSourceByUrl(uri string) (*core.ArticleSource, error) { 65 | var source core.ArticleSource 66 | if err := services.DB.Model(core.ArticleSource{}).Where("`url` = ?", uri).First(&source).Error; err != nil { 67 | return nil, err 68 | } 69 | 70 | return &source, nil 71 | } 72 | 
// ArticleSource is the form payload accepted by the article-source endpoints
// (save, delete, catch).
type ArticleSource struct {
	// ID identifies an existing source. The save handler treats ID > 0 as
	// "update" and anything else as "create".
	ID int `form:"id"`
	// Url is the address of the source. It is tagged as required.
	Url string `form:"url" validate:"required"`
	// ErrorTimes is copied onto the stored source when saving.
	ErrorTimes int `form:"error_times"`
	// UrlType is accepted from the form.
	// NOTE(review): no handler shown alongside this struct reads it; confirm
	// where it is used.
	UrlType int `form:"url_type"`
}
`form:"created_time_field"` 21 | KeywordsField string `form:"keywords_field"` 22 | DescriptionField string `form:"description_field"` 23 | AuthorField string `form:"author_field"` 24 | ViewsField string `form:"views_field"` 25 | ContentTableName string `form:"content_table_name"` 26 | ContentIdField string `form:"content_id_field"` 27 | ContentField string `form:"content_field"` 28 | RemoteUrl string `form:"remote_url"` 29 | ContentType string `form:"content_type"` 30 | Headers []string `form:"headers[]"` 31 | Cookies []string `form:"cookies[]"` 32 | ExtraFields []string `form:"extra_fields[]"` 33 | } 34 | -------------------------------------------------------------------------------- /app/route/base.go: -------------------------------------------------------------------------------- 1 | package route 2 | 3 | import ( 4 | "collector/app/handler" 5 | "collector/config" 6 | "fmt" 7 | "github.com/kataras/iris/v12" 8 | ) 9 | 10 | func Register(app *iris.Application) { 11 | app.Use(Cors) 12 | 13 | app.OnErrorCode(iris.StatusNotFound, handler.NotFound) 14 | app.OnErrorCode(iris.StatusInternalServerError, handler.NotFound) 15 | 16 | app.HandleDir("/", fmt.Sprintf("%spublic", config.ExecPath)) 17 | app.Get("/", handler.Inspect, handler.Index) 18 | 19 | app.Get("/install", handler.Install) 20 | app.Post("/install", handler.InstallForm) 21 | 22 | app.Get("/source", handler.Inspect, handler.ArticleSource) 23 | app.Get("/article", handler.Inspect, handler.ArticleList) 24 | app.Get("/keywords", handler.Inspect, handler.Keywords) 25 | app.Get("/setting", handler.Inspect, handler.DefaultSetting) 26 | app.Get("/publish", handler.Inspect, handler.PublishSetting) 27 | 28 | app.Post("/setting", handler.InspectJson, handler.DefaultSettingForm) 29 | app.Post("/publish", handler.InspectJson, handler.PublishSettingForm) 30 | 31 | api := app.Party("/api", handler.InspectJson) 32 | { 33 | api.Get("/index/echarts", handler.IndexEchartsApi) 34 | 35 | api.Get("/article/list", 
handler.ArticleListApi) 36 | api.Post("/article/delete", handler.ArticleDeleteApi) 37 | api.Post("/article/publish", handler.ArticlePublishApi) 38 | api.Post("/article/catch", handler.ArticleCatchApi) 39 | 40 | api.Get("/article/source/list", handler.ArticleSourceListApi) 41 | api.Post("/article/source/delete", handler.ArticleSourceDeleteApi) 42 | api.Post("/article/source/save", handler.ArticleSourceSaveApi) 43 | api.Post("/article/source/catch", handler.ArticleSourceCatchApi) 44 | api.Get("/setting", handler.DefaultSettingApi) 45 | api.Get("/publish", handler.PublishSettingApi) 46 | } 47 | } 48 | 49 | func Cors(ctx iris.Context) { 50 | ctx.Header("Access-Control-Allow-Origin", "*") 51 | if ctx.Request().Method == "OPTIONS" { 52 | ctx.Header("Access-Control-Allow-Methods", "GET,POST,PUT,DELETE,PATCH,OPTIONS") 53 | ctx.Header("Access-Control-Allow-Headers", "Content-Type, Api, Accept, Authorization, Version, Token") 54 | ctx.StatusCode(204) 55 | return 56 | } 57 | ctx.Next() 58 | } 59 | -------------------------------------------------------------------------------- /config.dist.json: -------------------------------------------------------------------------------- 1 | { 2 | "mysql": { 3 | "database": "", 4 | "user": "", 5 | "password": "", 6 | "host": "localhost", 7 | "port": 3306, 8 | "charset": "utf8mb4", 9 | "max_idle_connections": 1000, 10 | "max_open_connections": 100000 11 | }, 12 | "server": { 13 | "site_name": "万能采集器", 14 | "host": "localhost", 15 | "env": "production", 16 | "log_level": "error", 17 | "port": 8088 18 | }, 19 | "collector": { 20 | "error_times": 5, 21 | "channels": 5, 22 | "title_min_length": 6, 23 | "content_min_length": 200, 24 | "title_exclude": [ 25 | "关于我们" 26 | ], 27 | "title_exclude_prefix": [ 28 | "NotFound" 29 | ], 30 | "title_exclude_suffix": [ 31 | "网站" 32 | ], 33 | "content_exclude": [ 34 | "版权声明" 35 | ], 36 | "content_exclude_line": [ 37 | "更多精彩", 38 | "字体" 39 | ] 40 | }, 41 | "content": { 42 | "auto_publish": 0, 43 | 
// collectorConfig is the "collector" section of config.json. It holds the
// thresholds and exclusion lists that tune article collection.
// NOTE(review): the per-field meanings below are inferred from the field
// names and the shipped defaults; confirm against the collector code in
// package core.
type collectorConfig struct {
	// ErrorTimes is presumably the number of failures tolerated for a source.
	ErrorTimes int `json:"error_times"`
	// Channels is presumably the number of concurrent collection workers.
	Channels int `json:"channels"`
	// TitleMinLength is presumably the minimum accepted title length.
	TitleMinLength int `json:"title_min_length"`
	// ContentMinLength is presumably the minimum accepted content length.
	ContentMinLength int `json:"content_min_length"`
	// TitleExclude lists strings used to exclude titles.
	TitleExclude []string `json:"title_exclude"`
	// TitleExcludePrefix lists title prefixes used for exclusion.
	TitleExcludePrefix []string `json:"title_exclude_prefix"`
	// TitleExcludeSuffix lists title suffixes used for exclusion.
	TitleExcludeSuffix []string `json:"title_exclude_suffix"`
	// ContentExclude lists strings used to exclude content.
	ContentExclude []string `json:"content_exclude"`
	// ContentExcludeLine lists strings used to exclude content lines.
	ContentExcludeLine []string `json:"content_exclude_line"`
}
"微信扫一扫", 66 | "用手机浏览", 67 | "打印正文", 68 | "浏览次数", 69 | }, 70 | } 71 | -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "os" 10 | "path/filepath" 11 | "regexp" 12 | "strings" 13 | "unicode/utf8" 14 | ) 15 | 16 | type configData struct { 17 | MySQL mySQLConfig `json:"mysql"` 18 | Server serverConfig `json:"server"` 19 | Collector collectorConfig `json:"collector"` 20 | Content contentConfig `json:"content"` 21 | } 22 | 23 | var ExecPath string 24 | 25 | func InitJSON() { 26 | sep := string(os.PathSeparator) 27 | root := filepath.Dir(os.Args[0]) 28 | ExecPath, _ = filepath.Abs(root) 29 | if strings.Contains(ExecPath, "/T") || strings.Contains(ExecPath, "temp") { 30 | ExecPath, _ = os.Getwd() 31 | } 32 | length := utf8.RuneCountInString(ExecPath) 33 | lastChar := ExecPath[length-1:] 34 | if lastChar != sep { 35 | ExecPath = ExecPath + sep 36 | } 37 | 38 | //生成public目录 39 | _, err := os.Stat(ExecPath + "public") 40 | if err != nil && os.IsNotExist(err) { 41 | err = os.Mkdir(ExecPath+"public", os.ModePerm) 42 | if err != nil { 43 | fmt.Println("无法创建public目录: ", err.Error()) 44 | os.Exit(-1) 45 | } 46 | } 47 | 48 | buf, err := ioutil.ReadFile(fmt.Sprintf("%sconfig.json", ExecPath)) 49 | configStr := "" 50 | if err != nil { 51 | //文件不存在 52 | fmt.Println("根目录下不存在配置文件config.json") 53 | os.Exit(-1) 54 | } 55 | configStr = string(buf[:]) 56 | reg := regexp.MustCompile(`/\*.*\*/`) 57 | 58 | configStr = reg.ReplaceAllString(configStr, "") 59 | buf = []byte(configStr) 60 | 61 | if err := json.Unmarshal(buf, &JsonData); err != nil { 62 | fmt.Println("配置文件格式有误: ", err.Error()) 63 | os.Exit(-1) 64 | } 65 | 66 | //load Mysql 67 | MySQLConfig = JsonData.MySQL 68 | if MySQLConfig.Database != "" { 69 | url := 
fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=%s&parseTime=True&loc=Local", 70 | MySQLConfig.User, MySQLConfig.Password, MySQLConfig.Host, MySQLConfig.Port, MySQLConfig.Database, MySQLConfig.Charset) 71 | MySQLConfig.Url = url 72 | } 73 | 74 | //load server 75 | ServerConfig = JsonData.Server 76 | ServerConfig.ExecPath = ExecPath 77 | 78 | //load collector 79 | CollectorConfig = loadCollectorConfig(JsonData.Collector) 80 | 81 | //load content 82 | ContentConfig = JsonData.Content 83 | } 84 | 85 | var JsonData configData 86 | var MySQLConfig mySQLConfig 87 | var ServerConfig serverConfig 88 | var CollectorConfig collectorConfig 89 | var ContentConfig contentConfig 90 | 91 | func init() { 92 | InitJSON() 93 | } 94 | 95 | func loadCollectorConfig(collector collectorConfig) collectorConfig { 96 | if collector.ErrorTimes == 0 { 97 | collector.ErrorTimes = defaultCollectorConfig.ErrorTimes 98 | } 99 | if collector.Channels == 0 { 100 | collector.Channels = defaultCollectorConfig.Channels 101 | } 102 | if collector.TitleMinLength == 0 { 103 | collector.TitleMinLength = defaultCollectorConfig.TitleMinLength 104 | } 105 | if collector.ContentMinLength == 0 { 106 | collector.ContentMinLength = defaultCollectorConfig.ContentMinLength 107 | } 108 | for _, v := range defaultCollectorConfig.TitleExclude { 109 | exists := false 110 | for _, vv := range collector.TitleExclude { 111 | if vv == v { 112 | exists = true 113 | } 114 | } 115 | if !exists { 116 | collector.TitleExclude = append(collector.TitleExclude, v) 117 | } 118 | } 119 | for _, v := range defaultCollectorConfig.TitleExcludePrefix { 120 | exists := false 121 | for _, vv := range collector.TitleExcludePrefix { 122 | if vv == v { 123 | exists = true 124 | } 125 | } 126 | if !exists { 127 | collector.TitleExcludePrefix = append(collector.TitleExcludePrefix, v) 128 | } 129 | } 130 | for _, v := range defaultCollectorConfig.TitleExcludeSuffix { 131 | exists := false 132 | for _, vv := range collector.TitleExcludeSuffix { 133 | 
if vv == v { 134 | exists = true 135 | } 136 | } 137 | if !exists { 138 | collector.TitleExcludeSuffix = append(collector.TitleExcludeSuffix, v) 139 | } 140 | } 141 | for _, v := range defaultCollectorConfig.ContentExclude { 142 | exists := false 143 | for _, vv := range collector.ContentExclude { 144 | if vv == v { 145 | exists = true 146 | } 147 | } 148 | if !exists { 149 | collector.ContentExclude = append(collector.ContentExclude, v) 150 | } 151 | } 152 | for _, v := range defaultCollectorConfig.ContentExcludeLine { 153 | exists := false 154 | for _, vv := range collector.ContentExcludeLine { 155 | if vv == v { 156 | exists = true 157 | } 158 | } 159 | if !exists { 160 | collector.ContentExcludeLine = append(collector.ContentExcludeLine, v) 161 | } 162 | } 163 | 164 | return collector 165 | } 166 | 167 | func WriteConfig() error { 168 | //将现有配置写回文件 169 | configFile, err := os.OpenFile(fmt.Sprintf("%sconfig.json", ExecPath), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666) 170 | if err != nil { 171 | return err 172 | } 173 | 174 | defer configFile.Close() 175 | 176 | buff := &bytes.Buffer{} 177 | 178 | buf, err := json.MarshalIndent(JsonData, "", "\t") 179 | if err != nil { 180 | return err 181 | } 182 | buff.Write(buf) 183 | 184 | _, err = io.Copy(configFile, buff) 185 | if err != nil { 186 | return err 187 | } 188 | 189 | return nil 190 | } 191 | -------------------------------------------------------------------------------- /config/constant.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | const StatusOK = 0 4 | const StatusFailed = -1 5 | -------------------------------------------------------------------------------- /config/content.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type KeyValue struct { 4 | Key string `json:"key" form:"key"` 5 | Value string `json:"value" form:"value"` 6 | } 7 | 8 | type contentConfig struct { 9 | AutoPublish 
int `json:"auto_publish"` 10 | TableName string `json:"table_name"` 11 | IdField string `json:"id_field"` 12 | TitleField string `json:"title_field"` 13 | CreatedTimeField string `json:"created_time_field"` 14 | KeywordsField string `json:"keywords_field"` 15 | DescriptionField string `json:"description_field"` 16 | AuthorField string `json:"author_field"` 17 | ViewsField string `json:"views_field"` 18 | ContentTableName string `json:"content_table_name"` 19 | ContentIdField string `json:"content_id_field"` 20 | ContentField string `json:"content_field"` 21 | RemoteUrl string `json:"remote_url"` 22 | ContentType string `json:"content_type"` 23 | Headers []KeyValue `json:"headers"` 24 | Cookies []KeyValue `json:"cookies"` 25 | ExtraFields []KeyValue `json:"extra_fields"` 26 | } 27 | -------------------------------------------------------------------------------- /config/mysql.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type mySQLConfig struct { 4 | Database string `json:"database"` 5 | User string `json:"user"` 6 | Password string `json:"password"` 7 | Host string `json:"host"` 8 | Port int `json:"port"` 9 | Charset string `json:"charset"` 10 | MaxIdleConnections int `json:"max_idle_connections"` 11 | MaxOpenConnections int `json:"max_open_connections"` 12 | Url string `json:"-"` 13 | } 14 | -------------------------------------------------------------------------------- /config/server.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type serverConfig struct { 4 | SiteName string `json:"site_name"` 5 | Host string `json:"host"` 6 | Env string `json:"env"` 7 | LogLevel string `json:"log_level"` 8 | Port int `json:"port"` 9 | ExecPath string `json:"-"` 10 | } 11 | -------------------------------------------------------------------------------- /core/article.go: 
-------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "collector/services" 5 | "github.com/jinzhu/gorm" 6 | "time" 7 | ) 8 | /* Article is the gorm model for one collected article. Content and the other fields whose gorm tag is a dash are not columns of this model; Content is written separately as ArticleData (see Save). */ 9 | type Article struct { 10 | Id int `json:"id" gorm:"column:id;type:int(10) unsigned not null AUTO_INCREMENT;primary_key"` 11 | SourceId int `json:"source_id" gorm:"column:source_id;type:int(11) not null;default:0"` 12 | Title string `json:"title" gorm:"column:title;type:varchar(190) not null;default:'';index:idx_title"` 13 | Keywords string `json:"keywords" gorm:"column:keywords;type:varchar(250) not null;default:''"` 14 | Description string `json:"description" gorm:"column:description;type:varchar(250) not null;default:''"` 15 | Content string `json:"content" gorm:"-"` 16 | ArticleType int `json:"article_type" gorm:"column:article_type;type:tinyint(1) unsigned not null;default:0;index:idx_article_type"` 17 | OriginUrl string `json:"origin_url" gorm:"column:origin_url;type:varchar(250) not null;default:'';index:idx_origin_url"` 18 | Author string `json:"author" gorm:"column:author;type:varchar(100) not null;default:''"` 19 | Views int `json:"views" gorm:"column:views;type:int(10) not null;default:0;index:idx_views"` 20 | Status int `json:"status" gorm:"column:status;type:tinyint(1) unsigned not null;default:0;index:idx_status"` 21 | CreatedTime int `json:"created_time" gorm:"column:created_time;type:int(11) unsigned not null;default:0;index:idx_created_time"` 22 | UpdatedTime int `json:"updated_time" gorm:"column:updated_time;type:int(11) unsigned not null;default:0;index:idx_updated_time"` 23 | DeletedTime int `json:"-" gorm:"column:deleted_time;type:int(11) unsigned not null;default:0"` 24 | OriginDomain string `json:"-" gorm:"-"` 25 | OriginPath string `json:"-" gorm:"-"` 26 | ContentText string `json:"-" gorm:"-"` 27 | PubDate string `json:"-" gorm:"-"` 28 | } 29 | /* ArticleData stores an article body (Content) under the same Id as its Article. */ 30 | type ArticleData struct { 31 | Id int `json:"id" gorm:"column:id;type:int(10) ;unsigned not null 
AUTO_INCREMENT;primary_key"` 32 | Content string `json:"content" gorm:"column:content;type:longtext;not null;default:''"` 33 | } 34 | /* ArticleSource is a page URL that article links are collected from. ErrorTimes is incremented when collecting its links fails (see GetArticleLinks). */ 35 | type ArticleSource struct { 36 | Id int `json:"id" gorm:"column:id;type:int(10) unsigned not null AUTO_INCREMENT;primary_key"` 37 | Url string `json:"url" gorm:"column:url;type:varchar(190) not null;default:'';index:idx_url"` 38 | UrlType int `json:"url_type" gorm:"column:url_type;type:tinyint(1) not null;default:0"` 39 | ErrorTimes int `json:"error_times" gorm:"column:error_times;type:int(10) not null;default:0;index:idx_error_times"` 40 | } 41 | /* Save writes the article through db, stamping CreatedTime first when Id is 0, and returns the gorm error if that write fails. It then saves Content as an ArticleData with the same Id. NOTE(review): the result of the ArticleData save is not checked. */ 42 | func (article *Article) Save(db *gorm.DB) error { 43 | if article.Id == 0 { 44 | article.CreatedTime = int(time.Now().Unix()) 45 | } 46 | 47 | if err := db.Save(article).Error; err != nil { 48 | return err 49 | } 50 | articleData := ArticleData{ 51 | Id: article.Id, 52 | Content: article.Content, 53 | } 54 | db.Save(&articleData) 55 | 56 | return nil 57 | } 58 | /* Delete removes the article through services.DB and returns the gorm error if that fails. It then deletes the ArticleData with the same id. NOTE(review): the result of the second delete is not checked. */ 59 | func (article *Article) Delete() error { 60 | db := services.DB 61 | if err := db.Delete(article).Error; err != nil { 62 | return err 63 | } 64 | 65 | db.Where("id = ?", article.Id).Delete(ArticleData{}) 66 | 67 | return nil 68 | } 69 | /* Save writes the source through services.DB and returns any gorm error. */ 70 | func (source *ArticleSource) Save() error { 71 | db := services.DB 72 | if err := db.Save(source).Error; err != nil { 73 | return err 74 | } 75 | 76 | return nil 77 | } 78 | /* Delete removes the source through services.DB and returns any gorm error. */ 79 | func (source *ArticleSource) Delete() error { 80 | db := services.DB 81 | if err := db.Delete(source).Error; err != nil { 82 | return err 83 | } 84 | 85 | return nil 86 | } 87 | /* AutoMigrateDB runs gorm AutoMigrate for Article, ArticleData and ArticleSource when services.DB is set; when it is nil the call does nothing. */ 88 | func AutoMigrateDB() { 89 | if services.DB != nil { 90 | // auto-migrate the database tables 91 | services.DB.AutoMigrate( 92 | &Article{}, 93 | &ArticleData{}, 94 | &ArticleSource{}, 95 | ) 96 | } 97 | } -------------------------------------------------------------------------------- /core/collector.go: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import ( 4 | "collector/config" 5 | 
"collector/services" 6 | "fmt" 7 | "github.com/Chain-Zhang/pinyin" 8 | "github.com/PuerkitoBio/goquery" 9 | "github.com/fesiong/goproject/convert" 10 | "github.com/parnurzeal/gorequest" 11 | "github.com/polaris1119/keyword" 12 | "github.com/robfig/cron/v3" 13 | "log" 14 | "net/http" 15 | "net/url" 16 | "path/filepath" 17 | "regexp" 18 | "strconv" 19 | "strings" 20 | "sync" 21 | "time" 22 | "unicode/utf8" 23 | ) 24 | /* waitGroup counts the running detail-collection goroutines. ch is the buffered channel created in Crond; CollectDetailTask sends to it before starting each goroutine, which caps how many run at once. */ 25 | var waitGroup sync.WaitGroup 26 | var ch chan string 27 | /* Crond sizes ch from config.CollectorConfig.Channels, registers the list and detail collection tasks on a cron scheduler created with cron.WithSeconds, starts the scheduler, and also launches both tasks once in their own goroutines. */ 28 | func Crond() { 29 | // how many channel slots may be in use at once 30 | ch = make(chan string, config.CollectorConfig.Channels) 31 | 32 | fmt.Println("collection") 33 | 34 | //link := &Article{ 35 | // OriginUrl: "http://scitech.people.com.cn/n1/2020/0910/c1007-31856039.html", 36 | //} 37 | //CollectDetail(link) 38 | //log.Println(link.Title, "--------", link.Content) 39 | //var sources []ArticleSource 40 | //services.DB.Model(ArticleSource{}).Find(&sources) 41 | //for _, source := range sources { 42 | // waitGroup.Add(1) 43 | // link := &Article{ 44 | // OriginUrl: source.Url, 45 | // } 46 | // go CollectDetail(link) 47 | //} 48 | //waitGroup.Wait() 49 | //os.Exit(0) 50 | // list task: collect article addresses and add them to the pool (this note originally said hourly; the spec registered below is every 10 minutes) 51 | // detail task: runs every minute and checks for articles waiting to be collected 52 | crontab := cron.New(cron.WithSeconds()) 53 | // fetch the lists every 10 minutes 54 | crontab.AddFunc("1 */10 * * * *", CollectListTask) 55 | // fetch article details every minute 56 | crontab.AddFunc("1 */1 * * * *", CollectDetailTask) 57 | crontab.Start() 58 | // also run both tasks once at startup 59 | go CollectListTask() 60 | go CollectDetailTask() 61 | } 62 | /* CollectListTask returns immediately when services.DB is nil. Otherwise it loads every ArticleSource whose error_times is below config.CollectorConfig.ErrorTimes and calls getArticleLinks on each, one after another. A query error ends the task without being reported. */ 63 | func CollectListTask() { 64 | if services.DB == nil { 65 | return 66 | } 67 | fmt.Println("collect list") 68 | db := services.DB 69 | var articleSources []ArticleSource 70 | err := db.Model(ArticleSource{}).Where("`error_times` < ?", config.CollectorConfig.ErrorTimes).Find(&articleSources).Error 71 | if err != nil { 72 | return 73 | } 74 | 75 | for _, v := range articleSources { 76 | //ch <- fmt.Sprintf("%d", i) 77 | //waitGroup.Add(1) 78 | getArticleLinks(v) 79 | } 80 | 81 | //waitGroup.Wait() 82 | } 83 | 
84 | func CollectDetailTask() { 85 | if services.DB == nil { 86 | return 87 | } 88 | fmt.Println("collect detail") 89 | //检查article的地址 90 | var articleList []Article 91 | 92 | db := services.DB 93 | db.Debug().Model(Article{}).Where("status = 0").Order("id asc").Limit(config.CollectorConfig.Channels * 100).Scan(&articleList) 94 | for _, vv := range articleList { 95 | ch <- vv.OriginUrl 96 | waitGroup.Add(1) 97 | go getArticleDetail(vv) 98 | } 99 | 100 | waitGroup.Wait() 101 | } 102 | 103 | func getArticleLinks(v ArticleSource) { 104 | //defer func() { 105 | // waitGroup.Done() 106 | // <-ch 107 | //}() 108 | GetArticleLinks(&v) 109 | } 110 | 111 | func GetArticleLinks(v *ArticleSource) { 112 | //defer func() { 113 | // waitGroup.Done() 114 | // <-ch 115 | //}() 116 | db := services.DB 117 | articleList, err := CollectLinks(v.Url) 118 | if err == nil { 119 | for _, article := range articleList { 120 | //先检查数据库里有没有,没有的话,就抓回来 121 | article.CreatedTime = int(time.Now().Unix()) 122 | article.SourceId = v.Id 123 | article.ArticleType = v.UrlType 124 | article.Status = 0 125 | db.Model(Article{}).Where(Article{OriginUrl: article.OriginUrl}).FirstOrCreate(&article) 126 | } 127 | } else { 128 | db.Model(v).Update("error_times", v.ErrorTimes+1) 129 | } 130 | } 131 | 132 | func getArticleDetail(v Article) { 133 | defer func() { 134 | waitGroup.Done() 135 | <-ch 136 | }() 137 | 138 | GetArticleDetail(&v) 139 | } 140 | 141 | func GetArticleDetail(v *Article) { 142 | db := services.DB 143 | //标记当前为执行中 144 | db.Model(Article{}).Where("`id` = ?", v.Id).Update("status", 2) 145 | 146 | _ = CollectDetail(v) 147 | 148 | //更新到数据库中 149 | status := int(1) 150 | if v.Content == "" { 151 | status = 3 152 | } 153 | if utf8.RuneCountInString(v.Title) < config.CollectorConfig.TitleMinLength { 154 | status = 3 155 | } 156 | urlArr := strings.Split(v.OriginUrl, "/") 157 | if len(urlArr) <= 3 { 158 | status = 3 159 | } 160 | if len(urlArr) <= 4 && strings.HasPrefix(v.OriginUrl, "/") { 161 | 
status = 3 162 | } 163 | 164 | if strings.Contains(v.Title, "法律声明") || strings.Contains(v.Title, "关于我们") || strings.Contains(v.Title, "站点地图") || strings.Contains(v.Title, "区长信箱") || strings.Contains(v.Title, "政务服务网") || strings.Contains(v.Title, "政务公开") || strings.Contains(v.Title, "人民政府网站") || strings.Contains(v.Title, "门户网站") || strings.Contains(v.Title, "领导介绍") || strings.Contains(v.Title, "403") || strings.Contains(v.Title, "404") || strings.Contains(v.Title, "Government") || strings.Contains(v.Title, "China") { 165 | status = 3 166 | } 167 | //小于500字 内容,不过审 168 | if utf8.RuneCountInString(v.ContentText) < config.CollectorConfig.ContentMinLength { 169 | status = 3 170 | } 171 | if strings.Contains(v.ContentText, "ICP备") || strings.Contains(v.ContentText, "政府网站标识码") || strings.Contains(v.ContentText, "以上版本浏览本站") || strings.Contains(v.ContentText, "版权声明") || strings.Contains(v.ContentText, "公网安备") { 172 | status = 3 173 | } 174 | 175 | db.Model(Article{}).Where("`id` = ?", v.Id).Update("status", status) 176 | 177 | timeTemplate1 := "2006-01-02 15:04:05" 178 | timestamp := int(time.Now().Unix()) 179 | pubTime, _ := time.ParseInLocation(timeTemplate1, v.PubDate, time.Local) 180 | if pubTime.Unix() > 0 { 181 | timestamp = int(pubTime.Unix()) 182 | } 183 | 184 | v.UpdatedTime = int(time.Now().Unix()) 185 | v.CreatedTime = timestamp 186 | v.Status = status 187 | 188 | article := v 189 | fmt.Println(status, v.Title, v.OriginUrl) 190 | article.Save(db) 191 | 192 | AutoPublish(article) 193 | } 194 | 195 | func AutoPublish(article *Article) { 196 | if config.ContentConfig.AutoPublish == 0 || article.Status != 1 { 197 | return 198 | } 199 | publishData := map[string]string{ 200 | config.ContentConfig.TitleField: article.Title, 201 | } 202 | if config.ContentConfig.KeywordsField != "" { 203 | publishData[config.ContentConfig.KeywordsField] = article.Keywords 204 | } 205 | if config.ContentConfig.DescriptionField != "" { 206 | 
publishData[config.ContentConfig.DescriptionField] = article.Description 207 | } 208 | if config.ContentConfig.CreatedTimeField != "" { 209 | publishData[config.ContentConfig.CreatedTimeField] = strconv.Itoa(article.CreatedTime) 210 | } 211 | if config.ContentConfig.AuthorField != "" { 212 | publishData[config.ContentConfig.AuthorField] = article.Author 213 | } 214 | if config.ContentConfig.ViewsField != "" { 215 | publishData[config.ContentConfig.ViewsField] = strconv.Itoa(article.Views) 216 | } 217 | if config.ContentConfig.TableName == config.ContentConfig.ContentTableName || config.ContentConfig.ContentTableName == "" || config.ContentConfig.AutoPublish == 2 { 218 | if config.ContentConfig.ContentField != "" { 219 | publishData[config.ContentConfig.ContentField] = article.Content 220 | } 221 | } 222 | if len(config.ContentConfig.ExtraFields) > 0 { 223 | for _, v := range config.ContentConfig.ExtraFields { 224 | value := v.Value 225 | if v.Value == "{id}" { 226 | //获取id 227 | value = strconv.Itoa(article.Id) 228 | } else if v.Value == "{py}" { 229 | //获取标题首字母 230 | str, err := pinyin.New(article.Title).Split("-").Mode(pinyin.WithoutTone).Convert() 231 | if err == nil { 232 | value = "" 233 | strArr := strings.Split(str, "-") 234 | for _, v := range strArr { 235 | value += string(v[0]) 236 | } 237 | } 238 | } else if v.Value == "{pinyin}" { 239 | //获取标题拼音 240 | str, err := pinyin.New(article.Title).Split("").Mode(pinyin.WithoutTone).Convert() 241 | if err == nil { 242 | value = str 243 | } 244 | } else if v.Value == "{time}" { 245 | //获取标题首字母 246 | value = strconv.Itoa(int(time.Now().Unix())) 247 | } else if v.Value == "{date}" { 248 | //获取标题首字母 249 | value = time.Now().Format("2006-01-02") 250 | } 251 | publishData[v.Key] = value 252 | } 253 | } 254 | 255 | if config.ContentConfig.AutoPublish == 1 { 256 | //本地发布 257 | publishDataKeys := make([]string, len(publishData)) 258 | publishDataValues := make([]string, len(publishData)) 259 | j := 0 260 | for k, v := 
range publishData { 261 | publishDataKeys[j] = k 262 | publishDataValues[j] = fmt.Sprintf("'%s'", v) 263 | j++ 264 | } 265 | 266 | insertId := int64(0) 267 | result, err := services.DB.DB().Exec(fmt.Sprintf("INSERT INTO `%s` (%s)VALUES(%s)", config.ContentConfig.TableName, strings.Join(publishDataKeys, ","), strings.Join(publishDataValues, ","))) 268 | if err == nil { 269 | insertId, err = result.LastInsertId() 270 | if config.ContentConfig.ContentTableName != "" && config.ContentConfig.TableName != config.ContentConfig.ContentTableName { 271 | services.DB.Exec(fmt.Sprintf("INSERT INTO `%s` (%s, %s)VALUES(?, ?)", config.ContentConfig.ContentTableName, config.ContentConfig.ContentIdField, config.ContentConfig.ContentField), insertId, article.Content) 272 | } 273 | } 274 | } else if config.ContentConfig.AutoPublish == 2 && config.ContentConfig.RemoteUrl != "" { 275 | //headers 276 | sg := gorequest.New().Timeout(10 * time.Second).Post(config.ContentConfig.RemoteUrl) 277 | if config.ContentConfig.ContentType == "json" { 278 | sg = sg.Set("Content-Type", "multipart/form-data") 279 | } else if config.ContentConfig.ContentType == "urlencode" { 280 | sg = sg.Set("Content-Type", "application/x-www-form-urlencoded") 281 | } else { 282 | sg = sg.Set("Content-Type", "application/json") 283 | } 284 | if len(config.ContentConfig.Headers) > 0 { 285 | for _, v := range config.ContentConfig.Headers { 286 | sg = sg.Set(v.Key, v.Value) 287 | } 288 | } 289 | if len(config.ContentConfig.Cookies) > 0 { 290 | urlInfo, _ := url.Parse(config.ContentConfig.RemoteUrl) 291 | for _, v := range config.ContentConfig.Cookies { 292 | cookie := &http.Cookie{ 293 | Name: v.Key, 294 | Value: v.Value, 295 | Path: "/", 296 | Domain: urlInfo.Hostname(), 297 | Expires: time.Now().Add(86400 * time.Second), 298 | } 299 | sg = sg.AddCookie(cookie) 300 | } 301 | } 302 | //不接收处理结果 303 | resp, _, errs := sg.SendMap(publishData).End() 304 | if len(errs) > 0 { 305 | fmt.Println(errs) 306 | return 307 | } 308 | 
defer resp.Body.Close() 309 | log.Println(resp.Status) 310 | } 311 | } 312 | 313 | func CollectLinks(link string) ([]Article, error) { 314 | requestData, err := convert.Request(link) 315 | if err != nil { 316 | log.Println(err) 317 | return nil, err 318 | } 319 | 320 | htmlR := strings.NewReader(requestData.Body) 321 | doc, err := goquery.NewDocumentFromReader(htmlR) 322 | if err != nil { 323 | return nil, err 324 | } 325 | 326 | var articles []Article 327 | aLinks := doc.Find("a") 328 | //读取所有连接 329 | for i := range aLinks.Nodes { 330 | href, exists := aLinks.Eq(i).Attr("href") 331 | title := strings.TrimSpace(aLinks.Eq(i).Text()) 332 | if exists { 333 | href = ParseLink(href, link) 334 | } 335 | if len(href) > 250 { 336 | href = string(href[:250]) 337 | } 338 | //斜杠/结尾的抛弃 339 | //if strings.HasSuffix(href, "/") == false { 340 | articles = append(articles, Article{ 341 | Title: title, 342 | OriginUrl: href, 343 | }) 344 | //} 345 | } 346 | 347 | return articles, nil 348 | } 349 | 350 | func ParseLink(link string, baseUrl string) string { 351 | if !strings.HasSuffix(baseUrl, "/") { 352 | baseUrl += "/" 353 | } 354 | if strings.Contains(link, "javascript") || strings.Contains(link, "void") || link == "#" || link == "./" || link == "../" || link == "../../" { 355 | return "" 356 | } 357 | 358 | link = replaceDot(link, baseUrl) 359 | 360 | return link 361 | } 362 | 363 | func replaceDot(currUrl string, baseUrl string) string { 364 | if strings.HasPrefix(currUrl, "//") { 365 | currUrl = fmt.Sprintf("https:%s", currUrl) 366 | } 367 | urlInfo, err := url.Parse(currUrl) 368 | if err != nil { 369 | return "" 370 | } 371 | if urlInfo.Scheme != "" { 372 | return currUrl 373 | } 374 | baseInfo, err := url.Parse(baseUrl) 375 | if err != nil { 376 | return "" 377 | } 378 | 379 | u := baseInfo.Scheme + "://" + baseInfo.Host 380 | var path string 381 | if strings.Index(urlInfo.Path, "/") == 0 { 382 | path = urlInfo.Path 383 | } else { 384 | path = filepath.Dir(baseInfo.Path) + 
"/" + urlInfo.Path 385 | } 386 | 387 | rst := make([]string, 0) 388 | pathArr := strings.Split(path, "/") 389 | 390 | // 如果path是已/开头,那在rst加入一个空元素 391 | if pathArr[0] == "" { 392 | rst = append(rst, "") 393 | } 394 | for _, p := range pathArr { 395 | if p == ".." { 396 | if len(rst) > 0 { 397 | if rst[len(rst)-1] == ".." { 398 | rst = append(rst, "..") 399 | } else { 400 | rst = rst[:len(rst)-1] 401 | } 402 | } 403 | } else if p != "" && p != "." { 404 | rst = append(rst, p) 405 | } 406 | } 407 | return u + strings.Join(rst, "/") 408 | } 409 | 410 | func CollectDetail(article *Article) error { 411 | requestData, err := convert.Request(article.OriginUrl) 412 | if err != nil { 413 | log.Println(err) 414 | return err 415 | } 416 | //先删除一些不必要的标签 417 | re, _ := regexp.Compile("\\") 418 | requestData.Body = re.ReplaceAllString(requestData.Body, "") 419 | re, _ = regexp.Compile("\\") 420 | requestData.Body = re.ReplaceAllString(requestData.Body, "") 421 | 422 | htmlR := strings.NewReader(requestData.Body) 423 | doc, err := goquery.NewDocumentFromReader(htmlR) 424 | if err != nil { 425 | return err 426 | } 427 | 428 | //获取前缀 429 | article.GetDomain() 430 | 431 | //如果是百度百科地址,单独处理 432 | if strings.Contains(article.OriginUrl, "baike.baidu.com") { 433 | article.ParseBaikeDetail(doc, requestData.Body) 434 | } else { 435 | article.ParseNormalDetail(doc, requestData.Body) 436 | } 437 | nameRune := []rune(article.Description) 438 | curLen := len(nameRune) 439 | if curLen > 150 { 440 | article.Description = string(nameRune[:150]) 441 | } 442 | 443 | return nil 444 | } 445 | 446 | func (article *Article) ParseBaikeDetail(doc *goquery.Document, body string) { 447 | //获取标题 448 | article.Title = doc.Find("h1").Text() 449 | //获取描述 450 | reg := regexp.MustCompile(``) 451 | match := reg.FindStringSubmatch(body) 452 | if len(match) > 1 { 453 | article.Description = match[1] 454 | } 455 | //获取关键词 456 | reg = regexp.MustCompile(``) 457 | match = reg.FindStringSubmatch(body) 458 | if 
len(match) > 1 { 459 | article.Keywords = match[1] 460 | } else if article.Title != "" { 461 | keywords := GetKeywords(article.Title, 5) 462 | article.Keywords = strings.Join(keywords, ",") 463 | } 464 | 465 | doc.Find(".edit-icon").Remove() 466 | contentList := doc.Find(".para-title,.para") 467 | content := "" 468 | for i := range contentList.Nodes { 469 | content += "

" + contentList.Eq(i).Text() + "

" 470 | } 471 | 472 | article.Content = content 473 | } 474 | 475 | func (article *Article) ParseNormalDetail(doc *goquery.Document, body string) { 476 | article.ParseTitle(doc, body) 477 | 478 | if article.Title != "" { 479 | keywords := GetKeywords(article.Title, 5) 480 | article.Keywords = strings.Join(keywords, ",") 481 | } 482 | 483 | //尝试获取正文内容 484 | article.ParseContent(doc, body) 485 | 486 | //尝试获取作者 487 | reg := regexp.MustCompile(`]*>`) 488 | match := reg.FindStringSubmatch(body) 489 | if len(match) > 1 { 490 | author := match[1] 491 | if author == "" { 492 | reg := regexp.MustCompile(`(?i)(来源|作者)\s*(:|:|\s)\s*([^\s]+)`) 493 | match := reg.FindStringSubmatch(body) 494 | if len(match) > 1 { 495 | author = match[3] 496 | } 497 | } 498 | article.Author = author 499 | } 500 | 501 | //尝试获取法布时间 502 | reg = regexp.MustCompile(`(?i)]*>`) 503 | match = reg.FindStringSubmatch(body) 504 | if len(match) > 1 { 505 | pubDate := match[1] 506 | if pubDate == "" { 507 | reg = regexp.MustCompile(`(?i)([0-9]{4})\s*[\-|\/|年]\s*([0-9]{1,2})\s*[\-|\/|月]\s*([0-9]{1,2})\s*([\-|\/|日])?\s*(([0-9]{1,2})\s*[:|:|时]\s*([0-9]{1,2})\s*([:|:|分])?\s*([0-9]{1,2})?)?`) 508 | match = reg.FindStringSubmatch(body) 509 | if len(match) > 1 { 510 | if match[1] != "" { 511 | pubDate = match[1] + "-" + match[2] + "-" + match[3] 512 | } 513 | if match[5] != "" { 514 | pubDate += " " + match[6] + ":" + match[7] 515 | if match[9] != "" { 516 | pubDate += ":" + match[9] 517 | } else { 518 | pubDate += ":00" 519 | } 520 | } else { 521 | pubDate += " 12:00:00" 522 | } 523 | } 524 | } 525 | article.PubDate = pubDate 526 | } 527 | } 528 | 529 | func (article *Article) ParseTitle(doc *goquery.Document, body string) { 530 | //尝试获取标题 531 | //先尝试获取h1标签 532 | title := "" 533 | h1s := doc.Find("h1") 534 | if h1s.Length() > 0 { 535 | for i := range h1s.Nodes { 536 | item := h1s.Eq(i) 537 | item.Children().Remove() 538 | text := strings.TrimSpace(item.Text()) 539 | textLen := utf8.RuneCountInString(text) 540 | if 
textLen >= config.CollectorConfig.TitleMinLength && textLen > utf8.RuneCountInString(title) && !HasContain(text, config.CollectorConfig.TitleExclude) && !HasPrefix(text, config.CollectorConfig.TitleExcludePrefix) && !HasSuffix(text, config.CollectorConfig.TitleExcludeSuffix) { 541 | title = text 542 | } 543 | } 544 | } 545 | if title == "" { 546 | //获取 政府网站的 547 | text, exist := doc.Find("meta[name=ArticleTitle]").Attr("content") 548 | if exist { 549 | text = strings.TrimSpace(text) 550 | if utf8.RuneCountInString(text) >= config.CollectorConfig.TitleMinLength && !HasContain(text, config.CollectorConfig.TitleExclude) && !HasPrefix(text, config.CollectorConfig.TitleExcludePrefix) && !HasSuffix(text, config.CollectorConfig.TitleExcludeSuffix) { 551 | title = text 552 | } 553 | } 554 | } 555 | if title == "" { 556 | //获取title标签 557 | text := doc.Find("title").Text() 558 | text = strings.ReplaceAll(text, "_", "-") 559 | sepIndex := strings.Index(text, "-") 560 | if sepIndex > 0 { 561 | text = text[:sepIndex] 562 | } 563 | text = strings.TrimSpace(text) 564 | if utf8.RuneCountInString(text) >= config.CollectorConfig.TitleMinLength && !HasContain(text, config.CollectorConfig.TitleExclude) && !HasPrefix(text, config.CollectorConfig.TitleExcludePrefix) && !HasSuffix(text, config.CollectorConfig.TitleExcludeSuffix) { 565 | title = text 566 | } 567 | } 568 | 569 | log.Println(len(title), title) 570 | if title == "" { 571 | //获取title标签 572 | //title = doc.Find("#title,.title,.bt,.articleTit").First().Text() 573 | h2s := doc.Find("#title,.title,.bt,.articleTit,.right-xl>p,.biaoti") 574 | if h2s.Length() > 0 { 575 | for i := range h2s.Nodes { 576 | item := h2s.Eq(i) 577 | item.Children().Remove() 578 | text := strings.TrimSpace(item.Text()) 579 | textLen := utf8.RuneCountInString(item.Text()) 580 | if textLen >= config.CollectorConfig.TitleMinLength && textLen > utf8.RuneCountInString(title) && !HasContain(text, config.CollectorConfig.TitleExclude) && !HasPrefix(text, 
config.CollectorConfig.TitleExcludePrefix) && !HasSuffix(text, config.CollectorConfig.TitleExcludeSuffix) { 581 | title = text 582 | } 583 | } 584 | } 585 | } 586 | if title == "" { 587 | //如果标题为空,那么尝试h2 588 | h2s := doc.Find("h2,.name") 589 | if h2s.Length() > 0 { 590 | for i := range h2s.Nodes { 591 | item := h2s.Eq(i) 592 | item.Children().Remove() 593 | text := strings.TrimSpace(item.Text()) 594 | textLen := utf8.RuneCountInString(text) 595 | if textLen >= config.CollectorConfig.TitleMinLength && textLen > utf8.RuneCountInString(title) && !HasContain(text, config.CollectorConfig.TitleExclude) && !HasPrefix(text, config.CollectorConfig.TitleExcludePrefix) && !HasSuffix(text, config.CollectorConfig.TitleExcludeSuffix) { 596 | title = text 597 | } 598 | } 599 | } 600 | } 601 | 602 | title = strings.Replace(strings.Replace(strings.TrimSpace(title), "\t", "", -1), "\n", " ", -1) 603 | title = strings.Replace(title, "
", "", -1) 604 | title = strings.Replace(title, "
", "", -1) 605 | //只要第一个 606 | if utf8.RuneCountInString(title) > 50 { 607 | //减少误伤 608 | title = strings.ReplaceAll(title, "、", "-") 609 | } 610 | title = strings.ReplaceAll(title, "_", "-") 611 | sepIndex := strings.Index(title, "-") 612 | if sepIndex > 0 { 613 | title = title[:sepIndex] 614 | } 615 | 616 | article.Title = title 617 | } 618 | 619 | func (article *Article) ParseContent(doc *goquery.Document, body string) { 620 | content := "" 621 | contentText := "" 622 | description := "" 623 | contentLength := 0 624 | 625 | //对一些固定的内容,直接获取值 626 | contentItems := doc.Find("UCAPCONTENT,#mainText,.article-content,#article-content,#articleContnet,.entry-content,.the_body,.rich_media_content,#js_content,.word_content,.pages_content,.wendang_content,#content,.RichText,.markdown-section") 627 | if contentItems.Length() > 0 { 628 | for i := range contentItems.Nodes { 629 | contentItem := contentItems.Eq(i) 630 | content, _ = contentItem.Html() 631 | contentText = contentItem.Text() 632 | contentText = strings.Replace(contentText, " ", "", -1) 633 | contentText = strings.Replace(contentText, "\n", "", -1) 634 | contentText = strings.Replace(contentText, "\r", "", -1) 635 | contentText = strings.Replace(contentText, "\t", "", -1) 636 | nameRune := []rune(contentText) 637 | curLen := len(nameRune) 638 | if curLen > 150 { 639 | description = string(nameRune[:150]) 640 | } 641 | //判断内容的真实性 642 | if curLen < config.CollectorConfig.ContentMinLength { 643 | contentText = "" 644 | } 645 | aCount := 0 646 | aLinks := contentItem.Find("a") 647 | if aLinks.Length() > 0 { 648 | for i := range aLinks.Nodes { 649 | href, exist := aLinks.Eq(i).Attr("href") 650 | aText := strings.TrimSpace(aLinks.Eq(i).Text()) 651 | if exist && href != "" && !strings.HasPrefix(href, "#") && aText != "" { 652 | aCount++ 653 | } 654 | } 655 | } 656 | if aCount > 5 { 657 | //太多连接了,直接放弃该内容 658 | contentText = "" 659 | } 660 | //查找内部div,如果存在,则使用它替代上一级 661 | divs := contentItem.Find("div") 662 | 
//只有内部没有div了或者内部div内容太少,才认为是真正的内容 663 | if divs.Length() > 0 { 664 | for i := range divs.Nodes { 665 | div := divs.Eq(i) 666 | if (div.Find("div").Length() == 0 || utf8.RuneCountInString(div.Find("div").Text()) < 100) && div.ChildrenFiltered("p").Length() > 0 && utf8.RuneCountInString(div.Text()) >= config.CollectorConfig.ContentMinLength { 667 | contentItem = div 668 | break 669 | } 670 | } 671 | } 672 | //排除一些不对的标签 673 | otherItems := contentItem.Find("input,textarea,form,button,footer,.footer") 674 | if otherItems.Length() > 0 { 675 | otherItems.Remove() 676 | } 677 | contentItem.Find("h1").Remove() 678 | //根据规则过滤 679 | if HasContain(contentText, config.CollectorConfig.ContentExclude) { 680 | contentText = "" 681 | } 682 | 683 | inner := contentItem.Find("*") 684 | for i := range inner.Nodes { 685 | item := inner.Eq(i) 686 | if HasContain(item.Text(), config.CollectorConfig.ContentExcludeLine) { 687 | item.Remove() 688 | } 689 | } 690 | 691 | if len(contentText) > 0 { 692 | break 693 | } 694 | } 695 | } 696 | 697 | if contentText == "" { 698 | content = "" 699 | //通用的获取方法 700 | divs := doc.Find("div,article") 701 | for i := range divs.Nodes { 702 | item := divs.Eq(i) 703 | pCount := item.ChildrenFiltered("p").Length() 704 | brCount := item.ChildrenFiltered("br").Length() 705 | aCount := 0 706 | aLinks := item.Find("a").Find("a") 707 | if aLinks.Length() > 0 { 708 | for i := range aLinks.Nodes { 709 | href, exist := aLinks.Eq(i).Attr("href") 710 | aText := strings.TrimSpace(aLinks.Eq(i).Text()) 711 | if exist && href != "" && !strings.HasPrefix(href, "#") && aText != "" { 712 | aCount++ 713 | } 714 | } 715 | } 716 | if aCount > 5 { 717 | //太多连接了,直接放弃该内容 718 | continue 719 | } 720 | //排除一些不对的标签 721 | otherLength := item.Find("input,textarea,form,button,footer,.footer").Length() 722 | if otherLength > 0 { 723 | continue 724 | } 725 | if item.Find("div").Length() > 0 && utf8.RuneCountInString(item.Find("div").Text()) >= config.CollectorConfig.ContentMinLength { 726 
| continue 727 | } 728 | if pCount > 0 || brCount > 0 { 729 | //表示查找到了一个p 730 | //移除空格和换行 731 | checkText := item.Text() 732 | checkText = strings.Replace(checkText, " ", "", -1) 733 | checkText = strings.Replace(checkText, "\n", "", -1) 734 | checkText = strings.Replace(checkText, "\r", "", -1) 735 | checkText = strings.Replace(checkText, "\t", "", -1) 736 | nameRune := []rune(checkText) 737 | curLen := len(nameRune) 738 | 739 | //根据规则过滤 740 | if HasContain(checkText, config.CollectorConfig.ContentExclude) { 741 | continue 742 | } 743 | if curLen <= config.CollectorConfig.ContentMinLength { 744 | continue 745 | } 746 | 747 | item.Find("h1,a").Remove() 748 | inner := item.Find("*") 749 | for i := range inner.Nodes { 750 | innerItem := inner.Eq(i) 751 | if HasContain(innerItem.Text(), config.CollectorConfig.ContentExcludeLine) { 752 | innerItem.Remove() 753 | } 754 | } 755 | 756 | if curLen > contentLength { 757 | contentLength = curLen 758 | content, _ = item.Html() 759 | contentText = checkText 760 | if curLen <= 150 { 761 | description = string(nameRune) 762 | } else { 763 | description = string(nameRune[:150]) 764 | } 765 | } 766 | } 767 | } 768 | } 769 | //对内容进行处理 770 | re, _ := regexp.Compile("src=[\"']+?(.*?)[\"']+?[^>]+?>") 771 | content = re.ReplaceAllStringFunc(content, article.ReplaceSrc) 772 | 773 | re2, _ := regexp.Compile("href=[\"']+?(.*?)[\"']+?[^>]+?>") 774 | content = re2.ReplaceAllStringFunc(content, article.ReplaceHref) 775 | 776 | article.ContentText = contentText 777 | article.Description = strings.TrimSpace(description) 778 | article.Content = strings.TrimSpace(content) 779 | } 780 | 781 | func (article *Article) GetDomain() { 782 | baseUrlArr := strings.Split(article.OriginUrl, "/") 783 | pathUrlArr := baseUrlArr[:len(baseUrlArr)-1] 784 | baseUrlArr = baseUrlArr[:3] 785 | baseUrl := strings.Join(baseUrlArr, "/") 786 | article.OriginDomain = baseUrl 787 | article.OriginPath = strings.Join(pathUrlArr, "/") 788 | } 789 | 790 | func (article 
// InArray reports whether need is exactly equal to one of the entries of
// needArray. A nil or empty needArray never matches.
func InArray(need string, needArray []string) bool {
	for _, candidate := range needArray {
		if candidate == need {
			return true
		}
	}
	return false
}

// HasPrefix reports whether need starts with at least one of the non-empty
// entries of needArray.
func HasPrefix(need string, needArray []string) bool {
	return matchesAnyPattern(need, needArray, strings.HasPrefix)
}

// HasSuffix reports whether need ends with at least one of the non-empty
// entries of needArray.
func HasSuffix(need string, needArray []string) bool {
	return matchesAnyPattern(need, needArray, strings.HasSuffix)
}

// HasContain reports whether need contains at least one of the non-empty
// entries of needArray as a substring.
func HasContain(need string, needArray []string) bool {
	return matchesAnyPattern(need, needArray, strings.Contains)
}

// matchesAnyPattern reports whether match(s, p) is true for any non-empty p
// in patterns.
//
// Empty patterns are skipped on purpose: strings.HasPrefix, strings.HasSuffix
// and strings.Contains all return true for an empty pattern, so a single
// blank entry in a pattern list would otherwise make every input match.
func matchesAnyPattern(s string, patterns []string, match func(s, pattern string) bool) bool {
	for _, pattern := range patterns {
		if pattern == "" {
			continue
		}
		if match(s, pattern) {
			return true
		}
	}
	return false
}
init() { 875 | keyword.Extractor.Init(keyword.DefaultProps, true, config.ExecPath+"dictionary.txt") 876 | } 877 | -------------------------------------------------------------------------------- /doc.md: -------------------------------------------------------------------------------- 1 | # 不用写采集规则也可以轻松采集网站文章,揭秘一款万能文章采集软件的工作原理 2 | 3 | 一直以来,大家都在用各种各样的采集器或网站自带的采集功能,如织梦采集侠、火车头采集器、八爪鱼采集器等,这些采集软件都有一个共同的特点,就是要编写采集规则才能采集到文章,这个技术性的问题,对于新手来说,经常都是丈二和尚摸不着头脑,可真不是一件容易的事。即使是对于老站长,需要采集多个网站数据的时候,需要分别给不同的网站写不同的采集规则,也是一个费力费时费事的工作。很多做站群的朋友都深有体会,需要给每个站写采集规则,那简直是苦不堪言。有人说,站长是一个网络搬运工,这话很在理,互联网上的文章,都是你搬我的,我搬你的,互相搬来搬去。那么有没有一款既免费又开源的采集软件呢?万能文章采集器就像为你量身定做的采集软件一样,本采集器内置了常用的采集规则,只要添加文章列表链接,就能将内容采集回来。 4 | 5 | ### 万能文章采集器有什么优势 6 | * 本采集器内置了常用的采集规则,只要添加文章列表链接,就能将内容采集回来。 7 | * 本采集器采用多线程并行采集,可在同一时间采集更多的内容。 8 | * 本采集器只专注于采集文章这一件事,不用来定制采集其他内容,只专心做一件事。 9 | 10 | ### 万能文章采集器能采集哪些内容 11 | 本采集器可以采集到的内容有:文章标题、文章关键词、文章描述、文章详情内容、文章作者、文章发布时间、文章浏览量。 12 | 13 | ### 万能文章采集器可以在哪里运行 14 | 本采集器可以运行在 Windows系统、Mac 系统、Linux系统(Centos、Ubuntu等),可以下载编译好的程序直接执行,也可以下载源码自己编译。 15 | 16 | ### 万能文章采集软件使用教程 17 | * 下载万能文章采集软件 18 | 我们可以从GitHub上下载这个免费的采集软件[https://github.com/fesiong/collector/releases](https://github.com/fesiong/collector/releases),根据自己的系统,下载适合的软件,比如使用的是Windows系统,则下载collector.windows.zip。使用linux的用户或需要安装到服务器的用户,可以下载linux版本collector.linux.zip。 19 | 20 | * 安装万能文章采集器 21 | 采集器安装非常简单,只需将下载下来的安装包解压,到任意文件夹都可以,然后双击运行collector.exe,默认情况下它会自动打开 http://127.0.0.1:8088 ,如果它不自动打开,则你可以直接在浏览器中输入 http://127.0.0.1:8088 打开,然后会提示填写数据库信息, 22 | ![安装](public/install.png) 23 | 因为采集过程需要用到数据库,按正常的网站数据库填写就可以了,类似安装织梦cms等其他cms一样。安装好了就可以开始使用了。 24 | ![首页](public/index.png) 25 | 26 | * 如何使用万能文章采集器 27 | 已经安装完毕了,现在你只需要找到你需要采集的网站列表页,添加到数据源中,采集器就会自动开始采集了。数据源你甚至不需要仔细找列表,你也可以直接添加需要采集的网址的首页,都可以,它会自动识别文章数据,并抓取回来。添加完了数据源后,不用着急,采集软件设置了每10分钟采集一次文章列表,每分钟采集一次文章详情。添加完数据源后,只需要等待就可以了。 28 | ![采集源](public/source.png) 29 | ![文章列表](public/list.png) 30 | 31 | * 配置采集规则 32 | 这一款采集器不需要编写正则表达式,我们可以通过简单的配置来完成采集的规则配置: 33 | ![采集设置](public/setting.png) 34 | 35 | *
如何自动发布文章到网站 36 | 采集器自带了2种发布方式,一种是同数据库直接发布,这样的效率是最高的,你可以安装到你的网站同一个数据库中,可以提高发布效率,也可以采用远程发布方式来发布,填写发布地址就可以发布了。 37 | ![本地发布](public/publish.png) 38 | ![远程发布](public/remote.png) 39 | 40 | ### 结束语 41 | 上面就是万能文章采集器使用方法和工作原理了,按照上面的步骤你可以轻松采集到你想要的文章,它是24小时不间断工作的,你打开了采集器,它就能源源不断的给你采集文章并自动发布了。 -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module collector 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/Chain-Zhang/pinyin v0.1.3 7 | github.com/PuerkitoBio/goquery v1.5.1 8 | github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d // indirect 9 | github.com/ajg/form v1.5.1 // indirect 10 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 11 | github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072 // indirect 12 | github.com/fesiong/goproject/convert v0.0.0-20210407013830-243f22255055 13 | github.com/google/go-querystring v1.0.0 // indirect 14 | github.com/huichen/sego v0.0.0-20180617034105-3f3c8a8cfacc // indirect 15 | github.com/imkira/go-interpol v1.1.0 // indirect 16 | github.com/issue9/assert v1.4.0 // indirect 17 | github.com/jinzhu/gorm v1.9.14 18 | github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88 // indirect 19 | github.com/kataras/iris/v12 v12.1.8 20 | github.com/mattn/go-colorable v0.1.7 // indirect 21 | github.com/moul/http2curl v1.0.0 // indirect 22 | github.com/onsi/ginkgo v1.14.0 // indirect 23 | github.com/parnurzeal/gorequest v0.2.16 24 | github.com/polaris1119/keyword v0.0.0-20170608075927-96ae6735f2f2 25 | github.com/robfig/cron/v3 v3.0.1 26 | github.com/sergi/go-diff v1.1.0 // indirect 27 | github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect 28 | github.com/stretchr/testify v1.5.1 // indirect 29 | github.com/valyala/fasthttp v1.16.0 // indirect 30 | github.com/xeipuuv/gojsonschema v1.2.0 // indirect 31 | github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0 // indirect 
32 | github.com/yudai/gojsondiff v1.0.0 // indirect 33 | github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 // indirect 34 | github.com/yudai/pp v2.0.1+incompatible // indirect 35 | golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a // indirect 36 | golang.org/x/net v0.0.0-20200904194848-62affa334b73 37 | golang.org/x/sys v0.0.0-20200909081042-eff7692f9009 // indirect 38 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect 39 | ) 40 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= 2 | github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= 3 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 4 | github.com/Chain-Zhang/pinyin v0.1.3 h1:RzErNyNwVa8z2sOLCuXSOtVdY/AsARb8mBzI2p2qtnE= 5 | github.com/Chain-Zhang/pinyin v0.1.3/go.mod h1:5iHpt9p4znrnaP59/hfPMnAojajkDxQaP9io+tRMPho= 6 | github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53 h1:sR+/8Yb4slttB4vD+b9btVEnWgL3Q00OBTzVT8B9C0c= 7 | github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53/go.mod h1:+3IMCy2vIlbG1XG/0ggNQv0SvxCAIpPM5b1nCz56Xno= 8 | github.com/CloudyKit/jet/v3 v3.0.0 h1:1PwO5w5VCtlUUl+KTOBsTGZlhjWkcybsGaAau52tOy8= 9 | github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMdUywE7VMo= 10 | github.com/Joker/hpp v1.0.0 h1:65+iuJYdRXv/XyN62C1uEmmOx3432rNG/rKlX6V7Kkc= 11 | github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= 12 | github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= 13 | github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= 14 | github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398 
h1:WDC6ySpJzbxGWFh4aMxFFC28wwGp5pEuoTtvA4q/qQ4= 15 | github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= 16 | github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:ir/IFJU5xbja5UaBEQLjcvn7aAU01nqU/NUyOBEU+ew= 17 | github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:PRWNwWq0yifz6XDPZu48aSld8BWwBfr2JKB2bGWiEd4= 18 | github.com/ajg/form v1.5.1 h1:t9c7v8JUKu/XxOGBU0yjNpaMloxGEJhUkqFRq0ibGeU= 19 | github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= 20 | github.com/andybalholm/brotli v1.0.0 h1:7UCwP93aiSfvWpapti8g88vVVGp2qqtGyePsSuDafo4= 21 | github.com/andybalholm/brotli v1.0.0/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= 22 | github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= 23 | github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= 24 | github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= 25 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 h1:OYA+5W64v3OgClL+IrOD63t4i/RW7RqrAVl9LTZ9UqQ= 26 | github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394/go.mod h1:Q8n74mJTIgjX4RBBcHnJ05h//6/k6foqmgE45jTQtxg= 27 | github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible h1:Ppm0npCCsmuR9oQaBtRuZcmILVE74aXE+AmrJj8L2ns= 28 | github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= 29 | github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= 30 | github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= 31 | github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 32 | github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= 33 | 
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 34 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 35 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 36 | github.com/denisenkom/go-mssqldb v0.0.0-20191124224453-732737034ffd h1:83Wprp6ROGeiHFAP8WJdI2RoxALQYgdllERc3N5N2DM= 37 | github.com/denisenkom/go-mssqldb v0.0.0-20191124224453-732737034ffd/go.mod h1:xbL0rPBG9cCiLr28tMa8zpbdarY27NDyej4t/EjAShU= 38 | github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= 39 | github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= 40 | github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= 41 | github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= 42 | github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= 43 | github.com/elazarl/goproxy v0.0.0-20200809112317-0581fc3aee2d h1:rtM8HsT3NG37YPjz8sYSbUSdElP9lUsQENYzJDZDUBE= 44 | github.com/elazarl/goproxy v0.0.0-20200809112317-0581fc3aee2d/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM= 45 | github.com/elazarl/goproxy/ext v0.0.0-20190711103511-473e67f1d7d2 h1:dWB6v3RcOy03t/bUadywsbyrQwCqZeNIEX6M1OtSZOM= 46 | github.com/elazarl/goproxy/ext v0.0.0-20190711103511-473e67f1d7d2/go.mod h1:gNh8nYJoAm43RfaxurUnxr+N1PwuFV3ZMl/efxlIlY8= 47 | github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5 h1:Yzb9+7DPaBjB8zlTR87/ElzFsnQfuHnVUVqpZZIcV5Y= 48 | github.com/erikstmartin/go-testdb v0.0.0-20160219214506-8d10e4a1bae5/go.mod h1:a2zkGnVExMxdzMo3M0Hi/3sEU+cWnZpSni0O6/Yb/P0= 49 | github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= 50 | github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072 
h1:DddqAaWDpywytcG8w/qoQ5sAN8X12d3Z3koB0C3Rxsc= 51 | github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8= 52 | github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo= 53 | github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= 54 | github.com/fesiong/goproject/convert v0.0.0-20210407013830-243f22255055 h1:ckgkUTB2yyJHv4OtOK51pLsM/MZcYkvzUHK2ZJIWtT0= 55 | github.com/fesiong/goproject/convert v0.0.0-20210407013830-243f22255055/go.mod h1:tNP9Ov3vN1LVJAOR9NDvsVSC9MpmcymdUx4V7530qjA= 56 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 57 | github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= 58 | github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= 59 | github.com/gavv/httpexpect v2.0.0+incompatible h1:1X9kcRshkSKEjNJJxX9Y9mQ5BRfbxU5kORdjhlA1yX8= 60 | github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc= 61 | github.com/go-check/check v0.0.0-20180628173108-788fd7840127 h1:0gkP6mzaMqkmpcJYCFOLkIBwI7xFExG03bbkOkCvUPI= 62 | github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= 63 | github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs= 64 | github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= 65 | github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= 66 | github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= 67 | github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= 68 | github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe h1:lXe2qZdvpiX5WZkZR4hgp4KJVfY3nMkvmwbVkpv1rVY= 69 | github.com/golang-sql/civil 
v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= 70 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 71 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 72 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 73 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 74 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 75 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 76 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 77 | github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= 78 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 79 | github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= 80 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 81 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 82 | github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= 83 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 84 | github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= 85 | github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= 86 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 87 | github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= 88 | github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod 
h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= 89 | github.com/gorilla/websocket v1.4.1 h1:q7AeDBpnBk8AogcD4DSag/Ukw/KV+YhzLj2bP5HvKCM= 90 | github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= 91 | github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= 92 | github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= 93 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 94 | github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= 95 | github.com/huichen/sego v0.0.0-20180617034105-3f3c8a8cfacc h1:3LXYtoxQGFSjIL5ZJAn4PceSpwRohuTKYL1W4kJ7G8g= 96 | github.com/huichen/sego v0.0.0-20180617034105-3f3c8a8cfacc/go.mod h1:+/Bm7uk1bnJJMi9l6P88FgHeGtscOQiYbxW1j+BmgBY= 97 | github.com/imkira/go-interpol v1.1.0 h1:KIiKr0VSG2CUW1hl1jpiyuzuJeKUUpC8iM1AIE7N1Vk= 98 | github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA= 99 | github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= 100 | github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= 101 | github.com/iris-contrib/blackfriday v2.0.0+incompatible h1:o5sHQHHm0ToHUlAJSTjW9UWicjJSDDauOOQ2AHuIVp4= 102 | github.com/iris-contrib/blackfriday v2.0.0+incompatible/go.mod h1:UzZ2bDEoaSGPbkg6SAB4att1aAwTmVIx/5gCVqeyUdI= 103 | github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/+fafWORmlnuysV2EMP8MW+qe0= 104 | github.com/iris-contrib/jade v1.1.3 h1:p7J/50I0cjo0wq/VWVCDFd8taPJbuFC+bq23SniRFX0= 105 | github.com/iris-contrib/jade v1.1.3/go.mod h1:H/geBymxJhShH5kecoiOCSssPX7QWYH7UaeZTSWddIk= 106 | github.com/iris-contrib/pongo2 v0.0.1 h1:zGP7pW51oi5eQZMIlGA3I+FHY9/HOQWDB+572yin0to= 107 | github.com/iris-contrib/pongo2 v0.0.1/go.mod h1:Ssh+00+3GAZqSQb30AvBRNxBx7rf0GqwkjqxNd0u65g= 108 | github.com/iris-contrib/schema v0.0.1 
h1:10g/WnoRR+U+XXHWKBHeNy/+tZmM2kcAVGLOsz+yaDA= 109 | github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw= 110 | github.com/issue9/assert v1.4.0 h1:/hLHZOr6koZn6fM8feB2fSVl8RZ4o629hSdCeARWNWg= 111 | github.com/issue9/assert v1.4.0/go.mod h1:Yktk83hAVl1SPSYtd9kjhBizuiBIqUQyj+D5SE2yjVY= 112 | github.com/jinzhu/gorm v1.9.14 h1:Kg3ShyTPcM6nzVo148fRrcMO6MNKuqtOUwnzqMgVniM= 113 | github.com/jinzhu/gorm v1.9.14/go.mod h1:G3LB3wezTOWM2ITLzPxEXgSkOXAntiLHS7UdBefADcs= 114 | github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= 115 | github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= 116 | github.com/jinzhu/now v1.0.1 h1:HjfetcXq097iXP0uoPCdnM4Efp5/9MsM0/M+XOTeR3M= 117 | github.com/jinzhu/now v1.0.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= 118 | github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGns= 119 | github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= 120 | github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= 121 | github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= 122 | github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88 h1:uC1QfSlInpQF+M0ao65imhwqKnz3Q2z/d8PWZRMQvDM= 123 | github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= 124 | github.com/kataras/golog v0.0.10 h1:vRDRUmwacco/pmBAm8geLn8rHEdc+9Z4NAr5Sh7TG/4= 125 | github.com/kataras/golog v0.0.10/go.mod h1:yJ8YKCmyL+nWjERB90Qwn+bdyBZsaQwU3bTVFgkFIp8= 126 | github.com/kataras/iris/v12 v12.1.8 h1:O3gJasjm7ZxpxwTH8tApZsvf274scSGQAUpNe47c37U= 127 | github.com/kataras/iris/v12 v12.1.8/go.mod h1:LMYy4VlP67TQ3Zgriz8RE2h2kMZV2SgMYbq3UhfoFmE= 128 | github.com/kataras/neffos v0.0.14/go.mod h1:8lqADm8PnbeFfL7CLXh1WHw53dG27MC3pgi2R1rmoTE= 129 | github.com/kataras/pio v0.0.2 
h1:6NAi+uPJ/Zuid6mrAKlgpbI11/zK/lV4B2rxWaJN98Y= 130 | github.com/kataras/pio v0.0.2/go.mod h1:hAoW0t9UmXi4R5Oyq5Z4irTbaTsOemSrDGUtaTl7Dro= 131 | github.com/kataras/sitemap v0.0.5 h1:4HCONX5RLgVy6G4RkYOV3vKNcma9p236LdGOipJsaFE= 132 | github.com/kataras/sitemap v0.0.5/go.mod h1:KY2eugMKiPwsJgx7+U103YZehfvNGOXURubcGyk0Bz8= 133 | github.com/klauspost/compress v1.9.7 h1:hYW1gP94JUmAhBtJ+LNz5My+gBobDxPR1iVuKug26aA= 134 | github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= 135 | github.com/klauspost/compress v1.10.7 h1:7rix8v8GpI3ZBb0nSozFRgbtXKv+hOe+qfEpZqybrAg= 136 | github.com/klauspost/compress v1.10.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= 137 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 138 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 139 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 140 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 141 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 142 | github.com/lib/pq v1.1.1 h1:sJZmqHoEaY7f+NPP8pgLB/WxulyR3fewgCM2qaSlBb4= 143 | github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= 144 | github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= 145 | github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 146 | github.com/mattn/go-colorable v0.1.7 h1:bQGKb3vps/j0E9GfJQ03JyhRuxsvdAanXlT9BTw3mdw= 147 | github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= 148 | github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= 149 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 150 | github.com/mattn/go-sqlite3 v1.14.0 h1:mLyGNKR8+Vv9CAU7PphKa2hkEqxxhn8i32J6FPj1/QA= 151 | github.com/mattn/go-sqlite3 v1.14.0/go.mod 
h1:JIl7NbARA7phWnGvh0LKTyg7S9BA+6gx71ShQilpsus= 152 | github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw= 153 | github.com/mediocregopher/radix/v3 v3.4.2/go.mod h1:8FL3F6UQRXHXIBSPUs5h0RybMF8i4n7wVopoX3x7Bv8= 154 | github.com/microcosm-cc/bluemonday v1.0.2 h1:5lPfLTTAvAbtS0VqT+94yOtFnGfUWYyx0+iToC3Os3s= 155 | github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= 156 | github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 157 | github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= 158 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 159 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= 160 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 161 | github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg= 162 | github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 163 | github.com/moul/http2curl v1.0.0 h1:dRMWoAtb+ePxMlLkrCbAqh4TlPHXvoGUSQ323/9Zahs= 164 | github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= 165 | github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= 166 | github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= 167 | github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= 168 | github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= 169 | github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78= 170 | github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= 171 | github.com/onsi/ginkgo 
v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= 172 | github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= 173 | github.com/onsi/ginkgo v1.14.0 h1:2mOpI4JVVPBN+WQRa0WKH2eXR+Ey+uK4n7Zj0aYpIQA= 174 | github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY= 175 | github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= 176 | github.com/onsi/gomega v1.10.1 h1:o0+MgICZLuZ7xjH7Vx6zS/zcu93/BEp1VwkIW1mEXCE= 177 | github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= 178 | github.com/parnurzeal/gorequest v0.2.16 h1:T/5x+/4BT+nj+3eSknXmCTnEVGSzFzPGdpqmUVVZXHQ= 179 | github.com/parnurzeal/gorequest v0.2.16/go.mod h1:3Kh2QUMJoqw3icWAecsyzkpY7UzRfDhbRdTjtNwNiUE= 180 | github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= 181 | github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 182 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 183 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 184 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 185 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 186 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 187 | github.com/polaris1119/keyword v0.0.0-20170608075927-96ae6735f2f2 h1:63TyJE7nj3eUsJ/1jbzrnH8gxw1z3oV/su2RIyVHlRQ= 188 | github.com/polaris1119/keyword v0.0.0-20170608075927-96ae6735f2f2/go.mod h1:YYx1sYXgS43cG1iH0cXYFcibESl8azhUrA6W10gYb9E= 189 | github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= 190 | github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= 191 | github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc= 192 | 
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= 193 | github.com/ryanuber/columnize v2.1.0+incompatible h1:j1Wcmh8OrK4Q7GXY+V7SVSY8nUWQxHW5TkBe7YUl+2s= 194 | github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= 195 | github.com/schollz/closestmatch v2.1.0+incompatible h1:Uel2GXEpJqOWBrlyI+oY9LTiyyjYS17cCYRqP13/SHk= 196 | github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtmuhtR2uUrrJOpYzYRvbcPAid+g= 197 | github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= 198 | github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= 199 | github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= 200 | github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= 201 | github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= 202 | github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= 203 | github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s= 204 | github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= 205 | github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI= 206 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= 207 | github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= 208 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 209 | github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= 210 | github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= 211 | github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= 212 | 
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= 213 | github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= 214 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 215 | github.com/spf13/viper v1.3.2 h1:VUFqw5KcqRf7i70GOzW7N+Q7+gxVBkSSqiXB12+JQ4M= 216 | github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= 217 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 218 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 219 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 220 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 221 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 222 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 223 | github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= 224 | github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= 225 | github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= 226 | github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= 227 | github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= 228 | github.com/valyala/fasthttp v1.16.0 h1:9zAqOYLl8Tuy3E5R6ckzGDJ1g8+pw15oQp2iL9Jl6gQ= 229 | github.com/valyala/fasthttp v1.16.0/go.mod h1:YOKImeEosDdBPnxc0gy7INqi3m1zK6A+xl6TwOBhHCA= 230 | github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= 231 | github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= 232 | github.com/xeipuuv/gojsonpointer 
v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= 233 | github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= 234 | github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= 235 | github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= 236 | github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= 237 | github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= 238 | github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0 h1:6fRhSjgLCkTD3JnJxvaJ4Sj+TYblw757bqYgZaOq5ZY= 239 | github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0/go.mod h1:/LWChgwKmvncFJFHJ7Gvn9wZArjbV5/FppcK2fKk/tI= 240 | github.com/yudai/gojsondiff v1.0.0 h1:27cbfqXLVEJ1o8I6v3y9lg8Ydm53EKqHXAOMxEGlCOA= 241 | github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg= 242 | github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 h1:BHyfKlQyqbsFN5p3IfnEUduWvb9is428/nNb5L3U01M= 243 | github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= 244 | github.com/yudai/pp v2.0.1+incompatible h1:Q4//iY4pNF6yPLZIigmvcl7k/bPgrcTPIFIcmawg5bI= 245 | github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc= 246 | golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 247 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 248 | golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 249 | golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 250 | 
golang.org/x/crypto v0.0.0-20191205180655-e7c4368fe9dd/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 251 | golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 252 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI= 253 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 254 | golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a h1:vclmkQCjlDX5OydZ9wv8rBCcS0QyQY66Mpf/7BZbInM= 255 | golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 256 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 257 | golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 258 | golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 259 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 260 | golang.org/x/net v0.0.0-20190327091125-710a502c58a2/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 261 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 262 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 263 | golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 264 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 265 | golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 266 | golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 267 | golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod 
h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 268 | golang.org/x/net v0.0.0-20200904194848-62affa334b73 h1:MXfv8rhZWmFeqX3GNZRsd6vOLoaCHjYEX3qkRo3YBUA= 269 | golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= 270 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 271 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY= 272 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 273 | golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 274 | golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 275 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 276 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 277 | golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 278 | golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 279 | golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 280 | golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 281 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 282 | golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 283 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= 284 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 285 | golang.org/x/sys 
v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 286 | golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980 h1:OjiUf46hAmXblsZdnoSXsEUSKU8r1UEzcL5RVZ4gO9Y= 287 | golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 288 | golang.org/x/sys v0.0.0-20200909081042-eff7692f9009 h1:W0lCpv29Hv0UaM1LXb9QlBHLNP8UFfcKjblhVCWftOM= 289 | golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 290 | golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= 291 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 292 | golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= 293 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 294 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 295 | golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 296 | golang.org/x/tools v0.0.0-20190327201419-c70d86f8b7cf/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 297 | golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 298 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 299 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 300 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 301 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= 302 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 303 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod 
h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 304 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 305 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 306 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 307 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 308 | google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= 309 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 310 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 311 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 312 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= 313 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 314 | gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= 315 | gopkg.in/ini.v1 v1.51.1 h1:GyboHr4UqMiLUybYjd22ZjQIKEJEpgtLXtuGbR21Oho= 316 | gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= 317 | gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= 318 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= 319 | gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= 320 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 321 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 322 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 323 | gopkg.in/yaml.v2 
v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= 324 | gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 325 | gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2 h1:XZx7nhd5GMaZpmDaEHFVafUZC7ya0fuo7cSJ3UCKYmM= 326 | gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 327 | moul.io/http2curl v1.0.0 h1:6XwpyZOYsgZJrU8exnG87ncVkU1FVCcTRpwzOkTDUi8= 328 | moul.io/http2curl v1.0.0/go.mod h1:f6cULg+e4Md/oW1cYmwW4IWQOVl2lGbmCNGOHvzX2kE= 329 | -------------------------------------------------------------------------------- /library/content.go: -------------------------------------------------------------------------------- 1 | package library 2 | 3 | import ( 4 | "github.com/axgle/mahonia" 5 | ) 6 | 7 | func ConvertToString(src string, srcCode string, tagCode string) string { 8 | srcCoder := mahonia.NewDecoder(srcCode) 9 | srcResult := srcCoder.ConvertString(src) 10 | tagCoder := mahonia.NewDecoder(tagCode) 11 | _, cdata, _ := tagCoder.Translate([]byte(srcResult), true) 12 | result := string(cdata) 13 | return result 14 | } 15 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "collector/app/bootstrap" 5 | "collector/config" 6 | ) 7 | 8 | func main() { 9 | b := bootstrap.New(config.ServerConfig.Port, config.ServerConfig.LogLevel) 10 | b.Serve() 11 | } -------------------------------------------------------------------------------- /public/echartsTheme.js: -------------------------------------------------------------------------------- 1 | /** layuiAdmin.pro-v1.2.1 LPPL License By http://www.layui.com/admin/ */ 2 | 
;layui.define(function(e){e("echartsTheme",{color:["#009688","#1E9FFF","#5FB878","#FFB980","#D87A80","#8d98b3","#e5cf0d","#97b552","#95706d","#dc69aa","#07a2a4","#9a7fd1","#588dd5","#f5994e","#c05050","#59678c","#c9ab00","#7eb00a","#6f5553","#c14089"],title:{textStyle:{fontWeight:"normal",color:"#666"}},dataRange:{itemWidth:15,color:["#009688","#e0ffff"]},toolbox:{color:["#1e90ff","#1e90ff","#1e90ff","#1e90ff"],effectiveColor:"#ff4500"},tooltip:{backgroundColor:"rgba(50,50,50,0.5)",axisPointer:{type:"line",lineStyle:{color:"#009688"},crossStyle:{color:"#008acd"},shadowStyle:{color:"rgba(200,200,200,0.2)"}}},dataZoom:{dataBackgroundColor:"#efefff",fillerColor:"rgba(182,162,222,0.2)",handleColor:"#008acd"},grid:{borderColor:"#eee"},categoryAxis:{axisLine:{lineStyle:{color:"#009688"}},axisTick:{show:!1},splitLine:{lineStyle:{color:["#eee"]}}},valueAxis:{axisLine:{lineStyle:{color:"#009688"}},splitArea:{show:!0,areaStyle:{color:["rgba(250,250,250,0.1)","rgba(200,200,200,0.1)"]}},splitLine:{lineStyle:{color:["#eee"]}}},polar:{axisLine:{lineStyle:{color:"#ddd"}},splitArea:{show:!0,areaStyle:{color:["rgba(250,250,250,0.2)","rgba(200,200,200,0.2)"]}},splitLine:{lineStyle:{color:"#ddd"}}},timeline:{lineStyle:{color:"#009688"},controlStyle:{normal:{color:"#009688"},emphasis:{color:"#009688"}},symbol:"emptyCircle",symbolSize:3},bar:{itemStyle:{normal:{barBorderRadius:2},emphasis:{barBorderRadius:2}}},line:{smooth:!0,symbol:"emptyCircle",symbolSize:3},k:{itemStyle:{normal:{color:"#d87a80",color0:"#2ec7c9",lineStyle:{color:"#d87a80",color0:"#2ec7c9"}}}},scatter:{symbol:"circle",symbolSize:4},radar:{symbol:"emptyCircle",symbolSize:3},map:{itemStyle:{normal:{areaStyle:{color:"#ddd"},label:{textStyle:{color:"#d87a80"}}},emphasis:{areaStyle:{color:"#fe994e"}}}},force:{itemStyle:{normal:{linkStyle:{color:"#1e90ff"}}}},chord:{itemStyle:{normal:{borderWidth:1,borderColor:"rgba(128, 128, 128, 0.5)",chordStyle:{lineStyle:{color:"rgba(128, 128, 128, 
0.5)"}}},emphasis:{borderWidth:1,borderColor:"rgba(128, 128, 128, 0.5)",chordStyle:{lineStyle:{color:"rgba(128, 128, 128, 0.5)"}}}}},gauge:{axisLine:{lineStyle:{color:[[.2,"#2ec7c9"],[.8,"#5ab1ef"],[1,"#d87a80"]],width:10}},axisTick:{splitNumber:10,length:15,lineStyle:{color:"auto"}},splitLine:{length:22,lineStyle:{color:"auto"}},pointer:{width:5}},textStyle:{fontFamily:"微软雅黑, Arial, Verdana, sans-serif"}})}); -------------------------------------------------------------------------------- /public/index.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fesiong/collector/3033851813ef43d186d3272460b3d5a43d134b15/public/index.png -------------------------------------------------------------------------------- /public/install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fesiong/collector/3033851813ef43d186d3272460b3d5a43d134b15/public/install.png -------------------------------------------------------------------------------- /public/list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fesiong/collector/3033851813ef43d186d3272460b3d5a43d134b15/public/list.png -------------------------------------------------------------------------------- /public/publish.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fesiong/collector/3033851813ef43d186d3272460b3d5a43d134b15/public/publish.png -------------------------------------------------------------------------------- /public/remote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fesiong/collector/3033851813ef43d186d3272460b3d5a43d134b15/public/remote.png -------------------------------------------------------------------------------- /public/setting.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fesiong/collector/3033851813ef43d186d3272460b3d5a43d134b15/public/setting.png -------------------------------------------------------------------------------- /public/source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fesiong/collector/3033851813ef43d186d3272460b3d5a43d134b15/public/source.png -------------------------------------------------------------------------------- /services/mysql.go: -------------------------------------------------------------------------------- 1 | package services 2 | 3 | import ( 4 | "collector/config" 5 | "fmt" 6 | "github.com/jinzhu/gorm" 7 | _ "github.com/jinzhu/gorm/dialects/mysql" 8 | "os" 9 | ) 10 | 11 | func InitDB() { 12 | if config.MySQLConfig.Url != "" { 13 | db, err := gorm.Open("mysql", config.MySQLConfig.Url) 14 | if err != nil { 15 | fmt.Println(config.MySQLConfig, err.Error()) 16 | os.Exit(-1) 17 | } 18 | 19 | if config.ServerConfig.Env == "development" { 20 | db.LogMode(true) 21 | } 22 | db.DB().SetMaxIdleConns(config.MySQLConfig.MaxIdleConnections) 23 | db.DB().SetMaxOpenConns(config.MySQLConfig.MaxOpenConnections) 24 | db.DB().SetConnMaxLifetime(-1) //不重新利用,可以执行得更快 25 | 26 | //禁用复数表名 27 | db.SingularTable(true) 28 | 29 | DB = db 30 | } 31 | } 32 | 33 | var DB *gorm.DB 34 | 35 | func init() { 36 | InitDB() 37 | } 38 | -------------------------------------------------------------------------------- /template/article/keywords.html: -------------------------------------------------------------------------------- 1 | {% include "partial/header.html" %} 2 |
3 |
4 | 关键词库功能尚未开发,预留卡位。 5 |
6 |
7 | {% include "partial/footer.html" %} -------------------------------------------------------------------------------- /template/article/list.html: -------------------------------------------------------------------------------- 1 | {% include "partial/header.html" %} 2 |
3 |
4 |

采集的文章

5 |
6 |
7 |
8 | {% include "partial/footer.html" %} 9 | 14 | 20 | -------------------------------------------------------------------------------- /template/article/source.html: -------------------------------------------------------------------------------- 1 | {% include "partial/header.html" %} 2 |
3 |
4 |

数据源

5 |
6 |
7 |
8 | {% include "partial/footer.html" %} 9 | 19 | 25 | 30 | -------------------------------------------------------------------------------- /template/errors/404.html: -------------------------------------------------------------------------------- 1 | {% include "partial/header.html" %} 2 | 72 |
73 |
74 |

404

75 |

你似乎来到了没有知识存在的荒原

76 | 77 | 去往首页 78 | 79 |
80 |
81 | {% include "partial/footer.html" %} -------------------------------------------------------------------------------- /template/index.html: -------------------------------------------------------------------------------- 1 | {% include "partial/header.html" %} 2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
数据采集概况
10 |
11 | 12 | 19 | 20 |
21 |
22 |
23 |
24 |
25 |
可用数据源概况
26 |
27 | 28 | 35 | 36 |
37 |
38 |
39 |
40 | 41 |
42 |
43 |
版本信息
44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 56 | 57 | 58 | 59 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 74 | 75 | 76 |
当前版本 54 | collector-v0.1.1 55 |
开发者 60 | Fesion 61 |
主要特色不用写正则 / 自动定时采集 / 支持批量数据源 / 操作简单
GitHub 70 | 73 |
77 |
78 |
79 | 80 |
81 |
开发计划
82 |
83 |
√ 自动定时执行采集任务
84 |
√ 增加可视化添加采集列表连接、查看修改已采集内容操作界面
85 |
× 增加自动发布到远程服务器网站功能
86 |
× 增加关键词自动替换(伪原创的一部分)
87 |
× 增加内容自动分段重组功能(待定)
88 |
89 |
90 | 91 |
92 |
93 | 采集器介绍 94 |
95 |
96 |

这是一个由golang编写的采集器,可以自动识别文章列表和文章内容。使用它来采集文章并不需要编写正则表达式,你只需要提供文章列表页的连接即可。

97 |
98 |
99 |
100 | 101 |
102 |
103 |
104 |
105 | {% include "partial/footer.html" %} 106 | -------------------------------------------------------------------------------- /template/install/index.html: -------------------------------------------------------------------------------- 1 | {% include "partial/header.html" %} 2 |
3 |
4 |

初始化采集工具

5 |
6 |
7 | 8 |
9 | 10 |
11 |
12 |
13 | 14 |
15 | 16 |
17 |
18 |
19 | 20 |
21 | 22 |
23 |
24 |
25 | 26 |
27 | 28 |
29 |
30 |
31 | 32 |
33 | 34 |
35 |
36 |
37 |
38 | 39 | 40 |
41 |
42 |
43 |
44 |
45 | {% include "partial/footer.html" %} 46 | -------------------------------------------------------------------------------- /template/partial/footer.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |
4 |

©2020 Fesion. All Rights Reserved

5 |
6 |
7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /template/partial/header.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 网站文章万能采集器 9 | 10 | 172 | 173 | 174 |
175 |
176 |
177 | 197 |
198 |
199 |
-------------------------------------------------------------------------------- /template/setting/index.html: -------------------------------------------------------------------------------- 1 | {% include "partial/header.html" %} 2 |
3 |
4 |

采集配置

5 |
6 |
7 |
采集源设置
8 |
9 |
10 | 11 |
12 | 13 |
14 |
15 |
16 | 17 |
18 | 19 |
20 |
21 |
22 |
23 |
24 |
采集标题设置
25 |
26 |
27 | 28 |
29 | 30 |
31 |
32 |
33 | 34 |
35 | 36 |
37 |
38 |
39 |
40 |
41 | 42 |
43 | 44 |
45 |
46 |
47 |
48 |
49 | 50 |
51 | 52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
采集内容设置
60 |
61 |
62 | 63 |
64 | 65 |
66 |
67 |
68 | 69 |
70 | 71 |
72 |
73 |
74 |
75 |
76 | 77 |
78 | 79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 | 确认修改 88 |
89 |
90 |
91 |
92 |
93 | {% include "partial/footer.html" %} 94 | 101 | -------------------------------------------------------------------------------- /template/setting/publish.html: -------------------------------------------------------------------------------- 1 | {% include "partial/header.html" %} 2 |
3 |
4 |

自动发布配置

5 |
6 |
7 |
基础配置
8 |
9 |
10 | 11 |
12 | 13 | 14 | 15 |
16 |
17 |
18 |
19 |
20 |
本地同数据库发布
21 |
22 |
23 | 24 |
25 | 26 |
27 |
28 |
29 | 30 |
31 | 32 |
33 |
34 |
35 | 36 |
37 | 38 |
39 |
40 |
41 |
42 |
43 |
远程发布
44 |
45 |
46 | 47 |
48 | 49 |
50 |
51 |
52 | 53 |
54 | 55 | 56 | 57 |
58 |
59 |
60 | 61 |
62 |
63 |
键名
64 |
65 | 66 |
67 |
键值
68 |
69 | 70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 | 78 |
79 |
80 |
键名
81 |
82 | 83 |
84 |
键值
85 |
86 | 87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
字段设置
97 |
98 |
99 | 100 |
101 |
102 |
键名
103 |
104 | 105 |
106 |
键值
107 |
108 | 109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 | 117 |
118 | 119 |
120 |
121 |
122 | 123 |
124 | 125 |
126 |
127 |
128 | 129 |
130 | 131 |
132 |
133 |
134 | 135 |
136 | 137 |
138 |
139 |
140 | 141 |
142 | 143 |
144 |
145 |
146 | 147 |
148 | 149 |
150 |
151 |
152 |
153 |
154 |
155 | 确认修改 156 |
157 |
158 |
159 |
160 |
161 | {% include "partial/footer.html" %} 162 | 169 | --------------------------------------------------------------------------------