├── .DS_Store
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── dependencies
├── docker-compose.yml
├── init-mongo.js
├── nginx
│ ├── conf
│ │ └── nginx.conf
│ ├── docker-compose.yml
│ └── html
│ │ └── index.html
└── traefik.toml
├── deploy
├── buildScript
│ ├── linux_build.sh
│ ├── mac_build.sh
│ └── win_build.sh
├── deploy
│ ├── start-douban-direct.sh
│ └── start-meituan-direct.sh
├── dockerBuildScript
│ └── docker_build.sh
└── service
│ ├── cache
│ └── Dockerfile
│ ├── docker-compose.yml
│ ├── douban
│ ├── crawl_detail
│ │ └── Dockerfile
│ ├── crawl_list
│ │ └── Dockerfile
│ ├── crawl_tags
│ │ └── Dockerfile
│ ├── docker-compose.yml
│ └── storage_detail
│ │ └── Dockerfile
│ ├── elastic
│ └── Dockerfile
│ └── meituan
│ ├── crawl_detail
│ └── Dockerfile
│ ├── crawl_list
│ └── Dockerfile
│ ├── crawl_urllist
│ └── Dockerfile
│ ├── docker-compose.yml
│ └── storage_detail
│ └── Dockerfile
├── global
├── data.go
├── setting.go
└── tracer.go
├── go.mod
├── go.sum
├── img
├── consul.png
├── consul_config.png
├── consul_service.png
├── douban.png
├── elasticsearch.png
├── flow.png
├── framework.png
├── meituan.png
├── rabbitmq.png
└── swagger.png
├── initConf
└── init.go
├── internal
├── crawler
│ ├── crawerConfig
│ │ ├── articleMQConfig.go
│ │ ├── bookMQConfig.go
│ │ └── start.go
│ ├── crawlOperation.go
│ ├── cronJob
│ │ └── main.go
│ ├── douban
│ │ ├── parser
│ │ │ ├── bookDetail.go
│ │ │ ├── booklist.go
│ │ │ └── tagList.go
│ │ └── storage
│ │ │ └── bookDetail.go
│ ├── fetcher
│ │ └── fetcher.go
│ ├── meituan
│ │ ├── conf
│ │ │ └── mapping.go
│ │ ├── parser
│ │ │ ├── articleDetail.go
│ │ │ ├── articleList.go
│ │ │ └── articleUrlList.go
│ │ └── storage
│ │ │ └── articleDetail.go
│ ├── persistence
│ │ └── persistence.go
│ └── worker
│ │ ├── types.go
│ │ └── worker.go
├── crontab
│ ├── common
│ │ ├── constants.go
│ │ ├── job.go
│ │ ├── job_easyjson.go
│ │ └── log.go
│ ├── master
│ │ └── etcd.go
│ └── worker
│ │ ├── etcd.go
│ │ ├── executor.go
│ │ ├── jobLock.go
│ │ ├── logSink.go
│ │ ├── main
│ │ └── main.go
│ │ └── scheduler.go
├── dao
│ ├── article.go
│ ├── book.go
│ ├── dao.go
│ └── forbes.go
├── middleware
│ ├── access_log.go
│ ├── context_timeout.go
│ ├── recovery.go
│ ├── tracer.go
│ └── translations.go
├── model
│ ├── article.go
│ ├── article_easyjson.go
│ ├── book.go
│ ├── book_easyjson.go
│ ├── db.go
│ └── forbes.go
├── routers
│ ├── job
│ │ └── job.go
│ ├── router.go
│ └── sd
│ │ └── check.go
└── service
│ └── job.go
├── main.go
├── pkg
├── app
│ ├── app.go
│ ├── form.go
│ └── pagination.go
├── cache
│ ├── cache.go
│ ├── cacheOperation.go
│ └── cache_test.go
├── consistentHash
│ └── consistent.go
├── convert
│ └── convert.go
├── db
│ └── db.go
├── elastic
│ ├── elastic.go
│ └── elasticOpeartion.go
├── email
│ └── email.go
├── errcode
│ ├── common_code.go
│ ├── ercd_code.go
│ ├── errcode.go
│ └── user.go
├── etcd
│ └── etcd.go
├── file
│ └── file.go
├── idGenerator
│ ├── idGenerator.go
│ └── idGenerator_test.go
├── ipParser
│ ├── ipParser.go
│ └── qqwry.utf8.dat
├── logger
│ └── logger.go
├── mongoDB
│ └── mongo.go
├── mq
│ ├── consumer.go
│ ├── producer.go
│ ├── rabbitmq.go
│ └── rabbitmq_test.go
├── otgorm
│ └── otgorm.go
├── setting
│ ├── section.go
│ └── setting.go
├── tracer
│ └── tracer.go
├── upload
│ └── file.go
└── util
│ ├── aes.go
│ ├── base64.go
│ ├── base64_test.go
│ ├── json.go
│ ├── json_test.go
│ ├── jwt.go
│ ├── md5.go
│ ├── md5_test.go
│ ├── morse.go
│ ├── morse_test.go
│ ├── qrcode.go
│ ├── reb2hex_test.go
│ ├── regularExpression.go
│ ├── rgb2hex.go
│ ├── stringCode.go
│ └── structMapping.go
├── service
├── cache
│ ├── client
│ │ └── client.go
│ ├── main.go
│ ├── proto
│ │ ├── redis.pb.go
│ │ ├── redis.pb.micro.go
│ │ └── redis.proto
│ └── server
│ │ └── server.go
├── douban
│ ├── crawl_detail
│ │ └── main.go
│ ├── crawl_list
│ │ └── main.go
│ ├── crawl_tags
│ │ └── main.go
│ └── storage_detail
│ │ └── main.go
├── elastic
│ ├── client
│ │ └── client.go
│ ├── main.go
│ ├── proto
│ │ ├── elastic.pb.go
│ │ ├── elastic.pb.micro.go
│ │ └── elastic.proto
│ └── server
│ │ └── server.go
└── meituan
│ ├── crawl_detail
│ └── main.go
│ ├── crawl_list
│ └── main.go
│ ├── crawl_urllist
│ └── main.go
│ └── storage_detail
│ └── main.go
└── storage
└── logs
└── app.log
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 |
3 | *.dll
4 | *.so
5 | *.dylib
6 |
7 | .idea
8 |
9 | # Test binary, built with `go test -c`
10 | *.test
11 |
12 | # Output of the go coverage tool, specifically when used with LiteIDE
13 | *.out
14 |
15 | # Dependency directories (remove the comment below to include it)
16 | # vendor/
17 |
18 | bin/
19 |
20 | configs/
21 | .DS_Store
22 | log/
23 | config.json
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Knowledge-Precipitation-Tribe
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/Makefile
--------------------------------------------------------------------------------
/dependencies/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 |
5 | redis:
6 | image: redis
7 | restart: always
8 | ports:
9 | - "6379:6379"
10 | volumes:
11 | - redis-data:/data
12 |
13 | mysql:
14 | image: mysql
15 | command: --default-authentication-plugin=mysql_native_password
16 | restart: always
17 | environment:
18 | MYSQL_ROOT_PASSWORD: example
19 |
20 | mongo: # MongoDB instance; the "cron" database is seeded by init-mongo.js
21 | image: mongo
22 | environment:
23 | - MONGO_INITDB_DATABASE=cron
24 | - MONGO_INITDB_ROOT_USERNAME=root
25 | - MONGO_INITDB_ROOT_PASSWORD=password
26 | volumes:
27 | - ./init-mongo.js:/docker-entrypoint-initdb.d/init-mongo.js:ro # init scripts are only picked up from /docker-entrypoint-initdb.d
28 | - mongo-data:/data/db
29 | ports:
30 | - '27017-27019:27017-27019'
31 |
32 | elastic: # single-node Elasticsearch used for local development
33 | image: docker.elastic.co/elasticsearch/elasticsearch:7.8.0
34 | ports:
35 | - "9200:9200"
36 | - "9300:9300"
37 | volumes:
38 | - elastic-data:/usr/share/elasticsearch/data # the official image stores its data here; a mount at /data persists nothing
39 | environment:
40 | - discovery.type=single-node
41 |
42 | rabbitmq:
43 | image: rabbitmq:management
44 | hostname: myrabbitmq
45 | ports:
46 | - "5672:5672"
47 | - "15672:15672"
48 | volumes:
49 | - rabbitmq-data:/var/lib/rabbitmq
50 |
51 | consul1:
52 | image: consul
53 | restart: always
54 | ports:
55 | - "8500:8500"
56 | - "8300:8300"
57 | - "8301:8301"
58 | - "8302:8302"
59 | - "8600:8600"
60 | command: agent -server -bootstrap-expect 2 -ui -bind=0.0.0.0 -client=0.0.0.0
61 |
62 | consul2:
63 | image: consul
64 | restart: always
65 | ports:
66 | - "8501:8500"
67 | command: agent -server -ui -bind=0.0.0.0 -client=0.0.0.0 -join consul1
68 |
69 | consul3:
70 | image: consul
71 | restart: always
72 | ports:
73 | - "8502:8500"
74 | command: agent -server -ui -bind=0.0.0.0 -client=0.0.0.0 -join consul1
75 |
76 | # proxy:
77 | # image: traefik
78 | # command: --api --docker --docker.domain=docker.localhost --logLevel=DEBUG
79 | # ports:
80 | # - "80:80"
81 | # - "8080:8080"
82 | # volumes:
83 | # - /var/run/docker.sock:/var/run/docker.sock
84 | # - ./traefik.toml:/root/go-crawler/traefik.toml
85 |
86 | jaeger:
87 | image: jaegertracing/all-in-one:1.16
88 | ports:
89 | - "5775:5775/udp"
90 | - "6831:6831/udp"
91 | - "6832:6832/udp"
92 | - "5778:5778"
93 | - "16686:16686"
94 | - "14268:14268"
95 | - "9411:9411"
96 | environment:
97 | - "COLLECTOR_ZIPKIN_HTTP_PORT=9411"
98 |
99 | etcd:
100 | image: quay.io/coreos/etcd:v3.3.12
101 | ports:
102 | - "2379:2379"
103 | - "2380:2380"
104 | environment:
105 | ETCDCTL_API: 3
106 | volumes:
107 | - etcd-data:/etcd-data
108 | command:
109 | - "/usr/local/bin/etcd"
110 | - "--name"
111 | - "s1"
112 | - "--data-dir"
113 | - "/etcd-data"
114 | - "--advertise-client-urls"
115 | - "http://0.0.0.0:2379"
116 | - --listen-client-urls
117 | - "http://0.0.0.0:2379"
118 | - "--initial-advertise-peer-urls"
119 | - "http://0.0.0.0:2380"
120 | - "--listen-peer-urls"
121 | - "http://0.0.0.0:2380"
122 | - "--initial-cluster-token"
123 | - "tkn"
124 | - "--initial-cluster"
125 | - "s1=http://0.0.0.0:2380"
126 | - "--initial-cluster-state"
127 | - "new"
128 |
129 | volumes:
130 | elastic-data:
131 | rabbitmq-data:
132 | redis-data:
133 | mongo-data:
134 | etcd-data:
--------------------------------------------------------------------------------
/dependencies/init-mongo.js:
--------------------------------------------------------------------------------
1 | db.createUser(
2 | {
3 | user:"root",
4 | pwd:"password",
5 | roles:[
6 | {
7 | role:"readWrite",
8 | db:"cron"
9 | }
10 | ]
11 | }
12 | )
13 |
--------------------------------------------------------------------------------
/dependencies/nginx/conf/nginx.conf:
--------------------------------------------------------------------------------
1 | #user nobody;
2 | worker_processes 1;
3 |
4 | #error_log logs/error.log;
5 | #error_log logs/error.log notice;
6 | #error_log logs/error.log info;
7 |
8 | #pid logs/nginx.pid;
9 |
10 |
11 | events {
12 | worker_connections 1024;
13 | }
14 |
15 |
16 | http {
17 | include mime.types;
18 | default_type application/octet-stream;
19 |
20 | #log_format main '$remote_addr - $remote_user [$time_local] "$request" '
21 | # '$status $body_bytes_sent "$http_referer" '
22 | # '"$http_user_agent" "$http_x_forwarded_for"';
23 |
24 | #access_log logs/access.log main;
25 |
26 | sendfile on;
27 | #tcp_nopush on;
28 |
29 | #keepalive_timeout 0;
30 | keepalive_timeout 65;
31 |
32 | #gzip on;
33 |
34 | server {
35 | listen 80;
36 | server_name localhost;
37 |
38 | #charset koi8-r;
39 |
40 | #access_log logs/dig.log main;
41 |
42 | location / {
43 | root /html;
44 | index index.html index.htm;
45 | }
46 |
47 | #error_page 404 /404.html;
48 |
49 | # redirect server error pages to the static page /50x.html
50 | #
51 | error_page 500 502 503 504 /50x.html;
52 | location = /50x.html {
53 | root html;
54 | }
55 |
56 | # proxy the PHP scripts to Apache listening on 127.0.0.1:80
57 | #
58 | #location ~ \.php$ {
59 | # proxy_pass http://127.0.0.1;
60 | #}
61 |
62 | # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
63 | #
64 | #location ~ \.php$ {
65 | # root html;
66 | # fastcgi_pass 127.0.0.1:9000;
67 | # fastcgi_index index.php;
68 | # fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
69 | # include fastcgi_params;
70 | #}
71 |
72 | # deny access to .htaccess files, if Apache's document root
73 | # concurs with nginx's one
74 | #
75 | #location ~ /\.ht {
76 | # deny all;
77 | #}
78 | }
79 |
80 |
81 | # another virtual host using mix of IP-, name-, and port-based configuration
82 | #
83 | #server {
84 | # listen 8000;
85 | # listen somename:8080;
86 | # server_name somename alias another.alias;
87 |
88 | # location / {
89 | # root html;
90 | # index index.html index.htm;
91 | # }
92 | #}
93 |
94 |
95 | # HTTPS server
96 | #
97 | #server {
98 | # listen 443 ssl;
99 | # server_name localhost;
100 |
101 | # ssl_certificate cert.pem;
102 | # ssl_certificate_key cert.key;
103 |
104 | # ssl_session_cache shared:SSL:1m;
105 | # ssl_session_timeout 5m;
106 |
107 | # ssl_ciphers HIGH:!aNULL:!MD5;
108 | # ssl_prefer_server_ciphers on;
109 |
110 | # location / {
111 | # root html;
112 | # index index.html index.htm;
113 | # }
114 | #}
115 |
116 | }
--------------------------------------------------------------------------------
/dependencies/nginx/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 | nginx:
5 | image: nginx:stable-alpine-perl
6 | restart: always
7 | ports:
8 | - 80:80
9 | volumes:
10 | - ./html:/html
11 | - ./logs:/etc/nginx/logs
12 | - ./conf.d:/etc/nginx/conf.d
13 | - ./conf/nginx.conf:/etc/nginx/nginx.conf
--------------------------------------------------------------------------------
/dependencies/traefik.toml:
--------------------------------------------------------------------------------
1 | defaultEntryPoints = ["http"]
2 | insecureSkipVerify = true
3 | [entryPoints]
4 | [entryPoints.http]
5 | address = ":80"
--------------------------------------------------------------------------------
/deploy/buildScript/linux_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed
3 |
4 | douban_services="
5 | storage_detail
6 | crawl_detail
7 | crawl_list
8 | crawl_tags
9 | "
10 |
11 | meituan_services="
12 | storage_detail
13 | crawl_detail
14 | crawl_list
15 | crawl_urllist
16 | "
17 |
18 | common_services="
19 | cache
20 | elastic
21 | "
22 | # Cross-compile the shared service named by $1 for linux/amd64 (cgo disabled) into deploy/service/$1/bin/$1.
23 | build_common_service() {
24 | rm -f "${ROOT_DIR}/deploy/service/$1/bin/$1"
25 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -o "${ROOT_DIR}/deploy/service/$1/bin/$1" "${ROOT_DIR}/service/$1/main.go" || return 1 # do not report success when the build failed
26 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/service/$1/bin/$1"
27 | }
28 | # Cross-compile the douban service named by $1 for linux/amd64 (cgo disabled) into deploy/service/douban/$1/bin/$1.
29 | build_douban_service() {
30 | rm -f "${ROOT_DIR}/deploy/service/douban/$1/bin/$1"
31 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -o "${ROOT_DIR}/deploy/service/douban/$1/bin/$1" "${ROOT_DIR}/service/douban/$1/main.go" || return 1 # do not report success when the build failed
32 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/service/douban/$1/bin/$1"
33 | }
34 | # Cross-compile the meituan service named by $1 for linux/amd64 (cgo disabled) into deploy/service/meituan/$1/bin/$1.
35 | build_meituan_service() {
36 | rm -f "${ROOT_DIR}/deploy/service/meituan/$1/bin/$1"
37 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -o "${ROOT_DIR}/deploy/service/meituan/$1/bin/$1" "${ROOT_DIR}/service/meituan/$1/main.go" || return 1 # do not report success when the build failed
38 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/service/meituan/$1/bin/$1"
39 | }
40 |
41 | for service in $common_services
42 | do
43 | build_common_service $service
44 | done
45 | echo -e "\033[32m编译完成: \033[0m common"
46 |
47 | for service in $douban_services
48 | do
49 | build_douban_service $service
50 | done
51 | echo -e "\033[32m编译完成: \033[0m douban_service"
52 |
53 | for service in $meituan_services
54 | do
55 | build_meituan_service $service
56 | done
57 | echo -e "\033[32m编译完成: \033[0m meituan_service"
--------------------------------------------------------------------------------
/deploy/buildScript/mac_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed
3 |
4 | services="
5 | cache
6 | storage_detail
7 | crawl_detail
8 | crawl_list
9 | crawl_tags
10 | "
11 |
12 | # Build the service named by $1 for the host platform into deploy/mac/bin/$1.
13 | build_service() {
14 | go build -o "${ROOT_DIR}/deploy/mac/bin/$1" "${ROOT_DIR}/service/$1/main.go" || return 1 # do not report success when the build failed
15 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/mac/bin/"
16 | }
17 |
18 | # Ensure deploy/mac/bin exists and clear old binaries from it, then build every service listed in $services
19 | mkdir -p ${ROOT_DIR}/deploy/mac/bin && rm -f ${ROOT_DIR}/deploy/mac/bin/*
20 | for service in $services
21 | do
22 | build_service $service
23 | done
--------------------------------------------------------------------------------
/deploy/buildScript/win_build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed
3 |
4 | services="
5 | cache
6 | storage_detail
7 | crawl_detail
8 | crawl_list
9 | crawl_tags
10 | "
11 |
12 | # Cross-compile the service named by $1 for windows/amd64 (cgo disabled) into deploy/win/bin/$1.exe.
13 | build_service() {
14 | rm -f "${ROOT_DIR}/deploy/win/bin/$1.exe" # remove the previous binary; rm -f on the directory itself only produced an error
15 | CGO_ENABLED=0 GOOS=windows GOARCH=amd64 go build -a -installsuffix cgo -o "${ROOT_DIR}/deploy/win/bin/$1.exe" "${ROOT_DIR}/service/$1/main.go" || return 1 # do not report success when the build failed
16 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/win/bin/"
17 | }
18 |
19 | # The cache service is part of $services and is built by the loop below.
20 | # (A standalone build used to sit here; it targeted linux and, with $1 unset at top level, wrote a stray file named ".exe".)
21 |
22 |
23 | # Ensure deploy/win/bin exists and clear old binaries from it, then build every service listed in $services
24 | mkdir -p ${ROOT_DIR}/deploy/win/bin && rm -f ${ROOT_DIR}/deploy/win/bin/*
25 | for service in $services
26 | do
27 | build_service $service
28 | done
--------------------------------------------------------------------------------
/deploy/deploy/start-douban-direct.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed
3 |
4 | services="
5 | storage_detail
6 | crawl_detail
7 | crawl_list
8 | crawl_tags
9 | "
10 |
11 | cd ${ROOT_DIR}
12 |
13 | # Run the douban service named by $1 from source with `go run`, then print a "started" message.
14 | run_service() {
15 | go run ${ROOT_DIR}/service/douban/$1/main.go
16 | echo -e "\033[32m启动完成: \033[0m $1"
17 | }
18 | # NOTE(review): `go run` stays in the foreground until the program exits, so each later service starts only after the previous one has terminated — confirm this is intended.
19 | go run ${ROOT_DIR}/service/cache/main.go
20 | echo -e "\033[32m启动完成: \033[0m cache"
21 |
22 | # Run every service listed in $services, one after another
23 | for service in $services
24 | do
25 | run_service $service
26 | done
27 |
28 |
--------------------------------------------------------------------------------
/deploy/deploy/start-meituan-direct.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed
3 |
4 | services="
5 | storage_detail
6 | crawl_detail
7 | crawl_list
8 | crawl_urllist
9 | "
10 |
11 | cd ${ROOT_DIR}
12 |
13 | # Run the meituan service named by $1 from source with `go run`, then print a "started" message.
14 | run_service() {
15 | go run ${ROOT_DIR}/service/meituan/$1/main.go
16 | echo -e "\033[32m启动完成: \033[0m $1"
17 | }
18 | # NOTE(review): `go run` stays in the foreground until the program exits, so each later service starts only after the previous one has terminated — confirm this is intended.
19 | go run ${ROOT_DIR}/service/elastic/main.go
20 | echo -e "\033[32m启动完成: \033[0m elastic"
21 |
22 | # Run every service listed in $services, one after another
23 | for service in $services
24 | do
25 | run_service $service
26 | done
27 |
28 |
--------------------------------------------------------------------------------
/deploy/dockerBuildScript/docker_build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | ROOT_DIR=/Users/super/develop/go-crawler-distributed
4 |
5 | services="
6 | cache
7 | storage_detail
8 | crawl_detail
9 | crawl_list
10 | crawl_tags
11 | "
12 |
13 | # Build the Docker image superssssss/crawler/$1 from ./service/$1/Dockerfile, using the current directory as build context
14 | build_image() {
15 | sudo docker build -t superssssss/crawler/$1 -f ./service/$1/Dockerfile .
16 | echo -e "\033[32m镜像打包完成: \033[0m superssssss/crawler/$1\n"
17 | }
18 |
19 | # Switch to the project root directory
20 | cd ${ROOT_DIR}
21 |
22 | echo -e "\033[32m开始构建docker镜像... \033[0m"
23 |
24 | # Build the image of every service listed in $services, working from the deploy/ directory
25 | cd ${ROOT_DIR}/deploy/
26 | for service in $services
27 | do
28 | build_image $service
29 | done
30 |
31 | echo -e "\033[32mdocker镜像构建完毕.\033[0m"
--------------------------------------------------------------------------------
/deploy/service/cache/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 | # Runtime image for the cache service: copies the prebuilt binary and config.json into /app.
3 | COPY bin/cache /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/cache
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./cache"]
--------------------------------------------------------------------------------
/deploy/service/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 |
5 | cache:
6 | build:
7 | context: cache
8 | dockerfile: Dockerfile
9 | networks:
10 | - crawler
11 |
12 | elastic:
13 | build:
14 | context: elastic
15 | dockerfile: Dockerfile
16 | depends_on:
17 | - elastic_server
18 | networks:
19 | - crawler
20 |
21 | redis:
22 | image: redis
23 | restart: always
24 | ports:
25 | - "6379:6379"
26 | volumes:
27 | - redis-data:/data
28 | networks:
29 | - crawler
30 |
31 | mysql:
32 | image: mysql
33 | command: --default-authentication-plugin=mysql_native_password
34 | restart: always
35 | ports:
36 | - "3306:3306"
37 | environment:
38 | MYSQL_ROOT_PASSWORD: example
39 | networks:
40 | - crawler
41 |
42 | elastic_server: # single-node Elasticsearch backing the "elastic" service
43 | image: docker.elastic.co/elasticsearch/elasticsearch:7.8.0
44 | ports:
45 | - "9200:9200"
46 | - "9300:9300"
47 | volumes:
48 | - elastic-data:/usr/share/elasticsearch/data # the official image stores its data here; a mount at /data persists nothing
49 | environment:
50 | - discovery.type=single-node
51 | networks:
52 | - crawler
53 |
54 | rabbitmq:
55 | image: rabbitmq:management
56 | hostname: myrabbitmq
57 | ports:
58 | - "5672:5672"
59 | - "15672:15672"
60 | volumes:
61 | - rabbitmq-data:/var/lib/rabbitmq
62 | networks:
63 | - crawler
64 |
65 | consul1:
66 | image: consul
67 | restart: always
68 | ports:
69 | - "8500:8500"
70 | - "8300:8300"
71 | - "8301:8301"
72 | - "8302:8302"
73 | - "8600:8600"
74 | command: agent -server -bootstrap-expect 2 -ui -bind=0.0.0.0 -client=0.0.0.0
75 | networks:
76 | - crawler
77 |
78 | consul2:
79 | image: consul
80 | restart: always
81 | ports:
82 | - "8501:8500"
83 | command: agent -server -ui -bind=0.0.0.0 -client=0.0.0.0 -join consul1
84 | networks:
85 | - crawler
86 |
87 | consul3:
88 | image: consul
89 | restart: always
90 | ports:
91 | - "8502:8500"
92 | command: agent -server -ui -bind=0.0.0.0 -client=0.0.0.0 -join consul1
93 | networks:
94 | - crawler
95 |
96 |
97 | volumes:
98 | elastic-data:
99 | rabbitmq-data:
100 | redis-data:
101 |
102 | networks:
103 | crawler:
104 | external: true
--------------------------------------------------------------------------------
/deploy/service/douban/crawl_detail/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 |
3 | COPY bin/crawl_detail /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/crawl_detail
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./crawl_detail"]
--------------------------------------------------------------------------------
/deploy/service/douban/crawl_list/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 |
3 | COPY bin/crawl_list /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/crawl_list
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./crawl_list"]
--------------------------------------------------------------------------------
/deploy/service/douban/crawl_tags/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 |
3 | COPY bin/crawl_tags /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/crawl_tags
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./crawl_tags"]
--------------------------------------------------------------------------------
/deploy/service/douban/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 |
5 | crawl_list:
6 | build:
7 | context: crawl_list
8 | dockerfile: Dockerfile
9 |
10 | crawl_tags:
11 | build:
12 | context: crawl_tags
13 | dockerfile: Dockerfile
14 |
15 | crawl_detail:
16 | build:
17 | context: crawl_detail
18 | dockerfile: Dockerfile
19 |
20 | storage_detail:
21 | build:
22 | context: storage_detail
23 | dockerfile: Dockerfile
--------------------------------------------------------------------------------
/deploy/service/douban/storage_detail/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 |
3 | COPY bin/storage_detail /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/storage_detail
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./storage_detail"]
--------------------------------------------------------------------------------
/deploy/service/elastic/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 | # Runtime image for the elastic service: copies the prebuilt binary and config.json into /app.
3 | COPY bin/elastic /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/elastic
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./elastic"]
--------------------------------------------------------------------------------
/deploy/service/meituan/crawl_detail/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 |
3 | COPY bin/crawl_detail /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/crawl_detail
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./crawl_detail"]
--------------------------------------------------------------------------------
/deploy/service/meituan/crawl_list/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 |
3 | COPY bin/crawl_list /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/crawl_list
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./crawl_list"]
--------------------------------------------------------------------------------
/deploy/service/meituan/crawl_urllist/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 |
3 | COPY bin/crawl_urllist /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/crawl_urllist
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./crawl_urllist"]
--------------------------------------------------------------------------------
/deploy/service/meituan/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 |
3 | services:
4 |
5 | crawl_list:
6 | build:
7 | context: crawl_list
8 | dockerfile: Dockerfile
9 | networks:
10 | - crawler
11 |
12 | crawl_tags: # NOTE(review): this service builds the crawl_urllist context; the crawl_tags name matches the douban stack — confirm whether it should be crawl_urllist
13 | build:
14 | context: crawl_urllist
15 | dockerfile: Dockerfile
16 | networks:
17 | - crawler
18 |
19 | crawl_detail:
20 | build:
21 | context: crawl_detail
22 | dockerfile: Dockerfile
23 | networks:
24 | - crawler
25 |
26 | storage_detail:
27 | build:
28 | context: storage_detail
29 | dockerfile: Dockerfile
30 | networks:
31 | - crawler
32 |
33 | networks:
34 | crawler:
35 | external: true
--------------------------------------------------------------------------------
/deploy/service/meituan/storage_detail/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM centos:7
2 |
3 | COPY bin/storage_detail /app/
4 | COPY config.json /app/config/
5 |
6 | RUN chmod 777 /app/storage_detail
7 |
8 | WORKDIR /app
9 |
10 | ENTRYPOINT ["./storage_detail"]
--------------------------------------------------------------------------------
/global/data.go:
--------------------------------------------------------------------------------
1 | package global
2 |
3 | import (
4 | "github.com/coreos/etcd/clientv3"
5 | "github.com/garyburd/redigo/redis"
6 | "github.com/jinzhu/gorm"
7 | "github.com/kayon/iploc"
8 | "github.com/olivere/elastic/v7"
9 | "github.com/streadway/amqp"
10 | "go.mongodb.org/mongo-driver/mongo"
11 | )
12 |
13 | /**
14 | * @Author: super
15 | * @Date: 2020-09-18 08:51
16 | * @Description: Global DB configuration
17 | **/
18 | // RabbitMQ groups an AMQP connection and an AMQP channel.
19 | type RabbitMQ struct {
20 | Conn *amqp.Connection // AMQP connection
21 | Channel *amqp.Channel // AMQP channel
22 | }
23 | // Package-level handles to external services; they are declared here without initial values.
24 | var (
25 | DBEngine *gorm.DB // GORM database handle
26 | RedisEngine *redis.Pool // redigo connection pool
27 | RabbitMQEngine *RabbitMQ // RabbitMQ connection/channel pair
28 | ElasticEngine *elastic.Client // Elasticsearch (olivere/elastic v7) client
29 | MongoDBEngine *mongo.Client // MongoDB driver client
30 | EtcdEngine *clientv3.Client // etcd v3 client
31 | EtcdKV clientv3.KV // etcd key-value interface
32 | EtcdLease clientv3.Lease // etcd lease interface
33 | EtcdWatcher clientv3.Watcher // etcd watcher interface
34 | IpParser *iploc.Locator // iploc locator
35 | )
36 |
--------------------------------------------------------------------------------
/global/setting.go:
--------------------------------------------------------------------------------
1 | package global
2 |
3 | import (
4 | "go-crawler-distributed/pkg/logger"
5 | "go-crawler-distributed/pkg/setting"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2020-09-18 08:32
11 | * @Description: Global configuration, including: server, database, Email, JWT and logging
12 | **/
13 | // Package-level configuration sections (types from pkg/setting) and the shared logger; declared here without initial values.
14 | var (
15 | ServerSetting *setting.ServerSettingS
16 | AppSetting *setting.AppSettingS
17 | DatabaseSetting *setting.DatabaseSettingS
18 | CacheSetting *setting.CacheSettingS
19 | RabbitMQSetting *setting.RabbitMQSettingS
20 | ElasticSetting *setting.ElasticSettingS
21 | EmailSetting *setting.EmailSettingS
22 | JWTSetting *setting.JWTSettingS
23 | Logger *logger.Logger // logger from pkg/logger
24 | ConsulSetting *setting.ConsulSettingS
25 | MongoDBSetting *setting.MongoDBSettingS
26 | EtcdSetting *setting.EtcdSettingS
27 | TracerSetting *setting.TracerSettingS
28 | IpParserSetting *setting.IpParserSettingS
29 | )
30 |
--------------------------------------------------------------------------------
/global/tracer.go:
--------------------------------------------------------------------------------
1 | package global
2 |
3 | import "github.com/opentracing/opentracing-go"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-09-24 08:10
8 | * @Description: Configures the globally shared call-chain tracing
9 | **/
10 | // Tracer is the package-level OpenTracing tracer; it is declared here without an initial value.
11 | var (
12 | Tracer opentracing.Tracer
13 | )
14 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module go-crawler-distributed
2 |
3 | go 1.13
4 |
5 | require (
6 | github.com/HdrHistogram/hdrhistogram-go v1.0.0 // indirect
7 | github.com/PuerkitoBio/goquery v1.5.1
8 | github.com/bwmarrin/snowflake v0.3.0
9 | github.com/coreos/etcd v3.3.18+incompatible
10 | github.com/dgrijalva/jwt-go v3.2.0+incompatible
11 | github.com/fsnotify/fsnotify v1.4.9
12 | github.com/garyburd/redigo v1.6.2
13 | github.com/gin-contrib/cors v1.3.1
14 | github.com/gin-gonic/gin v1.9.1
15 | github.com/go-acme/lego/v3 v3.4.0
16 | github.com/go-playground/locales v0.14.1
17 | github.com/go-playground/universal-translator v0.18.1
18 | github.com/go-playground/validator/v10 v10.14.0
19 | github.com/golang/protobuf v1.5.0
20 | github.com/jinzhu/gorm v1.9.16
21 | github.com/kayon/iploc v0.0.0-20200312105652-bda3e968a794
22 | github.com/mailru/easyjson v0.7.6
23 | github.com/micro/go-micro/v2 v2.9.1
24 | github.com/micro/go-plugins/registry/consul/v2 v2.9.1
25 | github.com/olivere/elastic/v7 v7.0.22
26 | github.com/opentracing/opentracing-go v1.2.0
27 | github.com/robfig/cron/v3 v3.0.1
28 | github.com/shirou/gopsutil v0.0.0-20190901111213-e4ec7b275ada
29 | github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e
30 | github.com/spf13/viper v1.7.1
31 | github.com/streadway/amqp v1.0.0
32 | github.com/uber/jaeger-client-go v2.25.0+incompatible
33 | github.com/uber/jaeger-lib v2.4.0+incompatible // indirect
34 | go.mongodb.org/mongo-driver v1.5.1
35 | go.uber.org/atomic v1.6.0 // indirect
36 | golang.org/x/net v0.17.0
37 | golang.org/x/text v0.13.0
38 | google.golang.org/protobuf v1.23.0
39 | gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
40 | gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
41 | gopkg.in/natefinch/lumberjack.v2 v2.0.0
42 | )
43 |
--------------------------------------------------------------------------------
/img/consul.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/consul.png
--------------------------------------------------------------------------------
/img/consul_config.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/consul_config.png
--------------------------------------------------------------------------------
/img/consul_service.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/consul_service.png
--------------------------------------------------------------------------------
/img/douban.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/douban.png
--------------------------------------------------------------------------------
/img/elasticsearch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/elasticsearch.png
--------------------------------------------------------------------------------
/img/flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/flow.png
--------------------------------------------------------------------------------
/img/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/framework.png
--------------------------------------------------------------------------------
/img/meituan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/meituan.png
--------------------------------------------------------------------------------
/img/rabbitmq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/rabbitmq.png
--------------------------------------------------------------------------------
/img/swagger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/swagger.png
--------------------------------------------------------------------------------
/initConf/init.go:
--------------------------------------------------------------------------------
1 | package initConf
2 |
3 | import (
4 | "go-crawler-distributed/pkg/etcd"
5 | "go-crawler-distributed/pkg/ipParser"
6 | "go-crawler-distributed/pkg/mongoDB"
7 | "log"
8 | "strings"
9 | "time"
10 |
11 | "go-crawler-distributed/global"
12 | "go-crawler-distributed/pkg/cache"
13 | "go-crawler-distributed/pkg/db"
14 | "go-crawler-distributed/pkg/elastic"
15 | "go-crawler-distributed/pkg/idGenerator"
16 | "go-crawler-distributed/pkg/logger"
17 | "go-crawler-distributed/pkg/mq"
18 | "go-crawler-distributed/pkg/setting"
19 | "go-crawler-distributed/pkg/tracer"
20 |
21 | "gopkg.in/natefinch/lumberjack.v2"
22 | )
23 |
24 | /**
25 | * @Author: super
26 | * @Date: 2021-01-05 14:25
27 | * @Description:
28 | **/
29 | func Init(config string) {
30 | //初始化配置
31 | err := setupSetting(config)
32 | if err != nil {
33 | log.Printf("init setupSetting err: %v\n", err)
34 | } else {
35 | log.Printf("初始化配置信息成功")
36 | }
37 | //初始化日志
38 | err = setupLogger()
39 | if err != nil {
40 | log.Printf("init setupLogger err: %v\n", err)
41 | } else {
42 | log.Printf("初始化logger成功")
43 | }
44 | //初始化数据库
45 | err = setupDBEngine()
46 | if err != nil {
47 | log.Printf("init setupDBEngine err: %v\n", err)
48 | } else {
49 | log.Printf("初始化数据库成功")
50 | }
51 | //初始化redis
52 | err = setupCacheEngine()
53 | if err != nil {
54 | log.Printf("init setupCacheEngine err: %v\n", err)
55 | } else {
56 | log.Printf("初始化cache成功")
57 | }
58 | //初始化RabbitMQ
59 | err = setupRabbitMQEngine()
60 | if err != nil {
61 | log.Printf("init setupRabbitMQEngine err: %v\n", err)
62 | } else {
63 | log.Printf("初始化消息队列成功")
64 | }
65 | //初始化elastic
66 | //err = setupElasticEngine()
67 | //if err != nil {
68 | // log.Printf("init setupElasticEngine err: %v\n", err)
69 | //}else{
70 | // log.Printf("初始化elastic成功")
71 | //}
72 | //
73 | //初始化mongoDB
74 | err = setupMongoDBEngine()
75 | if err != nil {
76 | log.Printf("init setupMongoDBEngine err: %v\n", err)
77 | } else {
78 | log.Printf("初始化mongoDb成功")
79 | }
80 | //初始化etcd
81 | err = setupEtcdEngine()
82 | if err != nil {
83 | log.Printf("init setupEtcdEngine err: %v\n", err)
84 | } else {
85 | log.Printf("初始化etcd成功")
86 | }
87 | //初始化ipParser
88 | err = setupIpParser()
89 | if err != nil {
90 | log.Printf("init setupIpParser err: %v\n", err)
91 | } else {
92 | log.Printf("初始化ipParser成功")
93 | }
94 | //初始化追踪
95 | err = setupTracer()
96 | if err != nil {
97 | log.Printf("init.setupTracer err: %v\n", err)
98 | } else {
99 | log.Printf("初始化Tracer成功")
100 | }
101 | //初始化ID生成器
102 | err = idGenerator.InitSnowflake()
103 | if err != nil {
104 | log.Printf("init.snowflak err: %v\n", err)
105 | } else {
106 | log.Printf("初始化idGenerator成功")
107 | }
108 | }
109 |
110 | func setupSetting(config string) error {
111 | newSetting, err := setting.NewSetting(strings.Split(config, ",")...)
112 | if err != nil {
113 | return err
114 | }
115 | err = newSetting.ReadSection("Server", &global.ServerSetting)
116 | if err != nil {
117 | return err
118 | }
119 | err = newSetting.ReadSection("App", &global.AppSetting)
120 | if err != nil {
121 | return err
122 | }
123 | err = newSetting.ReadSection("Database", &global.DatabaseSetting)
124 | if err != nil {
125 | return err
126 | }
127 | err = newSetting.ReadSection("Cache", &global.CacheSetting)
128 | if err != nil {
129 | return err
130 | }
131 | err = newSetting.ReadSection("RabbitMQ", &global.RabbitMQSetting)
132 | if err != nil {
133 | return err
134 | }
135 | err = newSetting.ReadSection("Elastic", &global.ElasticSetting)
136 | if err != nil {
137 | return err
138 | }
139 | err = newSetting.ReadSection("JWT", &global.JWTSetting)
140 | if err != nil {
141 | return err
142 | }
143 | err = newSetting.ReadSection("Email", &global.EmailSetting)
144 | if err != nil {
145 | return err
146 | }
147 | err = newSetting.ReadSection("Consul", &global.ConsulSetting)
148 | if err != nil {
149 | return err
150 | }
151 | err = newSetting.ReadSection("MongoDB", &global.MongoDBSetting)
152 | if err != nil {
153 | return err
154 | }
155 | err = newSetting.ReadSection("Etcd", &global.EtcdSetting)
156 | if err != nil {
157 | return err
158 | }
159 | err = newSetting.ReadSection("IpParser", &global.IpParserSetting)
160 | if err != nil {
161 | return err
162 | }
163 | err = newSetting.ReadSection("Tracer", &global.TracerSetting)
164 | if err != nil {
165 | return err
166 | }
167 |
168 | global.AppSetting.DefaultContextTimeout *= time.Second
169 | global.ServerSetting.ReadTimeout *= time.Second
170 | global.ServerSetting.WriteTimeout *= time.Second
171 | global.JWTSetting.Expire *= time.Second
172 |
173 | return nil
174 | }
175 |
176 | func setupDBEngine() error {
177 | var err error
178 | global.DBEngine, err = db.NewDBEngine(global.DatabaseSetting)
179 | if err != nil {
180 | return err
181 | }
182 | return nil
183 | }
184 |
185 | func setupCacheEngine() error {
186 | var err error
187 | global.RedisEngine, err = cache.NewRedisEngine(global.CacheSetting)
188 | if err != nil {
189 | return err
190 | }
191 | return nil
192 | }
193 |
194 | func setupRabbitMQEngine() error {
195 | var err error
196 | global.RabbitMQEngine, err = mq.NewRabbitMQEngine(global.RabbitMQSetting)
197 | if err != nil {
198 | return err
199 | }
200 | return nil
201 | }
202 |
203 | func setupElasticEngine() error {
204 | var err error
205 | global.ElasticEngine, err = elastic.NewElasticEngine(global.ElasticSetting)
206 | if err != nil {
207 | return err
208 | }
209 | return nil
210 | }
211 |
212 | func setupMongoDBEngine() error {
213 | var err error
214 | global.MongoDBEngine, err = mongoDB.NewMongoDBEngine(global.MongoDBSetting)
215 | if err != nil {
216 | return err
217 | }
218 | return nil
219 | }
220 |
221 | func setupEtcdEngine() error {
222 | var err error
223 | global.EtcdEngine, global.EtcdKV, global.EtcdLease, global.EtcdWatcher, err = etcd.NewEtcdEngine(global.EtcdSetting)
224 | if err != nil {
225 | return err
226 | }
227 | return nil
228 | }
229 |
230 | func setupIpParser() error {
231 | var err error
232 | global.IpParser, err = ipParser.NewIpParser(global.IpParserSetting)
233 | if err != nil {
234 | return err
235 | }
236 | return nil
237 | }
238 |
239 | func setupLogger() error {
240 | fileName := global.AppSetting.LogSavePath + "/" + global.AppSetting.LogFileName + global.AppSetting.LogFileExt
241 | log.Println("log file name ", fileName)
242 | global.Logger = logger.NewLogger(&lumberjack.Logger{
243 | Filename: fileName,
244 | MaxSize: 500,
245 | MaxAge: 10,
246 | LocalTime: true,
247 | }, "", log.LstdFlags).WithCaller(2)
248 |
249 | return nil
250 | }
251 |
252 | func setupTracer() error {
253 | jaegerTracer, _, err := tracer.NewJaegerTracer(global.TracerSetting.ServiceName, global.TracerSetting.Host)
254 | if err != nil {
255 | return err
256 | }
257 | global.Tracer = jaegerTracer
258 | return nil
259 | }
260 |
--------------------------------------------------------------------------------
/internal/crawler/crawerConfig/articleMQConfig.go:
--------------------------------------------------------------------------------
1 | package crawerConfig
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-09-01 19:04
6 | * @Description:
7 | **/
8 |
// String identifiers for the three article pipeline stages.
// NOTE(review): judging by the file name (articleMQConfig.go) these are
// message-queue names — confirm against the callers.
const (
	ArticleList    = "articleList"
	ArticleUrlList = "articleUrlList"
	ArticleDetail  = "articleDetail"
)
12 |
--------------------------------------------------------------------------------
/internal/crawler/crawerConfig/bookMQConfig.go:
--------------------------------------------------------------------------------
1 | package crawerConfig
2 |
/**
* @Author: super
* @Date: 2020-08-14 16:08
* @Description: these values could be moved into the configuration file and read centrally through viper
**/

// String identifiers for the book pipeline stages.
// NOTE(review): judging by the file name (bookMQConfig.go) these are
// message-queue names — confirm against the callers.
const (
	BookDetailUrl = "bookDetailURL"
	BookDetail    = "bookDetail"
	TagUrl        = "tagURL"
)
11 |
--------------------------------------------------------------------------------
/internal/crawler/crawerConfig/start.go:
--------------------------------------------------------------------------------
1 | package crawerConfig
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-09-01 19:05
6 | * @Description:
7 | **/
8 |
const (
	// StartUrl is the first page fetched by a node that has no source queue.
	StartUrl = "https://book.douban.com/tag/"

	//StartUrl = "https://tech.meituan.com/"

	// StopTAG is the message body that tells a consumer to stop reading its
	// queue.
	StopTAG = "finish"
)
14 |
--------------------------------------------------------------------------------
/internal/crawler/crawlOperation.go:
--------------------------------------------------------------------------------
1 | package crawler
2 |
3 | import (
4 | "context"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/internal/crawler/crawerConfig"
7 | "go-crawler-distributed/internal/crawler/worker"
8 | "go-crawler-distributed/pkg/mq"
9 | "sync"
10 | "time"
11 | )
12 |
13 | /**
14 | * @Author: super
15 | * @Date: 2020-08-31 15:20
16 | * @Description:
17 | **/
18 |
19 | //sourceMQ: 配置从哪里读取消息
20 | //targetMQ: 配置将解析好的消息发送到什么位置
21 | //name: 当前工作节点的名称
22 | //function: 页面的具体解析函数
23 | func Crawl(sourceMQ string, targetMQ string, name string, function worker.ParserFunc) {
24 | funcParser := worker.NewFuncParser(function, targetMQ, name)
25 | if sourceMQ == "" {
26 | //代表开始模块
27 | url := crawerConfig.StartUrl
28 | doCrawler(url, funcParser)
29 | } else if targetMQ == "" {
30 | //存储模块
31 | getMessage(sourceMQ, funcParser, true)
32 | } else {
33 | getMessage(sourceMQ, funcParser, false)
34 | }
35 | }
36 |
37 | func getMessage(sourceMQ string, funcParser *worker.FuncParser, isStorage bool) {
38 | messages, err := mq.Consume(sourceMQ)
39 | if err != nil {
40 | global.Logger.Error(context.Background(), err)
41 | return
42 | }
43 | global.Logger.Infof(context.Background(), "parser name: %s", funcParser.Name)
44 |
45 | var wg sync.WaitGroup
46 | for d := range messages {
47 | d.Ack(false)
48 | if string(d.Body) == crawerConfig.StopTAG {
49 | break
50 | } else {
51 | wg.Add(1)
52 | go func(data []byte) {
53 | defer wg.Done()
54 | //是否是保存操作
55 | if isStorage {
56 | doStorage(data, funcParser)
57 | } else {
58 | d := string(data)
59 | doCrawler(d, funcParser)
60 | }
61 | }(d.Body)
62 | }
63 | time.Sleep(time.Second * 2)
64 | }
65 | wg.Wait()
66 | global.Logger.Infof(context.Background(), "finish fetching parser name: %s", funcParser.Name)
67 | }
68 |
69 | func doCrawler(url string, funcParser *worker.FuncParser) {
70 | global.Logger.Infof(context.Background(), "fetching: %s", url)
71 | r := worker.Request{
72 | Url: url,
73 | Parser: funcParser,
74 | }
75 |
76 | worker.Worker(r)
77 | }
78 |
79 | func doStorage(data []byte, funcParser *worker.FuncParser) {
80 | global.Logger.Infof(context.Background(), "saving: %s", data)
81 | funcParser.Parse(data, "")
82 | }
83 |
--------------------------------------------------------------------------------
/internal/crawler/cronJob/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "github.com/robfig/cron/v3"
6 | "time"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2021-01-19 13:52
12 | * @Description: 定时任务
13 | **/
14 |
15 | func main() {
16 | c := cron.New()
17 | //c.AddFunc("1 * * * *", func() { fmt.Println("Every hour on the half hour") })
18 | //c.AddFunc("30 3-6,20-23 * * *", func() { fmt.Println(".. in the range 3-6am, 8-11pm") })
19 | //c.AddFunc("CRON_TZ=Asia/Tokyo 30 04 * * *", func() { fmt.Println("Runs at 04:30 Tokyo time every day") })
20 | //c.AddFunc("@hourly", func() { fmt.Println("Every hour, starting an hour from now") })
21 | //c.AddFunc("@every 1h30m", func() { fmt.Println("Every hour thirty, starting an hour thirty from now") })
22 | c.AddFunc("@every 2s", func() { fmt.Println("Every hour thirty, starting an hour thirty from now") })
23 | c.Start()
24 |
25 | t1 := time.NewTimer(time.Second * 10)
26 | for {
27 | select {
28 | case <-t1.C:
29 | t1.Reset(time.Second * 10)
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/internal/crawler/douban/parser/bookDetail.go:
--------------------------------------------------------------------------------
1 | package parser
2 |
3 | import (
4 | "context"
5 | "github.com/PuerkitoBio/goquery"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/internal/model"
8 | "go-crawler-distributed/pkg/mq"
9 | "regexp"
10 | "strconv"
11 | "strings"
12 | )
13 |
14 | /**
15 | * @Author: super
16 | * @Date: 2020-08-14 14:22
17 | * @Description:
18 | **/
// re extracts a label (group 1) and the text that follows it (group 2);
// ParseBookDetail uses the pair as key and value.
// NOTE(review): the pattern starts with `]*>`, which looks as if a leading
// tag fragment (something like `<span[^>`) was lost — confirm against the
// upstream source before relying on it.
var re = regexp.MustCompile(`]*>([^<]+)]*>([^<]+)<`)

// re1 is a variant of re that allows arbitrary non-'>' characters before the
// second '>'; ParseBookDetail uses it for the series, author and producer
// fields.
// NOTE(review): the same apparent truncation applies to this pattern.
var re1 = regexp.MustCompile(`]*>([^<]+)[^>]*>([^<]+)<`)

// DateRe matches a YYYY-MM-DD date with a non-zero year and a day range
// checked per month (February is limited to 01-28).
var DateRe = regexp.MustCompile(`([0-9]{3}[1-9]|[0-9]{2}[1-9][0-9]{1}|[0-9]{1}[1-9][0-9]{2}|[1-9][0-9]{3})-(((0[13578]|1[02])-(0[1-9]|[12][0-9]|3[01]))|((0[469]|11)-(0[1-9]|[12][0-9]|30))|(02-(0[1-9]|[1][0-9]|2[0-8])))`)

// priceRe matches a run of digits optionally followed by a dot and more
// digits.
var priceRe = regexp.MustCompile(`[0-9]+[.]?[0-9]*`)
23 |
24 | func ParseBookDetail(contents []byte, queueName string, url string) {
25 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents)))
26 | if err != nil {
27 | global.Logger.Error(context.Background(), err)
28 | }
29 |
30 | book := &model.Book{}
31 | book.Url = url
32 |
33 | //封面图片
34 | result := dom.Find("img[title]")
35 | img, _ := result.Attr("src")
36 | book.Img = img
37 | //书名
38 | title, _ := result.Attr("alt")
39 | book.Title = title
40 |
41 | //图书信息
42 | allSubmatch := re.FindAllSubmatch(contents, -1)
43 | for _, m := range allSubmatch {
44 | k := string(m[1])
45 | k = strings.TrimSpace(k)
46 | v := string(m[2])
47 | v = strings.TrimSpace(v)
48 | switch {
49 | case k == "ISBN:":
50 | book.ISBN = v
51 | case k == "出版年:":
52 | dateMatch := DateRe.FindAllSubmatch([]byte(v), -1)
53 | if len(dateMatch) == 0 {
54 | v = "2006-01-02"
55 | }
56 | if v == "" {
57 | v = "2006-01-02"
58 | }
59 | book.PublishYear = v
60 | case k == "副标题:":
61 | book.SubTitle = v
62 | case k == "原作名:":
63 | book.OriginalName = v
64 | case k == "定价:":
65 | priceMatch := priceRe.Find([]byte(v))
66 | if len(priceMatch) == 0 {
67 | v = "0"
68 | } else {
69 | v = string(priceMatch)
70 | }
71 | p, _ := strconv.ParseFloat(v, 64)
72 | book.Price = p
73 | case k == "装帧:":
74 | book.Layout = v
75 | case k == "页数:":
76 | p, _ := strconv.Atoi(v)
77 | book.Pages = p
78 | case k == "出版社:":
79 | book.Publish = v
80 | }
81 | }
82 | allSubmatch = re1.FindAllSubmatch(contents, -1)
83 | for _, m := range allSubmatch {
84 | k := string(m[1])
85 | k = strings.TrimSpace(k)
86 | v := string(m[2])
87 | v = strings.TrimSpace(v)
88 | switch {
89 | case k == "丛书:":
90 | book.Series = v
91 | case k == "作者:":
92 | book.Author = v
93 | case k == "出品方:":
94 | book.Producer = v
95 | }
96 | }
97 |
98 | //评分
99 | result = dom.Find("strong")
100 | score, _ := strconv.ParseFloat(strings.TrimSpace(result.Text()), 64)
101 | book.Score = score
102 |
103 | //评价人数
104 | result = dom.Find("a[class=rating_people]")
105 | length := len(result.Text())
106 | if length <= 9 {
107 | book.Comments = 0
108 | } else {
109 | comment := result.Text()[:length-9]
110 | comments, _ := strconv.Atoi(comment)
111 | book.Comments = comments
112 | }
113 |
114 | //短评
115 | result = dom.Find("div[class=indent]+p")
116 | commentUrl, _ := result.Children().Attr("href")
117 | book.CommentUrl = commentUrl
118 |
119 | //Book结构体转json
120 | bytes, err := book.MarshalJSON()
121 | if err != nil {
122 | global.Logger.Error(context.Background(), err)
123 | } else {
124 | //将解析到的图书详细信息URL放到消息队列
125 | err = mq.Publish(queueName, bytes)
126 | if err != nil {
127 | global.Logger.Error(context.Background(), err)
128 | }
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/internal/crawler/douban/parser/booklist.go:
--------------------------------------------------------------------------------
1 | package parser
2 |
3 | import (
4 | "context"
5 | "github.com/PuerkitoBio/goquery"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/pkg/mq"
8 | "go-crawler-distributed/service/cache/client"
9 | "strings"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2020-08-14 13:54
15 | * @Description:
16 | **/
17 |
18 | func ParseBookList(contents []byte, queueName string, url string) {
19 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents)))
20 | if err != nil {
21 | global.Logger.Error(context.Background(), err)
22 | }
23 |
24 | result := dom.Find("a[title]")
25 | result.Each(func(i int, selection *goquery.Selection) {
26 | href, _ := selection.Attr("href")
27 | global.Logger.Infof(context.Background(), "url: %s", href)
28 |
29 | //redis去重
30 | boolean, _ := client.ElementIsInSet(queueName, href)
31 | if !boolean {
32 | //不再redis中就添加
33 | _, _ = client.AddElementToSet(queueName, href)
34 | //将解析到的图书详细信息URL放到消息队列
35 | err = mq.Publish(queueName, []byte(href))
36 | if err != nil {
37 | global.Logger.Error(context.Background(), err)
38 | }
39 | }
40 | })
41 | }
42 |
--------------------------------------------------------------------------------
/internal/crawler/douban/parser/tagList.go:
--------------------------------------------------------------------------------
1 | package parser
2 |
3 | import (
4 | "context"
5 | "github.com/PuerkitoBio/goquery"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/internal/crawler/crawerConfig"
8 | "go-crawler-distributed/pkg/mq"
9 | "strconv"
10 | "strings"
11 | "sync"
12 | "time"
13 | )
14 |
15 | /**
16 | * @Author: super
17 | * @Date: 2020-08-14 20:49
18 | * @Description:
19 | **/
20 | func ParseTagList(contents []byte, queueName string, url string) {
21 |
22 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents)))
23 | if err != nil {
24 | global.Logger.Error(context.Background(), err)
25 | }
26 |
27 | result := dom.Find("table[class=tagCol]").Find("a")
28 | href := ""
29 | var wg sync.WaitGroup
30 | result.Each(func(i int, selection *goquery.Selection) {
31 | href = url + selection.Text()
32 | for i := 0; i <= 1000; i = i + 20 {
33 | wg.Add(1)
34 | go func(i int) {
35 | defer wg.Done()
36 | url := href + "?start=" + strconv.Itoa(i) + "&type=T"
37 | global.Logger.Infof(context.Background(), "url", url)
38 |
39 | //将解析到的图书详细信息URL放到消息队列
40 | err = mq.Publish(queueName, []byte(href))
41 | if err != nil {
42 | global.Logger.Error(context.Background(), err)
43 | }
44 | }(i)
45 | time.Sleep(time.Millisecond * 100)
46 | }
47 | })
48 | wg.Wait()
49 |
50 | err = mq.Publish(queueName, []byte(crawerConfig.StopTAG))
51 | if err != nil {
52 | global.Logger.Error(context.Background(), err)
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/internal/crawler/douban/storage/bookDetail.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "context"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/internal/dao"
7 | "go-crawler-distributed/internal/model"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-08-16 08:56
13 | * @Description:
14 | **/
15 |
16 | func ParseAndStorage(contents []byte, _ string, _ string) {
17 | book := model.Book{}
18 | err := book.UnmarshalJSON(contents)
19 | if err != nil {
20 | global.Logger.Error(context.Background(), err)
21 | return
22 | }
23 |
24 | bookManager := dao.NewBookManager("books", global.DBEngine)
25 |
26 | _, err = bookManager.SaveBook(book)
27 | if err != nil {
28 | global.Logger.Error(context.Background(), err)
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/internal/crawler/fetcher/fetcher.go:
--------------------------------------------------------------------------------
1 | package fetcher
2 |
3 | import (
4 | "bufio"
5 | "context"
6 | "fmt"
7 | "go-crawler-distributed/global"
8 | "golang.org/x/net/html/charset"
9 | "golang.org/x/text/encoding"
10 | "golang.org/x/text/encoding/unicode"
11 | "golang.org/x/text/transform"
12 | "io"
13 | "io/ioutil"
14 | "net/http"
15 | )
16 |
17 | /**
18 | * @Author: super
19 | * @Date: 2020-08-14 13:47
20 | * @Description:
21 | **/
22 |
23 | func Fetch(url string) ([]byte, error) {
24 | client := &http.Client{}
25 | request, err := http.NewRequest("GET", url, nil)
26 | if err != nil {
27 | global.Logger.Error(context.Background(), err)
28 | return nil, err
29 | }
30 |
31 | request.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36")
32 |
33 | resp, err := client.Do(request)
34 | if err != nil {
35 | return nil, err
36 | }
37 | defer resp.Body.Close()
38 |
39 | if resp.StatusCode != http.StatusOK {
40 | return nil,
41 | fmt.Errorf("wrong status code: %d",
42 | resp.StatusCode)
43 | }
44 |
45 | e := determineEncoding(resp.Body)
46 |
47 | utf8Reader := transform.NewReader(resp.Body, e.NewDecoder())
48 |
49 | return ioutil.ReadAll(utf8Reader)
50 | }
51 |
52 | //自动判断编码
53 | func determineEncoding(r io.Reader) encoding.Encoding {
54 | bytes, err := bufio.NewReader(r).Peek(1024)
55 | if err != nil {
56 | global.Logger.Error(context.Background(), err)
57 | //默认UTF8编码
58 | return unicode.UTF8
59 | }
60 | e, _, _ := charset.DetermineEncoding(bytes, "")
61 | return e
62 | }
63 |
--------------------------------------------------------------------------------
/internal/crawler/meituan/conf/mapping.go:
--------------------------------------------------------------------------------
1 | package conf
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-09-01 17:01
6 | * @Description: 用于存储与elastic的映射
7 | **/
8 |
// Mapping is the JSON index mapping used with elastic (per the file header).
// It declares title, url and content as "text" fields and genres as a
// "keyword" field.
const Mapping = `
{
"mappings": {
"properties": {
"title": {
"type": "text"
},
"url": {
"type": "text"
},
"genres": {
"type": "keyword"
},
"content": {
"type": "text"
}
}
}
}`
28 |
--------------------------------------------------------------------------------
/internal/crawler/meituan/parser/articleDetail.go:
--------------------------------------------------------------------------------
1 | package parser
2 |
3 | import (
4 | "context"
5 | "github.com/PuerkitoBio/goquery"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/internal/model"
8 | "go-crawler-distributed/pkg/mq"
9 | "go-crawler-distributed/pkg/util"
10 | "strings"
11 | )
12 |
13 | /**
14 | * @Author: super
15 | * @Date: 2020-09-01 19:09
16 | * @Description:
17 | **/
18 |
19 | func ParseArticleDetail(contents []byte, queueName string, url string) {
20 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents)))
21 | if err != nil {
22 | global.Logger.Error(context.Background(), err)
23 | }
24 |
25 | article := &model.Article{}
26 |
27 | result := dom.Find("a[rel=bookmark]")
28 | article.Url = url
29 |
30 | title := result.Text()
31 | article.Title = title
32 |
33 | s, err := util.ZipString(contents)
34 | if err != nil {
35 | global.Logger.Error(context.Background(), err)
36 | }
37 | article.Content = s
38 |
39 | result = dom.Find("a[rel=tag]")
40 | result.Each(func(i int, selection *goquery.Selection) {
41 | tag := selection.Text()
42 | article.Genres = append(article.Genres, tag)
43 | })
44 |
45 | //Article结构体转json
46 | bytes, err := article.MarshalJSON()
47 | if err != nil {
48 | global.Logger.Error(context.Background(), err)
49 | } else {
50 | //将解析到的图书详细信息URL放到消息队列
51 | err = mq.Publish(queueName, bytes)
52 | if err != nil {
53 | global.Logger.Error(context.Background(), err)
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/internal/crawler/meituan/parser/articleList.go:
--------------------------------------------------------------------------------
1 | package parser
2 |
3 | import (
4 | "context"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/internal/crawler/crawerConfig"
7 | "go-crawler-distributed/pkg/mq"
8 | "strconv"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-09-01 16:00
14 | * @Description:
15 | **/
16 |
17 | func ParseArticleList(contents []byte, queueName string, url string) {
18 | err := mq.Publish(queueName, []byte(url))
19 | if err != nil {
20 | global.Logger.Error(context.Background(), err)
21 | }
22 | global.Logger.Infof(context.Background(), "url: %s", url)
23 |
24 | for i := 2; i < 22; i++ {
25 | url := "https://tech.meituan.com//page/" + strconv.Itoa(i) + ".html"
26 | global.Logger.Infof(context.Background(), "url: %s", url)
27 | err = mq.Publish(queueName, []byte(url))
28 | if err != nil {
29 | global.Logger.Error(context.Background(), err)
30 | }
31 | }
32 | err = mq.Publish(queueName, []byte(crawerConfig.StopTAG))
33 | if err != nil {
34 | global.Logger.Error(context.Background(), err)
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/internal/crawler/meituan/parser/articleUrlList.go:
--------------------------------------------------------------------------------
1 | package parser
2 |
3 | import (
4 | "context"
5 | "github.com/PuerkitoBio/goquery"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/pkg/mq"
8 | "strings"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-09-01 18:59
14 | * @Description:
15 | **/
16 |
17 | func ParseArticleUrlList(contents []byte, queueName string, _ string) {
18 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents)))
19 | if err != nil {
20 | global.Logger.Error(context.Background(), err)
21 | }
22 |
23 | result := dom.Find("a[rel=bookmark]")
24 | result.Each(func(i int, selection *goquery.Selection) {
25 | href, exist := selection.Attr("href")
26 | if exist {
27 | global.Logger.Infof(context.Background(), "url: %s", href)
28 | //将解析到的图书详细信息URL放到消息队列
29 | err = mq.Publish(queueName, []byte(href))
30 | if err != nil {
31 | global.Logger.Error(context.Background(), err)
32 | }
33 | }
34 | })
35 | }
36 |
--------------------------------------------------------------------------------
/internal/crawler/meituan/storage/articleDetail.go:
--------------------------------------------------------------------------------
1 | package storage
2 |
3 | import (
4 | "context"
5 |
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/internal/model"
8 | "go-crawler-distributed/pkg/util"
9 | "go-crawler-distributed/service/elastic/client"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2020-09-01 19:29
15 | * @Description:
16 | **/
17 |
18 | func StorageArticle(contents []byte, _ string, _ string) {
19 | article := &model.Article{}
20 | err := article.UnmarshalJSON(contents)
21 | if err != nil {
22 | global.Logger.Error(context.Background(), err)
23 | return
24 | }
25 | article.Content = util.UnzipString(article.Content)
26 |
27 | index := global.ElasticSetting.Index
28 | _, _ = client.IndexExist(index)
29 | _, err = client.SaveInfo(index, article)
30 | if err != nil {
31 | global.Logger.Error(context.Background(), err)
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/internal/crawler/persistence/persistence.go:
--------------------------------------------------------------------------------
1 | package persistence
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-08-16 09:01
6 | * @Description:
7 | **/
type (
	// ParseStorage is a function that takes a raw message and reports an
	// error.
	ParseStorage func([]byte) error

	// FuncStorage pairs a ParseStorage function with a name.
	FuncStorage struct {
		Name      string
		ParseFunc ParseStorage
	}
)
14 |
--------------------------------------------------------------------------------
/internal/crawler/worker/types.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-08-16 08:04
6 | * @Description:
7 | **/
8 |
type (
	// ParserFunc handles one fetched page: the page contents, the name of the
	// queue the parser is bound to, and the page URL.
	ParserFunc func(contents []byte, queueName string, url string)

	// Parser is anything that can process page contents for a URL.
	Parser interface {
		Parse(contents []byte, url string)
	}

	// Request is one unit of work: a URL and the parser to run on it.
	Request struct {
		Url    string
		Parser Parser
	}

	// FuncParser adapts a ParserFunc to the Parser interface by binding it to
	// a queue name; Name identifies the node.
	FuncParser struct {
		parser    ParserFunc
		QueueName string
		Name      string
	}
)

// NewFuncParser returns a FuncParser that calls p with mqName as its queue
// name and carries name as its node name.
func NewFuncParser(p ParserFunc, mqName string, name string) *FuncParser {
	fp := new(FuncParser)
	fp.parser = p
	fp.QueueName = mqName
	fp.Name = name
	return fp
}

// Parse forwards contents and url to the wrapped function, inserting the
// bound queue name as the second argument.
func (fp *FuncParser) Parse(contents []byte, url string) {
	handler := fp.parser
	handler(contents, fp.QueueName, url)
}
37 |
--------------------------------------------------------------------------------
/internal/crawler/worker/worker.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import "go-crawler-distributed/internal/crawler/fetcher"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-08-16 07:55
8 | * @Description:
9 | **/
10 | func Worker(r Request) {
11 | contents, _ := fetcher.Fetch(r.Url)
12 | r.Parser.Parse(contents, r.Url)
13 | }
14 |
--------------------------------------------------------------------------------
/internal/crontab/common/constants.go:
--------------------------------------------------------------------------------
1 | package common
2 |
3 | import "errors"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2021-02-06 19:56
8 | * @Description:
9 | **/
10 |
// ERR_LOCK_ALREDAY_REQUIRED is the sentinel error for a lock that is already
// held (message: "the lock is occupied").
var ERR_LOCK_ALREDAY_REQUIRED = errors.New("锁被占用")

// ERR_NO_LOCAL_IP_FOUND is the sentinel error for a failed local address lookup
// (message: "no network interface IP found").
var ERR_NO_LOCAL_IP_FOUND = errors.New("没有找到网卡IP")
13 |
// Key prefixes and event type codes shared by the crontab packages.
const (
	// Directory (key prefix) under which jobs are saved
	JOB_SAVE_DIR = "/cron/jobs/"

	// Directory (key prefix) for force-kill markers
	JOB_KILLER_DIR = "/cron/killer/"

	// Directory (key prefix) for job locks
	JOB_LOCK_DIR = "/cron/lock/"

	// Directory (key prefix) for worker registration
	JOB_WORKER_DIR = "/cron/workers/"

	// Event: a job was saved
	JOB_EVENT_SAVE = 1

	// Event: a job was deleted
	JOB_EVENT_DELETE = 2

	// Event: a job must be force-killed
	JOB_EVENT_KILL = 3
)
36 |
--------------------------------------------------------------------------------
/internal/crontab/common/job.go:
--------------------------------------------------------------------------------
1 | package common
2 |
3 | import (
4 | "context"
5 | "github.com/robfig/cron/v3"
6 | "strings"
7 | "time"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2021-02-06 18:44
13 | * @Description:
14 | **/
15 |
// Job is the definition of one scheduled task, serialized as JSON with the
// keys "name", "command" and "cron_expr".
type Job struct {
	Name     string `json:"name"`      // job name
	Command  string `json:"command"`   // command line of the job
	CronExpr string `json:"cron_expr"` // cron expression describing the schedule
}
21 |
// Run does nothing.
// NOTE(review): presumably exists so *Job satisfies an interface with a Run
// method (e.g. cron.Job) — confirm against callers.
func (job *Job) Run() {
}
24 |
// JobSchedulePlan is the scheduling plan of a job: the job itself, its cron
// expression in raw and parsed form, and the next time it is due.
type JobSchedulePlan struct {
	Job      *Job          // the job to schedule
	Expr     string        // raw cron expression
	Schedule cron.Schedule // parsed cron expression
	NextTime time.Time     // next scheduling time
}
32 |
// JobExecuteInfo describes one execution of a job.
type JobExecuteInfo struct {
	Job        *Job
	PlanTime   time.Time          // scheduling time according to the plan
	RealTime   time.Time          // time the job was actually scheduled
	CancelCtx  context.Context    // context handed to the job's command
	CancelFunc context.CancelFunc // cancels CancelCtx, aborting the command
}
40 |
// JobEvent is a change notification for a job.
type JobEvent struct {
	EventType int // one of the JOB_EVENT_* constants (SAVE, DELETE, KILL)
	Job       *Job
}
46 |
// JobExecuteResult is the outcome of one job execution.
type JobExecuteResult struct {
	ExecuteInfo *JobExecuteInfo // execution state
	Output      []byte          // script output
	Err         error           // reason the script failed, if any
	StartTime   time.Time       // start time
	EndTime     time.Time       // end time
}
55 |
// ExtractJobName extracts the job name from an etcd key by removing the
// JOB_SAVE_DIR prefix, e.g. "/cron/jobs/job10" becomes "job10".
// A key without that prefix is returned unchanged.
func ExtractJobName(jobKey string) string {
	return strings.TrimPrefix(jobKey, JOB_SAVE_DIR)
}
61 |
// ExtractKillerName extracts the job name from a killer key by removing the
// JOB_KILLER_DIR prefix, e.g. "/cron/killer/job10" becomes "job10".
// A key without that prefix is returned unchanged.
func ExtractKillerName(killerKey string) string {
	return strings.TrimPrefix(killerKey, JOB_KILLER_DIR)
}
66 |
67 | // 任务变化事件有2种:1)更新任务 2)删除任务
68 | func BuildJobEvent(eventType int, job *Job) (jobEvent *JobEvent) {
69 | return &JobEvent{
70 | EventType: eventType,
71 | Job: job,
72 | }
73 | }
74 |
75 | // 构造任务执行计划
76 | func BuildJobSchedulePlan(job *Job) (jobSchedulePlan *JobSchedulePlan, err error) {
77 | var (
78 | schedule cron.Schedule
79 | )
80 |
81 | // 解析JOB的cron表达式
82 | if schedule, err = cron.ParseStandard(job.CronExpr); err != nil {
83 | return
84 | }
85 |
86 | // 生成任务调度计划对象
87 | jobSchedulePlan = &JobSchedulePlan{
88 | Job: job,
89 | Expr: job.CronExpr,
90 | Schedule: schedule,
91 | NextTime: schedule.Next(time.Now()),
92 | }
93 | return
94 | }
95 |
96 | // 构造执行状态信息
97 | func BuildJobExecuteInfo(jobSchedulePlan *JobSchedulePlan) (jobExecuteInfo *JobExecuteInfo) {
98 | jobExecuteInfo = &JobExecuteInfo{
99 | Job: jobSchedulePlan.Job,
100 | PlanTime: jobSchedulePlan.NextTime, // 计算调度时间
101 | RealTime: time.Now(), // 真实调度时间
102 | }
103 | jobExecuteInfo.CancelCtx, jobExecuteInfo.CancelFunc = context.WithCancel(context.TODO())
104 | return
105 | }
106 |
// ExtractWorkerIP extracts the worker IP from a registration key by removing
// the JOB_WORKER_DIR prefix. A key without that prefix is returned unchanged.
func ExtractWorkerIP(regKey string) string {
	return strings.TrimPrefix(regKey, JOB_WORKER_DIR)
}
111 |
--------------------------------------------------------------------------------
/internal/crontab/common/job_easyjson.go:
--------------------------------------------------------------------------------
1 | // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
2 |
3 | package common
4 |
5 | import (
6 | json "encoding/json"
7 | easyjson "github.com/mailru/easyjson"
8 | jlexer "github.com/mailru/easyjson/jlexer"
9 | jwriter "github.com/mailru/easyjson/jwriter"
10 | )
11 |
// suppress unused package warning
// Each blank-identifier declaration references one of the imported packages.
// This block is generated code; regenerate instead of editing by hand.
var (
	_ *json.RawMessage
	_ *jlexer.Lexer
	_ *jwriter.Writer
	_ easyjson.Marshaler
)
19 |
// easyjson8a33d6c7DecodeGoCrawlerDistributedInternalCrontabCommon reads one
// JSON object from in and stores the "name", "command" and "cron_expr" fields
// into out. A JSON null leaves out untouched, fields whose value is null are
// skipped, and unknown keys are skipped recursively.
// Generated code; regenerate instead of editing by hand.
func easyjson8a33d6c7DecodeGoCrawlerDistributedInternalCrontabCommon(in *jlexer.Lexer, out *Job) {
	isTopLevel := in.IsStart()
	if in.IsNull() {
		if isTopLevel {
			in.Consumed()
		}
		in.Skip()
		return
	}
	in.Delim('{')
	for !in.IsDelim('}') {
		key := in.UnsafeFieldName(false)
		in.WantColon()
		if in.IsNull() {
			in.Skip()
			in.WantComma()
			continue
		}
		switch key {
		case "name":
			out.Name = string(in.String())
		case "command":
			out.Command = string(in.String())
		case "cron_expr":
			out.CronExpr = string(in.String())
		default:
			in.SkipRecursive()
		}
		in.WantComma()
	}
	in.Delim('}')
	if isTopLevel {
		in.Consumed()
	}
}
// easyjson8a33d6c7EncodeGoCrawlerDistributedInternalCrontabCommon writes in to
// out as a JSON object with the keys "name", "command" and "cron_expr", in
// that order. Generated code; regenerate instead of editing by hand.
func easyjson8a33d6c7EncodeGoCrawlerDistributedInternalCrontabCommon(out *jwriter.Writer, in Job) {
	out.RawByte('{')
	first := true
	_ = first
	{
		const prefix string = ",\"name\":"
		// prefix[1:] drops the leading comma because this is the first field.
		out.RawString(prefix[1:])
		out.String(string(in.Name))
	}
	{
		const prefix string = ",\"command\":"
		out.RawString(prefix)
		out.String(string(in.Command))
	}
	{
		const prefix string = ",\"cron_expr\":"
		out.RawString(prefix)
		out.String(string(in.CronExpr))
	}
	out.RawByte('}')
}
76 |
// MarshalJSON supports json.Marshaler interface
// It encodes v with the generated encoder into a fresh jwriter.Writer and
// returns the built bytes together with the writer's error.
func (v Job) MarshalJSON() ([]byte, error) {
	w := jwriter.Writer{}
	easyjson8a33d6c7EncodeGoCrawlerDistributedInternalCrontabCommon(&w, v)
	return w.Buffer.BuildBytes(), w.Error
}
83 |
// MarshalEasyJSON supports easyjson.Marshaler interface
// It encodes v into the caller-supplied writer w using the generated encoder.
func (v Job) MarshalEasyJSON(w *jwriter.Writer) {
	easyjson8a33d6c7EncodeGoCrawlerDistributedInternalCrontabCommon(w, v)
}
88 |
// UnmarshalJSON supports json.Unmarshaler interface
// It decodes data into v with the generated decoder and returns the lexer's
// error.
func (v *Job) UnmarshalJSON(data []byte) error {
	r := jlexer.Lexer{Data: data}
	easyjson8a33d6c7DecodeGoCrawlerDistributedInternalCrontabCommon(&r, v)
	return r.Error()
}
95 |
// UnmarshalEasyJSON supports easyjson.Unmarshaler interface
// It decodes from the caller-supplied lexer l into v using the generated
// decoder; errors stay on the lexer.
func (v *Job) UnmarshalEasyJSON(l *jlexer.Lexer) {
	easyjson8a33d6c7DecodeGoCrawlerDistributedInternalCrontabCommon(l, v)
}
100 |
--------------------------------------------------------------------------------
/internal/crontab/common/log.go:
--------------------------------------------------------------------------------
1 | package common
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2021-02-07 18:39
6 | * @Description:
7 | **/
8 |
9 | // 任务执行日志
10 | type JobLog struct {
11 | JobName string `json:"jobName" bson:"jobName"` // 任务名字
12 | Command string `json:"command" bson:"command"` // 脚本命令
13 | Err string `json:"err" bson:"err"` // 错误原因
14 | Output string `json:"output" bson:"output"` // 脚本输出
15 | PlanTime int64 `json:"planTime" bson:"planTime"` // 计划开始时间
16 | ScheduleTime int64 `json:"scheduleTime" bson:"scheduleTime"` // 实际调度时间
17 | StartTime int64 `json:"startTime" bson:"startTime"` // 任务执行开始时间
18 | EndTime int64 `json:"endTime" bson:"endTime"` // 任务执行结束时间
19 | }
20 |
21 | // 日志批次,防止每条日志都单次插入数据库中
22 | type LogBatch struct {
23 | Logs []interface{} // 多条日志
24 | }
25 |
26 | // 任务日志过滤条件
27 | type JobLogFilter struct {
28 | JobName string `bson:"jobName"`
29 | }
30 |
31 | // 任务日志排序规则
32 | type SortLogByStartTime struct {
33 | SortOrder int `bson:"startTime"` // {startTime: -1}
34 | }
35 |
--------------------------------------------------------------------------------
/internal/crontab/master/etcd.go:
--------------------------------------------------------------------------------
1 | package master
2 |
3 | import (
4 | "context"
5 | "github.com/coreos/etcd/clientv3"
6 | "github.com/coreos/etcd/mvcc/mvccpb"
7 | "go-crawler-distributed/global"
8 | "go-crawler-distributed/internal/crontab/common"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2021-02-06 19:25
14 | * @Description:
15 | **/
16 |
17 | func EtcdSaveJob(ctx context.Context, job *common.Job) (oldJob *common.Job, err error) {
18 | jobKey := common.JOB_SAVE_DIR + job.Name
19 | jobValue, err := job.MarshalJSON()
20 | if err != nil {
21 | return
22 | }
23 | putResp, err := global.EtcdKV.Put(ctx, jobKey, string(jobValue), clientv3.WithPrevKV())
24 | if err != nil {
25 | return
26 | }
27 | if putResp.PrevKv != nil {
28 | oldJobObj := &common.Job{}
29 | _ = oldJobObj.UnmarshalJSON(putResp.PrevKv.Value)
30 | oldJob = oldJobObj
31 | }
32 | return
33 | }
34 |
35 | func EtcdDeleteJob(ctx context.Context, name string) (oldJob *common.Job, err error) {
36 | jobKey := common.JOB_SAVE_DIR + name
37 |
38 | delResp, err := global.EtcdKV.Delete(ctx, jobKey, clientv3.WithPrevKV())
39 | if err != nil {
40 | return
41 | }
42 | if len(delResp.PrevKvs) != 0 {
43 | oldJobObj := &common.Job{}
44 | _ = oldJobObj.UnmarshalJSON(delResp.PrevKvs[0].Value)
45 | oldJob = oldJobObj
46 | }
47 | return
48 | }
49 |
50 | func EtcdListJobs(ctx context.Context) (jobList []*common.Job, err error) {
51 | dirKey := common.JOB_SAVE_DIR
52 |
53 | getResp, err := global.EtcdKV.Get(ctx, dirKey, clientv3.WithPrefix())
54 | if err != nil {
55 | return
56 | }
57 | jobList = make([]*common.Job, len(getResp.Kvs))
58 | for i := 0; i < len(getResp.Kvs); i++ {
59 | job := &common.Job{}
60 | _ = job.UnmarshalJSON(getResp.Kvs[i].Value)
61 | jobList[i] = job
62 | }
63 | return
64 | }
65 |
66 | func EtcdKillJob(ctx context.Context, name string) (err error) {
67 | killerKey := common.JOB_KILLER_DIR + name
68 |
69 | leaseResp, err := global.EtcdLease.Grant(ctx, 1)
70 | if err != nil {
71 | return
72 | }
73 | leaseId := leaseResp.ID
74 | _, err = global.EtcdKV.Put(ctx, killerKey, "", clientv3.WithLease(leaseId))
75 | return
76 | }
77 |
78 | func ListWorkers()(workerArr []string, err error){
79 | var (
80 | getResp *clientv3.GetResponse
81 | kv *mvccpb.KeyValue
82 | workerIP string
83 | )
84 |
85 | // 初始化数组
86 | workerArr = make([]string, 0)
87 |
88 | // 获取目录下所有Kv
89 | if getResp, err = global.EtcdKV.Get(context.TODO(), common.JOB_WORKER_DIR, clientv3.WithPrefix()); err != nil {
90 | return
91 | }
92 |
93 | // 解析每个节点的IP
94 | for _, kv = range getResp.Kvs {
95 | // kv.Key : /cron/workers/192.168.2.1
96 | workerIP = common.ExtractWorkerIP(string(kv.Key))
97 | workerArr = append(workerArr, workerIP)
98 | }
99 | return
100 | }
101 |
--------------------------------------------------------------------------------
/internal/crontab/worker/etcd.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import (
4 | "context"
5 | "github.com/coreos/etcd/clientv3"
6 | "github.com/coreos/etcd/mvcc/mvccpb"
7 | "github.com/go-acme/lego/v3/log"
8 | "go-crawler-distributed/global"
9 | "go-crawler-distributed/internal/crontab/common"
10 | "net"
11 | "time"
12 | )
13 |
14 | /**
15 | * @Author: super
16 | * @Date: 2021-02-07 17:14
17 | * @Description:
18 | **/
19 |
20 | func WatchJobs(ctx context.Context) (err error) {
21 | getResp, err := global.EtcdKV.Get(ctx, common.JOB_SAVE_DIR, clientv3.WithPrefix())
22 | if err != nil {
23 | return err
24 | }
25 | for i := 0; i < len(getResp.Kvs); i++ {
26 | job := &common.Job{}
27 | err := job.UnmarshalJSON(getResp.Kvs[i].Value)
28 | if err == nil {
29 | jobEvent := common.BuildJobEvent(common.JOB_EVENT_SAVE, job)
30 | //添加到任务调度器
31 | GlobalScheduler.PushJobEvent(jobEvent)
32 | }
33 | }
34 |
35 | revision := getResp.Header.Revision
36 |
37 | go func(watchStartRevision int64) {
38 | watchChan := global.EtcdWatcher.Watch(ctx, common.JOB_SAVE_DIR,
39 | clientv3.WithRev(watchStartRevision),
40 | clientv3.WithPrefix())
41 | for watchResp := range watchChan {
42 | for _, watchEvent := range watchResp.Events {
43 | var jobEvent *common.JobEvent
44 | switch watchEvent.Type {
45 | case mvccpb.PUT:
46 | job := &common.Job{}
47 | err := job.UnmarshalJSON(watchEvent.Kv.Value)
48 | if err != nil {
49 | continue
50 | }
51 | jobEvent = common.BuildJobEvent(common.JOB_EVENT_SAVE, job)
52 | case mvccpb.DELETE:
53 | jobName := common.ExtractJobName(string(watchEvent.Kv.Key))
54 | job := &common.Job{
55 | Name: jobName,
56 | }
57 | jobEvent = common.BuildJobEvent(common.JOB_EVENT_DELETE, job)
58 | }
59 | //将变化情况推送给调度器
60 | GlobalScheduler.PushJobEvent(jobEvent)
61 | }
62 | }
63 | }(revision + 1)
64 | return
65 | }
66 |
67 | func WatchKiller(ctx context.Context) {
68 | go func() {
69 | // 监听/cron/killer/目录的变化
70 | watchChan := global.EtcdWatcher.Watch(ctx, common.JOB_KILLER_DIR, clientv3.WithPrefix())
71 | // 处理监听事件
72 | for watchResp := range watchChan {
73 | for _, watchEvent := range watchResp.Events {
74 | switch watchEvent.Type {
75 | case mvccpb.PUT: // 杀死任务事件
76 | jobName := common.ExtractKillerName(string(watchEvent.Kv.Key))
77 | job := &common.Job{Name: jobName}
78 | jobEvent := common.BuildJobEvent(common.JOB_EVENT_KILL, job)
79 | // 事件推给scheduler
80 | GlobalScheduler.PushJobEvent(jobEvent)
81 | case mvccpb.DELETE: // killer标记过期, 被自动删除
82 | }
83 | }
84 | }
85 | }()
86 | }
87 |
88 | // 获取本机网卡IP
89 | func getLocalIP() (ipv4 string, err error) {
90 | var (
91 | addrs []net.Addr
92 | addr net.Addr
93 | ipNet *net.IPNet // IP地址
94 | isIpNet bool
95 | )
96 | // 获取所有网卡
97 | if addrs, err = net.InterfaceAddrs(); err != nil {
98 | return
99 | }
100 | // 取第一个非lo的网卡IP
101 | for _, addr = range addrs {
102 | // 这个网络地址是IP地址: ipv4, ipv6
103 | if ipNet, isIpNet = addr.(*net.IPNet); isIpNet && !ipNet.IP.IsLoopback() {
104 | // 跳过IPV6
105 | if ipNet.IP.To4() != nil {
106 | ipv4 = ipNet.IP.String() // 192.168.1.1
107 | return
108 | }
109 | }
110 | }
111 | err = common.ERR_NO_LOCAL_IP_FOUND
112 | return
113 | }
114 |
115 | func KeepOnline(){
116 | var (
117 | ip string
118 | regKey string
119 | leaseGrantResp *clientv3.LeaseGrantResponse
120 | err error
121 | keepAliveChan <- chan *clientv3.LeaseKeepAliveResponse
122 | keepAliveResp *clientv3.LeaseKeepAliveResponse
123 | cancelCtx context.Context
124 | cancelFunc context.CancelFunc
125 | )
126 | ip, err = getLocalIP()
127 | if err != nil{
128 | log.Println("ip获取失败", err)
129 | return
130 | }
131 | for {
132 | // 注册路径
133 | regKey = common.JOB_WORKER_DIR + ip
134 |
135 | cancelFunc = nil
136 |
137 | // 创建租约
138 | if leaseGrantResp, err = global.EtcdLease.Grant(context.TODO(), 10); err != nil {
139 | goto RETRY
140 | }
141 |
142 | // 自动续租
143 | if keepAliveChan, err = global.EtcdLease.KeepAlive(context.TODO(), leaseGrantResp.ID); err != nil {
144 | goto RETRY
145 | }
146 |
147 | cancelCtx, cancelFunc = context.WithCancel(context.TODO())
148 |
149 | // 注册到etcd
150 | if _, err = global.EtcdKV.Put(cancelCtx, regKey, "", clientv3.WithLease(leaseGrantResp.ID)); err != nil {
151 | goto RETRY
152 | }
153 |
154 | // 处理续租应答
155 | for {
156 | select {
157 | case keepAliveResp = <- keepAliveChan:
158 | if keepAliveResp == nil { // 续租失败
159 | goto RETRY
160 | }
161 | }
162 | }
163 |
164 | RETRY:
165 | time.Sleep(1 * time.Second)
166 | if cancelFunc != nil {
167 | cancelFunc()
168 | }
169 | }
170 | }
171 |
172 | func CreateJobLocker(jobName string) (jobLocker *JobLocker) {
173 | jobLocker = NewJobLocker(jobName)
174 | return
175 | }
176 |
--------------------------------------------------------------------------------
/internal/crontab/worker/executor.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import (
4 | "go-crawler-distributed/internal/crontab/common"
5 | "math/rand"
6 | "os/exec"
7 | "time"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2021-02-09 10:45
13 | * @Description:
14 | **/
15 |
// Executor runs jobs. It has no fields.
type Executor struct {
}
18 |
var (
	// GlobalExecutor is the package-wide Executor, set by NewExecutor.
	GlobalExecutor *Executor
)
22 |
23 | func (e *Executor) ExecuteJob(info *common.JobExecuteInfo) {
24 | go func() {
25 | var (
26 | cmd *exec.Cmd
27 | err error
28 | output []byte
29 | result *common.JobExecuteResult
30 | jobLocker *JobLocker
31 | )
32 | // 任务结果
33 | result = &common.JobExecuteResult{
34 | ExecuteInfo: info,
35 | Output: make([]byte, 0),
36 | }
37 |
38 | //初始化分布式锁
39 | jobLocker = CreateJobLocker(info.Job.Name)
40 |
41 | result.StartTime = time.Now()
42 |
43 | // 随机睡眠(0~1s),防止单个节点总是抢占任务
44 | time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond)
45 | err = jobLocker.TryLock()
46 | defer jobLocker.Unlock()
47 |
48 | if err != nil {
49 | result.Err = err
50 | result.EndTime = time.Now()
51 | } else {
52 | result.StartTime = time.Now()
53 | // 执行shell命令
54 | cmd = exec.CommandContext(info.CancelCtx, "/bin/bash", "-c", info.Job.Command)
55 |
56 | // 执行并捕获输出
57 | output, err = cmd.CombinedOutput()
58 |
59 | // 记录任务结束时间
60 | result.EndTime = time.Now()
61 | result.Output = output
62 | result.Err = err
63 | }
64 |
65 | GlobalScheduler.PushJobResult(result)
66 | }()
67 | }
68 |
69 | func NewExecutor() (err error) {
70 | GlobalExecutor = &Executor{}
71 | return
72 | }
73 |
--------------------------------------------------------------------------------
/internal/crontab/worker/jobLock.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import (
4 | "context"
5 | "github.com/coreos/etcd/clientv3"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/internal/crontab/common"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2021-02-07 18:53
13 | * @Description:
14 | **/
15 |
// JobLocker is a per-job lock backed by an etcd key under JOB_LOCK_DIR that is
// bound to a lease.
type JobLocker struct {
	JobName    string             // job the lock belongs to
	CancelFunc context.CancelFunc // stops the lease keep-alive; set by TryLock on success
	LeaseId    clientv3.LeaseID   // lease holding the lock key; set by TryLock on success
	IsLocked   bool               // true once TryLock has succeeded
}
22 |
23 | func (jobLocker *JobLocker) TryLock() (err error) {
24 | var (
25 | leaseGrantResp *clientv3.LeaseGrantResponse
26 | cancelCtx context.Context
27 | cancelFunc context.CancelFunc
28 | leaseId clientv3.LeaseID
29 | keepRespChan <-chan *clientv3.LeaseKeepAliveResponse
30 | txn clientv3.Txn
31 | lockKey string
32 | txnResp *clientv3.TxnResponse
33 | )
34 | if leaseGrantResp, err = global.EtcdLease.Grant(context.TODO(), 5); err != nil {
35 | return
36 | }
37 | cancelCtx, cancelFunc = context.WithCancel(context.TODO())
38 | leaseId = leaseGrantResp.ID
39 |
40 | if keepRespChan, err = global.EtcdLease.KeepAlive(cancelCtx, leaseId); err != nil {
41 | cancelFunc()
42 | global.EtcdLease.Revoke(context.TODO(), leaseId)
43 | return
44 | }
45 |
46 | go func() {
47 | var (
48 | keepResp *clientv3.LeaseKeepAliveResponse
49 | )
50 | for {
51 | select {
52 | case keepResp = <-keepRespChan: // 自动续租的应答
53 | if keepResp == nil {
54 | return
55 | }
56 | }
57 | }
58 | }()
59 |
60 | txn = global.EtcdKV.Txn(context.TODO())
61 | lockKey = common.JOB_LOCK_DIR + jobLocker.JobName
62 |
63 | txn.If(clientv3.Compare(clientv3.CreateRevision(lockKey), "=", 0)).
64 | Then(clientv3.OpPut(lockKey, "", clientv3.WithLease(leaseId))).
65 | Else(clientv3.OpGet(lockKey))
66 |
67 | if txnResp, err = txn.Commit(); err != nil {
68 | cancelFunc()
69 | global.EtcdLease.Revoke(context.TODO(), leaseId)
70 | return
71 | }
72 |
73 | if !txnResp.Succeeded {
74 | err = common.ERR_LOCK_ALREDAY_REQUIRED
75 | cancelFunc()
76 | global.EtcdLease.Revoke(context.TODO(), leaseId)
77 | return
78 | }
79 | // 抢锁成功
80 | jobLocker.LeaseId = leaseId
81 | jobLocker.CancelFunc = cancelFunc
82 | jobLocker.IsLocked = true
83 | return
84 | }
85 |
86 | func (jobLocker *JobLocker) Unlock() {
87 | if jobLocker.IsLocked {
88 | jobLocker.CancelFunc() // 取消我们程序自动续租的协程
89 | global.EtcdLease.Revoke(context.TODO(), jobLocker.LeaseId) // 释放租约
90 | }
91 | }
92 |
93 | func NewJobLocker(jobName string) *JobLocker {
94 | return &JobLocker{
95 | JobName: jobName,
96 | }
97 | }
98 |
--------------------------------------------------------------------------------
/internal/crontab/worker/logSink.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import (
4 | "context"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/internal/crontab/common"
7 | "go.mongodb.org/mongo-driver/mongo"
8 | "log"
9 | "time"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2021-02-09 14:06
15 | * @Description:
16 | **/
17 |
// LogSink collects job logs and writes them to a mongo collection in batches.
type LogSink struct {
	LogCollection  *mongo.Collection     // collection the batches are inserted into
	LogChan        chan *common.JobLog   // incoming log entries
	AutoCommitChan chan *common.LogBatch // batches whose commit timer has fired
}
23 |
var (
	// GlobalLogSink is the package-wide LogSink, set by NewLogSink.
	GlobalLogSink *LogSink
)
27 |
28 | func (l *LogSink) SaveLogs(batch *common.LogBatch) {
29 | _, err := l.LogCollection.InsertMany(context.TODO(), batch.Logs)
30 | if err != nil {
31 | log.Println("saveLogs", err)
32 | }
33 | log.Println("saveLogs")
34 | }
35 |
36 | func (l *LogSink) writeLoop() {
37 | var (
38 | jobLog *common.JobLog
39 | logBatch *common.LogBatch // 当前的批次
40 | commitTimer *time.Timer
41 | timeoutBatch *common.LogBatch // 超时批次
42 | )
43 | for {
44 | select {
45 | case jobLog = <-l.LogChan:
46 | if logBatch == nil {
47 | logBatch = &common.LogBatch{}
48 | // 让这个批次超时自动提交(给1秒的时间)
49 | commitTimer = time.AfterFunc(
50 | time.Duration(1000)*time.Millisecond,
51 | func(batch *common.LogBatch) func() {
52 | return func() {
53 | l.AutoCommitChan <- batch
54 | }
55 | }(logBatch),
56 | )
57 | }
58 |
59 | // 把新日志追加到批次中
60 | logBatch.Logs = append(logBatch.Logs, jobLog)
61 |
62 | // 如果批次满了, 就立即发送
63 | if len(logBatch.Logs) >= 100 {
64 | // 发送日志
65 | l.SaveLogs(logBatch)
66 | // 清空logBatch
67 | logBatch = nil
68 | // 取消定时器
69 | commitTimer.Stop()
70 | }
71 | case timeoutBatch = <-l.AutoCommitChan: // 过期的批次
72 | // 判断过期批次是否仍旧是当前的批次
73 | if timeoutBatch != logBatch {
74 | continue // 跳过已经被提交的批次
75 | }
76 | // 把批次写入到mongo中
77 | l.SaveLogs(timeoutBatch)
78 | // 清空logBatch
79 | logBatch = nil
80 | }
81 | }
82 | }
83 |
// Append queues jobLog for writing without blocking.
func (l *LogSink) Append(jobLog *common.JobLog) {
	select {
	case l.LogChan <- jobLog:
	default:
		// The queue is full: drop the entry.
	}
}
92 |
93 | func NewLogSink() (err error) {
94 | GlobalLogSink = &LogSink{
95 | LogCollection: global.MongoDBEngine.Database("cron").Collection("log"),
96 | LogChan: make(chan *common.JobLog, 1000),
97 | AutoCommitChan: make(chan *common.LogBatch, 1000),
98 | }
99 | go GlobalLogSink.writeLoop()
100 | return
101 | }
102 |
--------------------------------------------------------------------------------
/internal/crontab/worker/main/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "go-crawler-distributed/initConf"
6 | "go-crawler-distributed/internal/crontab/worker"
7 | "log"
8 | "time"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2021-02-08 23:04
14 | * @Description:
15 | **/
16 |
17 | func main() {
18 | initConf.Init("/Users/super/develop/go-crawler-distributed/configs/")
19 | if err := worker.NewScheduler(); err != nil {
20 | log.Printf("init NewScheduler err: %v\n", err)
21 | return
22 | }
23 | if err := worker.NewExecutor(); err != nil {
24 | log.Printf("init NewExecutor err: %v\n", err)
25 | return
26 | }
27 | if err := worker.NewLogSink(); err != nil {
28 | log.Printf("init NewLogSink err: %v\n", err)
29 | return
30 | }
31 | if err := worker.WatchJobs(context.Background()); err != nil {
32 | log.Printf("init WatchJobs err: %v\n", err)
33 | return
34 | }
35 | worker.WatchKiller(context.Background())
36 | go worker.KeepOnline()
37 |
38 | // 正常退出
39 | for {
40 | time.Sleep(1 * time.Second)
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/internal/crontab/worker/scheduler.go:
--------------------------------------------------------------------------------
1 | package worker
2 |
3 | import (
4 | "fmt"
5 | "go-crawler-distributed/internal/crontab/common"
6 | "time"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2021-02-08 19:47
12 | * @Description:
13 | **/
14 |
// Scheduler holds the in-memory state of the worker's scheduling loop.
type Scheduler struct {
	JobEventChan      chan *common.JobEvent              // incoming job change events
	JobPlanTable      map[string]*common.JobSchedulePlan // schedule plans, keyed by job name
	JobExecutingTable map[string]*common.JobExecuteInfo  // executions in progress, keyed by job name
	JobResultChan     chan *common.JobExecuteResult      // queue of finished executions
}
21 |
var (
	// GlobalScheduler is the package-wide Scheduler, set by NewScheduler.
	GlobalScheduler *Scheduler
)
25 |
26 | //处理任务事件
27 | func (s *Scheduler) handleJobEvent(jobEvent *common.JobEvent) {
28 | var (
29 | jobSchedulePlan *common.JobSchedulePlan
30 | jobExcuteInfo *common.JobExecuteInfo
31 | jobExcuting bool
32 | jobExisted bool
33 | err error
34 | )
35 | switch jobEvent.EventType {
36 | //保存任务事件
37 | case common.JOB_EVENT_SAVE:
38 | if jobSchedulePlan, err = common.BuildJobSchedulePlan(jobEvent.Job); err != nil {
39 | return
40 | }
41 | s.JobPlanTable[jobEvent.Job.Name] = jobSchedulePlan
42 | //删除任务事件
43 | case common.JOB_EVENT_DELETE:
44 | if jobSchedulePlan, jobExisted = s.JobPlanTable[jobEvent.Job.Name]; jobExisted {
45 | delete(s.JobPlanTable, jobEvent.Job.Name)
46 | }
47 | case common.JOB_EVENT_KILL:
48 | //通过context取消任务
49 | if jobExcuteInfo, jobExcuting = s.JobExecutingTable[jobEvent.Job.Name]; jobExcuting {
50 | jobExcuteInfo.CancelFunc()
51 | }
52 | }
53 | }
54 |
55 | // 任务虽然被调度了,但是可能因为一些原因执行很久,加入1s执行一次的任务,单次任务执行了1分钟
56 | // 当前任务就会被调度60次却只执行1次
57 | func (s *Scheduler) TryStartJob(jobPlan *common.JobSchedulePlan) {
58 | var (
59 | jobExcuteInfo *common.JobExecuteInfo
60 | jobExcuting bool
61 | )
62 | if jobExcuteInfo, jobExcuting = s.JobExecutingTable[jobPlan.Job.Name]; jobExcuting {
63 | return
64 | }
65 | jobExcuteInfo = common.BuildJobExecuteInfo(jobPlan)
66 | s.JobExecutingTable[jobPlan.Job.Name] = jobExcuteInfo
67 | fmt.Println("执行任务", jobExcuteInfo.Job.Name, jobExcuteInfo.PlanTime, jobExcuteInfo.RealTime)
68 | GlobalExecutor.ExecuteJob(jobExcuteInfo)
69 | }
70 |
71 | func (s *Scheduler) TrySchedule() (scheduleAfter time.Duration) {
72 | var (
73 | jobPlan *common.JobSchedulePlan
74 | now time.Time
75 | nearTime *time.Time
76 | )
77 |
78 | if len(s.JobPlanTable) == 0 {
79 | scheduleAfter = 1 * time.Second
80 | return
81 | }
82 |
83 | now = time.Now()
84 | for _, jobPlan = range s.JobPlanTable {
85 | if jobPlan.NextTime.Before(now) || jobPlan.NextTime.Equal(now) {
86 | s.TryStartJob(jobPlan)
87 | jobPlan.NextTime = jobPlan.Schedule.Next(now)
88 | }
89 |
90 | if nearTime == nil || jobPlan.NextTime.Before(*nearTime) {
91 | nearTime = &jobPlan.NextTime
92 | }
93 | }
94 | scheduleAfter = (*nearTime).Sub(now)
95 | return
96 | }
97 |
98 | func (s *Scheduler) handleJobResult(result *common.JobExecuteResult) {
99 | delete(s.JobExecutingTable, result.ExecuteInfo.Job.Name)
100 |
101 | //生成执行日志
102 | if result.Err != common.ERR_LOCK_ALREDAY_REQUIRED {
103 | jobLog := &common.JobLog{
104 | JobName: result.ExecuteInfo.Job.Name,
105 | Command: result.ExecuteInfo.Job.Command,
106 | Output: string(result.Output),
107 | PlanTime: result.ExecuteInfo.PlanTime.UnixNano() / 1000 / 1000,
108 | ScheduleTime: result.ExecuteInfo.RealTime.UnixNano() / 1000 / 1000,
109 | StartTime: result.StartTime.UnixNano() / 1000 / 1000,
110 | EndTime: result.EndTime.UnixNano() / 1000 / 1000,
111 | }
112 | if result.Err != nil {
113 | jobLog.Err = result.Err.Error()
114 | } else {
115 | jobLog.Err = ""
116 | }
117 | GlobalLogSink.Append(jobLog)
118 | }
119 | }
120 |
121 | func (s *Scheduler) schedulerLoop() {
122 | var (
123 | jobEvent *common.JobEvent
124 | scheduleAfter time.Duration
125 | scheduleTimer *time.Timer
126 | jobResult *common.JobExecuteResult
127 | )
128 |
129 | scheduleAfter = s.TrySchedule()
130 | scheduleTimer = time.NewTimer(scheduleAfter)
131 |
132 | for {
133 | select {
134 | //监听任务变化
135 | case jobEvent = <-s.JobEventChan:
136 | //对内存中的任务进行增删改查
137 | s.handleJobEvent(jobEvent)
138 | case <-scheduleTimer.C:
139 | case jobResult = <-s.JobResultChan: //监听任务执行结果
140 | s.handleJobResult(jobResult)
141 | }
142 | scheduleAfter = s.TrySchedule()
143 | scheduleTimer.Reset(scheduleAfter)
144 | }
145 | }
146 |
// PushJobEvent sends jobEvent on JobEventChan; it blocks while that channel is full.
func (s *Scheduler) PushJobEvent(jobEvent *common.JobEvent) {
	s.JobEventChan <- jobEvent
}
150 |
// PushJobResult sends jobResult on JobResultChan; it blocks while that channel is full.
func (s *Scheduler) PushJobResult(jobResult *common.JobExecuteResult) {
	s.JobResultChan <- jobResult
}
154 |
155 | func NewScheduler() (err error) {
156 | GlobalScheduler = &Scheduler{
157 | JobEventChan: make(chan *common.JobEvent, 10000),
158 | JobPlanTable: make(map[string]*common.JobSchedulePlan),
159 | JobExecutingTable: make(map[string]*common.JobExecuteInfo),
160 | JobResultChan: make(chan *common.JobExecuteResult, 1000),
161 | }
162 | go GlobalScheduler.schedulerLoop()
163 | return
164 | }
165 |
--------------------------------------------------------------------------------
/internal/dao/article.go:
--------------------------------------------------------------------------------
1 | package dao
2 |
3 | import (
4 | "github.com/jinzhu/gorm"
5 | "go-crawler-distributed/internal/model"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2021-01-05 15:55
11 | * @Description:
12 | **/
13 |
// IArticle is the persistence interface for articles.
type IArticle interface {
	// SaveArticle stores article and returns a string together with an error.
	SaveArticle(article model.Article) (string, error)
}
17 |
// ArticleManager is the IArticle implementation in this file; it holds a table
// name and a gorm connection.
type ArticleManager struct {
	table string   // table name given to NewArticleManager
	conn  *gorm.DB // database handle given to NewArticleManager
}
22 |
23 | func NewArticleManager(table string, conn *gorm.DB) IArticle {
24 | return &ArticleManager{table: table, conn: conn}
25 | }
26 |
// SaveArticle is a stub: it ignores article, stores nothing, and always
// returns an empty string and a nil error.
func (m *ArticleManager) SaveArticle(article model.Article) (string, error) {
	return "", nil
}
30 |
--------------------------------------------------------------------------------
/internal/dao/book.go:
--------------------------------------------------------------------------------
1 | package dao
2 |
3 | import (
4 | "github.com/jinzhu/gorm"
5 | "go-crawler-distributed/internal/model"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2021-01-05 18:55
11 | * @Description:
12 | **/
13 |
// IBook is the persistence interface for books.
type IBook interface {
	// SaveBook stores book and returns a string together with an error.
	SaveBook(book model.Book) (string, error)
}
17 |
// BookManager is the IBook implementation in this file; it holds a table name
// and a gorm connection.
type BookManager struct {
	table string   // table name given to NewBookManager
	conn  *gorm.DB // database handle given to NewBookManager
}
22 |
23 | func NewBookManager(table string, conn *gorm.DB) IBook {
24 | return &BookManager{table: table, conn: conn}
25 | }
26 |
// SaveBook is a stub: it ignores book, stores nothing, and always returns an
// empty string and a nil error.
func (m *BookManager) SaveBook(book model.Book) (string, error) {
	return "", nil
}
30 |
--------------------------------------------------------------------------------
/internal/dao/dao.go:
--------------------------------------------------------------------------------
1 | package dao
2 |
3 | import "github.com/jinzhu/gorm"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-09-22 09:35
8 | * @Description: 用于统一配置DB引擎
9 | **/
10 |
// Dao wraps the gorm database engine.
type Dao struct {
	engine *gorm.DB // database engine given to New
}
14 |
15 | func New(engine *gorm.DB) *Dao {
16 | return &Dao{engine: engine}
17 | }
18 |
--------------------------------------------------------------------------------
/internal/dao/forbes.go:
--------------------------------------------------------------------------------
1 | package dao
2 |
3 | import (
4 | "errors"
5 | "github.com/jinzhu/gorm"
6 | "go-crawler-distributed/pkg/app"
7 |
8 | "go-crawler-distributed/internal/model"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-12-30 11:21
14 | * @Description:
15 | **/
16 |
// Forbes is the shape in which this package returns a Forbes record; the JSON
// tags give the key of each field.
type Forbes struct {
	NameEn         string `json:"name_en"`
	Wealth         int    `json:"wealth"`
	SourceOfWealth string `json:"source_of_wealth"`
	Region         string `json:"region"`
	ModifiedOn     string `json:"modified_on"`
	ID             string `json:"id"`
	Rank           int    `json:"rank"`
	Name           string `json:"name"`
}
27 |
// IForbes is the read interface for Forbes records.
type IForbes interface {
	// SelectAll returns every record.
	SelectAll() ([]*Forbes, error)
	// SelectList returns one page of records for the given page and pageSize.
	SelectList(page, pageSize int) ([]*Forbes, error)
}
32 |
// ForbesManager is the IForbes implementation in this file.
type ForbesManager struct {
	table string   // table name, used by SelectList
	conn  *gorm.DB // database handle
}
37 |
38 | func NewForbesManager(table string, conn *gorm.DB) IForbes {
39 | return &ForbesManager{table: table, conn: conn}
40 | }
41 |
42 | func (m *ForbesManager) SelectAll() ([]*Forbes, error) {
43 | var f []*model.Forbes
44 | if err := m.conn.Find(&f).Error; err != nil {
45 | return nil, errors.New("select all forbes error")
46 | }
47 | forbess := make([]*Forbes, 0)
48 | for _, forbes := range f {
49 | temp := &Forbes{
50 | ID: forbes.ID,
51 | Rank: forbes.Rank,
52 | Name: forbes.Name,
53 | NameEn: forbes.NameEn,
54 | Wealth: forbes.Wealth,
55 | SourceOfWealth: forbes.SourceOfWealth,
56 | Region: forbes.Region,
57 | ModifiedOn: forbes.ModifiedOn,
58 | }
59 | forbess = append(forbess, temp)
60 | }
61 | return forbess, nil
62 | }
63 |
64 | func (m *ForbesManager) SelectList(page, pageSize int) ([]*Forbes, error) {
65 | pageOffset := app.GetPageOffset(page, pageSize)
66 | if pageOffset < 0 && pageSize < 0 {
67 | pageOffset = 0
68 | pageSize = 5
69 | }
70 | fields := []string{"id", "rank", "name", "name_en", "wealth", "source_of_wealth", "region", "modified_on"}
71 | rows, err := m.conn.Offset(pageOffset).Limit(pageSize).Select(fields).Table(m.table).Rows()
72 | if err != nil {
73 | return nil, err
74 | }
75 | defer rows.Close()
76 |
77 | var forbess []*Forbes
78 | for rows.Next() {
79 | forbes := &Forbes{}
80 | if err := rows.Scan(&forbes.ID,
81 | &forbes.Rank,
82 | &forbes.Name,
83 | &forbes.NameEn,
84 | &forbes.Wealth,
85 | &forbes.SourceOfWealth,
86 | &forbes.Region,
87 | &forbes.ModifiedOn); err != nil {
88 | return nil, err
89 | }
90 | forbess = append(forbess, forbes)
91 | }
92 | return forbess, nil
93 | }
94 |
--------------------------------------------------------------------------------
/internal/middleware/access_log.go:
--------------------------------------------------------------------------------
1 | package middleware
2 |
3 | import (
4 | "bytes"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/pkg/logger"
7 | "time"
8 |
9 | "github.com/gin-gonic/gin"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2020-09-23 20:33
15 | * @Description: 处理访问日志中间件,记录请求参数,响应与响应时间
16 | **/
17 |
18 | type AccessLogWriter struct {
19 | gin.ResponseWriter
20 | body *bytes.Buffer
21 | }
22 |
23 | func (w AccessLogWriter) Write(p []byte) (int, error) {
24 | if n, err := w.body.Write(p); err != nil {
25 | return n, err
26 | }
27 | return w.ResponseWriter.Write(p)
28 | }
29 |
30 | func AccessLog() gin.HandlerFunc {
31 | return func(c *gin.Context) {
32 | bodyWriter := &AccessLogWriter{body: bytes.NewBufferString(""), ResponseWriter: c.Writer}
33 | c.Writer = bodyWriter
34 |
35 | beginTime := time.Now().Unix()
36 | c.Next()
37 | endTime := time.Now().Unix()
38 |
39 | fields := logger.Fields{
40 | "request": c.Request.PostForm.Encode(),
41 | "response": bodyWriter.body.String(),
42 | }
43 | s := "access log: method: %s, status_code: %d, " +
44 | "begin_time: %d, end_time: %d"
45 | global.Logger.WithFields(fields).Infof(c, s,
46 | c.Request.Method,
47 | bodyWriter.Status(),
48 | beginTime,
49 | endTime,
50 | )
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/internal/middleware/context_timeout.go:
--------------------------------------------------------------------------------
1 | package middleware
2 |
3 | import (
4 | "context"
5 | "time"
6 |
7 | "github.com/gin-gonic/gin"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-09-23 21:01
13 | * @Description: 用于处理响应超时,请求超过规定时间则停止执行
14 | **/
15 |
16 | func ContextTimeout(t time.Duration) func(c *gin.Context) {
17 | return func(c *gin.Context) {
18 | ctx, cancel := context.WithTimeout(c.Request.Context(), t)
19 | defer cancel()
20 |
21 | c.Request = c.Request.WithContext(ctx)
22 | c.Next()
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/internal/middleware/recovery.go:
--------------------------------------------------------------------------------
1 | package middleware
2 |
3 | import (
4 | "fmt"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/pkg/app"
7 | "go-crawler-distributed/pkg/email"
8 | "go-crawler-distributed/pkg/errcode"
9 | "time"
10 |
11 | "github.com/gin-gonic/gin"
12 | )
13 |
14 | /**
15 | * @Author: super
16 | * @Date: 2020-09-23 20:45
17 | * @Description: 自定义recovery,主要用于记录异常发生的时间以及错误信息
18 | **/
19 |
20 | func Recovery() gin.HandlerFunc {
21 | mailer := email.NewEmail(&email.SMTPInfo{
22 | Host: global.EmailSetting.Host,
23 | Port: global.EmailSetting.Port,
24 | IsSSL: global.EmailSetting.IsSSL,
25 | UserName: global.EmailSetting.UserName,
26 | Password: global.EmailSetting.Password,
27 | From: global.EmailSetting.From,
28 | })
29 | return func(c *gin.Context) {
30 | defer func() {
31 | if err := recover(); err != nil {
32 | global.Logger.WithCallersFrames().Errorf(c, "panic recover err: %v", err)
33 |
34 | err := mailer.SendMail(
35 | global.EmailSetting.To,
36 | fmt.Sprintf("异常抛出,发生时间: %d", time.Now().Unix()),
37 | fmt.Sprintf("错误信息: %v", err),
38 | )
39 | if err != nil {
40 | global.Logger.Panicf(c, "mail.SendMail err: %v", err)
41 | }
42 |
43 | app.NewResponse(c).ToErrorResponse(errcode.ServerError)
44 | c.Abort()
45 | }
46 | }()
47 | c.Next()
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/internal/middleware/tracer.go:
--------------------------------------------------------------------------------
1 | package middleware
2 |
3 | import (
4 | "context"
5 | "go-crawler-distributed/global"
6 |
7 | "github.com/gin-gonic/gin"
8 | "github.com/opentracing/opentracing-go"
9 | "github.com/opentracing/opentracing-go/ext"
10 | "github.com/uber/jaeger-client-go"
11 | )
12 |
13 | /**
14 | * @Author: super
15 | * @Date: 2020-09-24 08:14
16 | * @Description: 调用链追踪中间件,调用结果可在jaeger ui看到
17 | **/
18 |
19 | func Tracing() func(c *gin.Context) {
20 | return func(c *gin.Context) {
21 | var newCtx context.Context
22 | var span opentracing.Span
23 | spanCtx, err := opentracing.GlobalTracer().Extract(opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(c.Request.Header))
24 | if err != nil {
25 | span, newCtx = opentracing.StartSpanFromContextWithTracer(c.Request.Context(), global.Tracer, c.Request.URL.Path)
26 | } else {
27 | span, newCtx = opentracing.StartSpanFromContextWithTracer(
28 | c.Request.Context(),
29 | global.Tracer,
30 | c.Request.URL.Path,
31 | opentracing.ChildOf(spanCtx),
32 | opentracing.Tag{Key: string(ext.Component), Value: "HTTP"},
33 | )
34 | }
35 | defer span.Finish()
36 |
37 | var traceID string
38 | var spanID string
39 | var spanContext = span.Context()
40 | switch spanContext.(type) {
41 | case jaeger.SpanContext:
42 | jaegerContext := spanContext.(jaeger.SpanContext)
43 | traceID = jaegerContext.TraceID().String()
44 | spanID = jaegerContext.SpanID().String()
45 | }
46 | c.Set("X-Trace-ID", traceID)
47 | c.Set("X-Span-ID", spanID)
48 | c.Request = c.Request.WithContext(newCtx)
49 | c.Next()
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/internal/middleware/translations.go:
--------------------------------------------------------------------------------
1 | package middleware
2 |
3 | import (
4 | "github.com/gin-gonic/gin"
5 | "github.com/gin-gonic/gin/binding"
6 | "github.com/go-playground/locales/en"
7 | "github.com/go-playground/locales/zh"
8 | "github.com/go-playground/locales/zh_Hant_TW"
9 | "github.com/go-playground/universal-translator"
10 | validator "github.com/go-playground/validator/v10"
11 | en_translations "github.com/go-playground/validator/v10/translations/en"
12 | zh_translations "github.com/go-playground/validator/v10/translations/zh"
13 | )
14 |
15 | /**
16 | * @Author: super
17 | * @Date: 2020-09-18 15:06
18 | * @Description: 翻译中间件
19 | **/
20 |
21 | func Translations() gin.HandlerFunc {
22 | return func(c *gin.Context) {
23 | uni := ut.New(en.New(), zh.New(), zh_Hant_TW.New())
24 | locale := c.GetHeader("locale")
25 | trans, _ := uni.GetTranslator(locale)
26 | v, ok := binding.Validator.Engine().(*validator.Validate)
27 | if ok {
28 | switch locale {
29 | case "zh":
30 | _ = zh_translations.RegisterDefaultTranslations(v, trans)
31 | break
32 | case "en":
33 | _ = en_translations.RegisterDefaultTranslations(v, trans)
34 | break
35 | default:
36 | _ = zh_translations.RegisterDefaultTranslations(v, trans)
37 | break
38 | }
39 | c.Set("trans", trans)
40 | }
41 |
42 | c.Next()
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/internal/model/article.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import "fmt"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-09-01 17:23
8 | * @Description:
9 | **/
10 |
// Article is a crawled article: its title, source URL, genre labels and body
// text, serialized to JSON with the keys given in the field tags.
type Article struct {
	Title   string   `json:"title"`
	Url     string   `json:"url"`
	Genres  []string `json:"genres"`
	Content string   `json:"content"`
}

// TableName sets the insert table name for this struct type.
func (a *Article) TableName() string {
	return "articles"
}

// String renders the article as one "name: value" line per field, each line
// terminated by a newline. The genres label was previously misspelled
// "geners".
func (a Article) String() string {
	return fmt.Sprintf("title: %s\n"+
		"url: %s\n"+
		"genres: %v\n"+
		"content: %s\n",
		a.Title,
		a.Url,
		a.Genres,
		a.Content)
}
33 |
--------------------------------------------------------------------------------
/internal/model/article_easyjson.go:
--------------------------------------------------------------------------------
1 | // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
2 |
3 | package model
4 |
5 | import (
6 | json "encoding/json"
7 | easyjson "github.com/mailru/easyjson"
8 | jlexer "github.com/mailru/easyjson/jlexer"
9 | jwriter "github.com/mailru/easyjson/jwriter"
10 | )
11 |
12 | // suppress unused package warning
13 | var (
14 | _ *json.RawMessage
15 | _ *jlexer.Lexer
16 | _ *jwriter.Writer
17 | _ easyjson.Marshaler
18 | )
19 |
20 | func easyjson6de889b8DecodeGoCrawlerDistributedModel(in *jlexer.Lexer, out *Article) {
21 | isTopLevel := in.IsStart()
22 | if in.IsNull() {
23 | if isTopLevel {
24 | in.Consumed()
25 | }
26 | in.Skip()
27 | return
28 | }
29 | in.Delim('{')
30 | for !in.IsDelim('}') {
31 | key := in.UnsafeFieldName(false)
32 | in.WantColon()
33 | if in.IsNull() {
34 | in.Skip()
35 | in.WantComma()
36 | continue
37 | }
38 | switch key {
39 | case "title":
40 | out.Title = string(in.String())
41 | case "url":
42 | out.Url = string(in.String())
43 | case "genres":
44 | if in.IsNull() {
45 | in.Skip()
46 | out.Genres = nil
47 | } else {
48 | in.Delim('[')
49 | if out.Genres == nil {
50 | if !in.IsDelim(']') {
51 | out.Genres = make([]string, 0, 4)
52 | } else {
53 | out.Genres = []string{}
54 | }
55 | } else {
56 | out.Genres = (out.Genres)[:0]
57 | }
58 | for !in.IsDelim(']') {
59 | var v1 string
60 | v1 = string(in.String())
61 | out.Genres = append(out.Genres, v1)
62 | in.WantComma()
63 | }
64 | in.Delim(']')
65 | }
66 | case "content":
67 | out.Content = string(in.String())
68 | default:
69 | in.SkipRecursive()
70 | }
71 | in.WantComma()
72 | }
73 | in.Delim('}')
74 | if isTopLevel {
75 | in.Consumed()
76 | }
77 | }
78 | func easyjson6de889b8EncodeGoCrawlerDistributedModel(out *jwriter.Writer, in Article) {
79 | out.RawByte('{')
80 | first := true
81 | _ = first
82 | {
83 | const prefix string = ",\"title\":"
84 | out.RawString(prefix[1:])
85 | out.String(string(in.Title))
86 | }
87 | {
88 | const prefix string = ",\"url\":"
89 | out.RawString(prefix)
90 | out.String(string(in.Url))
91 | }
92 | {
93 | const prefix string = ",\"genres\":"
94 | out.RawString(prefix)
95 | if in.Genres == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 {
96 | out.RawString("null")
97 | } else {
98 | out.RawByte('[')
99 | for v2, v3 := range in.Genres {
100 | if v2 > 0 {
101 | out.RawByte(',')
102 | }
103 | out.String(string(v3))
104 | }
105 | out.RawByte(']')
106 | }
107 | }
108 | {
109 | const prefix string = ",\"content\":"
110 | out.RawString(prefix)
111 | out.String(string(in.Content))
112 | }
113 | out.RawByte('}')
114 | }
115 |
116 | // MarshalJSON supports json.Marshaler interface
117 | func (v Article) MarshalJSON() ([]byte, error) {
118 | w := jwriter.Writer{}
119 | easyjson6de889b8EncodeGoCrawlerDistributedModel(&w, v)
120 | return w.Buffer.BuildBytes(), w.Error
121 | }
122 |
123 | // MarshalEasyJSON supports easyjson.Marshaler interface
124 | func (v Article) MarshalEasyJSON(w *jwriter.Writer) {
125 | easyjson6de889b8EncodeGoCrawlerDistributedModel(w, v)
126 | }
127 |
128 | // UnmarshalJSON supports json.Unmarshaler interface
129 | func (v *Article) UnmarshalJSON(data []byte) error {
130 | r := jlexer.Lexer{Data: data}
131 | easyjson6de889b8DecodeGoCrawlerDistributedModel(&r, v)
132 | return r.Error()
133 | }
134 |
135 | // UnmarshalEasyJSON supports easyjson.Unmarshaler interface
136 | func (v *Article) UnmarshalEasyJSON(l *jlexer.Lexer) {
137 | easyjson6de889b8DecodeGoCrawlerDistributedModel(l, v)
138 | }
139 |
--------------------------------------------------------------------------------
/internal/model/book.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import "fmt"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-08-14 15:29
8 | * @Description:
9 | **/
// Book is one crawled book record, mapped to database columns through the
// gorm tags and to JSON through the json tags.
//
// BookID and PublishYear previously repeated the gorm tag key
// (`gorm:"..." gorm:"..."`). Only the first occurrence of a key is visible to
// tag lookups, so the primary-key and date-type options were silently dropped;
// they are now merged into a single gorm tag.
type Book struct {
	BookID       int     `gorm:"column:book_id;primary_key" json:"book_id"`
	Title        string  `gorm:"column:title" json:"title"`
	SubTitle     string  `gorm:"column:sub_title" json:"sub_title"`
	Img          string  `gorm:"column:img" json:"img"`
	Author       string  `gorm:"column:author" json:"author"`
	Publish      string  `gorm:"column:publish" json:"publish"`
	Producer     string  `gorm:"column:producer" json:"producer"`
	PublishYear  string  `gorm:"column:publish_year;type:date" json:"publish_year"`
	Pages        int     `gorm:"column:pages" json:"pages"`
	Price        float64 `gorm:"column:price" json:"price"`
	Layout       string  `gorm:"column:layout" json:"layout"`
	Series       string  `gorm:"column:series" json:"series"`
	ISBN         string  `gorm:"column:isbn" json:"isbn"`
	Score        float64 `gorm:"column:score" json:"score"`
	OriginalName string  `gorm:"column:original_name" json:"original_name"`
	Comments     int     `gorm:"column:comments" json:"comments"`
	CommentUrl   string  `gorm:"column:comment_url" json:"comment_url"`
	Url          string  `gorm:"column:url" json:"url"`
}

// TableName sets the insert table name for this struct type.
func (book *Book) TableName() string {
	return "books"
}

// String renders the book as one "name: value" line per field, in declaration
// order. Floats use the default %f precision and the last line has no trailing
// newline.
func (book Book) String() string {
	return fmt.Sprintf("book_id: %d\n"+
		"title: %s\n"+
		"sub_title: %s\n"+
		"img: %s\n"+
		"author: %s\n"+
		"publish: %s\n"+
		"producer: %s\n"+
		"publish_year: %s\n"+
		"pages: %d\n"+
		"price: %f\n"+
		"layout: %s\n"+
		"series: %s\n"+
		"isbn: %s\n"+
		"score: %f\n"+
		"original_name: %s\n"+
		"comments: %d\n"+
		"comment_url: %s\n"+
		"url: %s",
		book.BookID, book.Title, book.SubTitle, book.Img,
		book.Author, book.Publish, book.Producer, book.PublishYear,
		book.Pages, book.Price, book.Layout, book.Series, book.ISBN,
		book.Score, book.OriginalName, book.Comments, book.CommentUrl, book.Url)
}
60 |
--------------------------------------------------------------------------------
/internal/model/book_easyjson.go:
--------------------------------------------------------------------------------
1 | // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
2 |
3 | package model
4 |
5 | import (
6 | json "encoding/json"
7 | easyjson "github.com/mailru/easyjson"
8 | jlexer "github.com/mailru/easyjson/jlexer"
9 | jwriter "github.com/mailru/easyjson/jwriter"
10 | )
11 |
12 | // suppress unused package warning
13 | var (
14 | _ *json.RawMessage
15 | _ *jlexer.Lexer
16 | _ *jwriter.Writer
17 | _ easyjson.Marshaler
18 | )
19 |
20 | func easyjson67646b7bDecodeGoCrawlerDistributedModel(in *jlexer.Lexer, out *Book) {
21 | isTopLevel := in.IsStart()
22 | if in.IsNull() {
23 | if isTopLevel {
24 | in.Consumed()
25 | }
26 | in.Skip()
27 | return
28 | }
29 | in.Delim('{')
30 | for !in.IsDelim('}') {
31 | key := in.UnsafeFieldName(false)
32 | in.WantColon()
33 | if in.IsNull() {
34 | in.Skip()
35 | in.WantComma()
36 | continue
37 | }
38 | switch key {
39 | case "book_id":
40 | out.BookID = int(in.Int())
41 | case "title":
42 | out.Title = string(in.String())
43 | case "sub_title":
44 | out.SubTitle = string(in.String())
45 | case "img":
46 | out.Img = string(in.String())
47 | case "author":
48 | out.Author = string(in.String())
49 | case "publish":
50 | out.Publish = string(in.String())
51 | case "producer":
52 | out.Producer = string(in.String())
53 | case "publish_year":
54 | out.PublishYear = string(in.String())
55 | case "pages":
56 | out.Pages = int(in.Int())
57 | case "price":
58 | out.Price = float64(in.Float64())
59 | case "layout":
60 | out.Layout = string(in.String())
61 | case "series":
62 | out.Series = string(in.String())
63 | case "isbn":
64 | out.ISBN = string(in.String())
65 | case "score":
66 | out.Score = float64(in.Float64())
67 | case "original_name":
68 | out.OriginalName = string(in.String())
69 | case "comments":
70 | out.Comments = int(in.Int())
71 | case "comment_url":
72 | out.CommentUrl = string(in.String())
73 | case "url":
74 | out.Url = string(in.String())
75 | default:
76 | in.SkipRecursive()
77 | }
78 | in.WantComma()
79 | }
80 | in.Delim('}')
81 | if isTopLevel {
82 | in.Consumed()
83 | }
84 | }
85 | func easyjson67646b7bEncodeGoCrawlerDistributedModel(out *jwriter.Writer, in Book) {
86 | out.RawByte('{')
87 | first := true
88 | _ = first
89 | {
90 | const prefix string = ",\"book_id\":"
91 | out.RawString(prefix[1:])
92 | out.Int(int(in.BookID))
93 | }
94 | {
95 | const prefix string = ",\"title\":"
96 | out.RawString(prefix)
97 | out.String(string(in.Title))
98 | }
99 | {
100 | const prefix string = ",\"sub_title\":"
101 | out.RawString(prefix)
102 | out.String(string(in.SubTitle))
103 | }
104 | {
105 | const prefix string = ",\"img\":"
106 | out.RawString(prefix)
107 | out.String(string(in.Img))
108 | }
109 | {
110 | const prefix string = ",\"author\":"
111 | out.RawString(prefix)
112 | out.String(string(in.Author))
113 | }
114 | {
115 | const prefix string = ",\"publish\":"
116 | out.RawString(prefix)
117 | out.String(string(in.Publish))
118 | }
119 | {
120 | const prefix string = ",\"producer\":"
121 | out.RawString(prefix)
122 | out.String(string(in.Producer))
123 | }
124 | {
125 | const prefix string = ",\"publish_year\":"
126 | out.RawString(prefix)
127 | out.String(string(in.PublishYear))
128 | }
129 | {
130 | const prefix string = ",\"pages\":"
131 | out.RawString(prefix)
132 | out.Int(int(in.Pages))
133 | }
134 | {
135 | const prefix string = ",\"price\":"
136 | out.RawString(prefix)
137 | out.Float64(float64(in.Price))
138 | }
139 | {
140 | const prefix string = ",\"layout\":"
141 | out.RawString(prefix)
142 | out.String(string(in.Layout))
143 | }
144 | {
145 | const prefix string = ",\"series\":"
146 | out.RawString(prefix)
147 | out.String(string(in.Series))
148 | }
149 | {
150 | const prefix string = ",\"isbn\":"
151 | out.RawString(prefix)
152 | out.String(string(in.ISBN))
153 | }
154 | {
155 | const prefix string = ",\"score\":"
156 | out.RawString(prefix)
157 | out.Float64(float64(in.Score))
158 | }
159 | {
160 | const prefix string = ",\"original_name\":"
161 | out.RawString(prefix)
162 | out.String(string(in.OriginalName))
163 | }
164 | {
165 | const prefix string = ",\"comments\":"
166 | out.RawString(prefix)
167 | out.Int(int(in.Comments))
168 | }
169 | {
170 | const prefix string = ",\"comment_url\":"
171 | out.RawString(prefix)
172 | out.String(string(in.CommentUrl))
173 | }
174 | {
175 | const prefix string = ",\"url\":"
176 | out.RawString(prefix)
177 | out.String(string(in.Url))
178 | }
179 | out.RawByte('}')
180 | }
181 |
182 | // MarshalJSON supports json.Marshaler interface
183 | func (v Book) MarshalJSON() ([]byte, error) {
184 | w := jwriter.Writer{}
185 | easyjson67646b7bEncodeGoCrawlerDistributedModel(&w, v)
186 | return w.Buffer.BuildBytes(), w.Error
187 | }
188 |
189 | // MarshalEasyJSON supports easyjson.Marshaler interface
190 | func (v Book) MarshalEasyJSON(w *jwriter.Writer) {
191 | easyjson67646b7bEncodeGoCrawlerDistributedModel(w, v)
192 | }
193 |
194 | // UnmarshalJSON supports json.Unmarshaler interface
195 | func (v *Book) UnmarshalJSON(data []byte) error {
196 | r := jlexer.Lexer{Data: data}
197 | easyjson67646b7bDecodeGoCrawlerDistributedModel(&r, v)
198 | return r.Error()
199 | }
200 |
201 | // UnmarshalEasyJSON supports easyjson.Unmarshaler interface
202 | func (v *Book) UnmarshalEasyJSON(l *jlexer.Lexer) {
203 | easyjson67646b7bDecodeGoCrawlerDistributedModel(l, v)
204 | }
205 |
--------------------------------------------------------------------------------
/internal/model/db.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-11-18 15:07
6 | * @Description:
7 | **/
8 |
// Model groups the bookkeeping columns (creation, modification and deletion
// metadata plus the primary key) with their gorm column names and JSON keys.
// NOTE(review): presumably meant to be embedded in table models — confirm
// against its users.
type Model struct {
	CreatedOn  string `gorm:"column:created_on" json:"created_on"`
	CreatedBy  string `gorm:"column:created_by" json:"created_by"`
	DeletedOn  string `gorm:"column:deleted_on" json:"deleted_on"`
	ModifiedBy string `gorm:"column:modified_by" json:"modified_by"`
	ModifiedOn string `gorm:"column:modified_on" json:"modified_on"`
	ID         string `gorm:"column:id;primary_key" json:"id"`
	IsDel      int    `gorm:"column:is_del" json:"is_del"`
}
18 |
--------------------------------------------------------------------------------
/internal/model/forbes.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-12-30 11:18
6 | * @Description: 福布斯排行榜
7 | **/
8 |
// Forbes maps one Forbes ranking entry to its database columns (gorm tags)
// and JSON keys; ID is the primary key.
type Forbes struct {
	NameEn         string `gorm:"column:name_en" json:"name_en"`
	Wealth         int    `gorm:"column:wealth" json:"wealth"`
	SourceOfWealth string `gorm:"column:source_of_wealth" json:"source_of_wealth"`
	Region         string `gorm:"column:region" json:"region"`
	ModifiedOn     string `gorm:"column:modified_on" json:"modified_on"`
	ID             string `gorm:"column:id;primary_key" json:"id"`
	Rank           int    `gorm:"column:rank" json:"rank"`
	Name           string `gorm:"column:name" json:"name"`
}

// TableName reports the table backing Forbes; the receiver is not used.
func (*Forbes) TableName() string {
	const table = "forbes_list"
	return table
}
24 |
--------------------------------------------------------------------------------
/internal/routers/job/job.go:
--------------------------------------------------------------------------------
1 | package job
2 |
3 | import (
4 | "fmt"
5 | "github.com/gin-gonic/gin"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/internal/crontab/common"
8 | "go-crawler-distributed/internal/crontab/master"
9 | "go-crawler-distributed/internal/service"
10 | "go-crawler-distributed/pkg/app"
11 | "go-crawler-distributed/pkg/errcode"
12 | "net/http"
13 | )
14 |
15 | /**
16 | * @Author: super
17 | * @Date: 2021-02-06 16:44
18 | * @Description:
19 | **/
20 |
21 | // 将任务保存到etcd中
22 | func SaveJob(c *gin.Context) {
23 | name, _ := c.GetPostForm("name")
24 | fmt.Println("1", c.Param("name"))
25 | fmt.Println("2", name)
26 | fmt.Println("3", c.PostForm("name"))
27 | fmt.Println()
28 | param := service.SaveJobRequest{}
29 | response := app.NewResponse(c)
30 | valid, errs := app.BindAndValid(c, ¶m)
31 | if !valid {
32 | global.Logger.Errorf(c, "app.BindAndValid errs: %v", errs)
33 | response.ToErrorResponse(errcode.InvalidParams.WithDetails(errs.Errors()...))
34 | return
35 | }
36 | job := &common.Job{
37 | Name: param.Name,
38 | Command: param.Command,
39 | CronExpr: param.CronExpr,
40 | }
41 |
42 | oldJob, err := master.EtcdSaveJob(c, job)
43 | if err != nil {
44 | global.Logger.Errorf(c, "app.EtcdSaveJob err: %v", err)
45 | response.ToErrorResponse(errcode.ErrorSaveFail)
46 | return
47 | }
48 | response.ToResponse(oldJob, "存储任务成功", http.StatusOK)
49 | }
50 |
51 | func DeleteJob(c *gin.Context) {
52 | param := service.DeleteJobRequest{}
53 | response := app.NewResponse(c)
54 | valid, errs := app.BindAndValid(c, ¶m)
55 | if !valid {
56 | global.Logger.Errorf(c, "app.BindAndValid errs: %v", errs)
57 | response.ToErrorResponse(errcode.InvalidParams.WithDetails(errs.Errors()...))
58 | return
59 | }
60 |
61 | oldJob, err := master.EtcdDeleteJob(c, param.Name)
62 | if err != nil {
63 | global.Logger.Errorf(c, "app.EtcdDeleteJob err: %v", err)
64 | response.ToErrorResponse(errcode.ErrorDeleteFail)
65 | return
66 | }
67 | response.ToResponse(oldJob, "删除任务成功", http.StatusOK)
68 | }
69 |
70 | func ListJobs(c *gin.Context) {
71 | response := app.NewResponse(c)
72 | jobs, err := master.EtcdListJobs(c)
73 | if err != nil {
74 | global.Logger.Errorf(c, "app.EtcdListJobs err: %v", err)
75 | response.ToErrorResponse(errcode.ErrorListFail)
76 | return
77 | }
78 | response.ToResponse(jobs, "获取任务列表成功", http.StatusOK)
79 | }
80 |
81 | func KillJob(c *gin.Context) {
82 | param := service.KillJobRequest{}
83 | response := app.NewResponse(c)
84 | valid, errs := app.BindAndValid(c, ¶m)
85 | if !valid {
86 | global.Logger.Errorf(c, "app.BindAndValid errs: %v", errs)
87 | response.ToErrorResponse(errcode.InvalidParams.WithDetails(errs.Errors()...))
88 | return
89 | }
90 |
91 | err := master.EtcdKillJob(c, param.Name)
92 | if err != nil {
93 | global.Logger.Errorf(c, "app.EtcdKillJob err: %v", err)
94 | response.ToErrorResponse(errcode.ErrorDeleteFail)
95 | return
96 | }
97 | response.ToResponse(gin.H{}, "杀死任务成功", http.StatusOK)
98 | }
99 |
100 | func JobLog(c *gin.Context) {
101 | param := service.JobLogRequest{}
102 | pager := app.Pager{Page: app.GetPage(c), PageSize: app.GetPageSize(c)}
103 | response := app.NewResponse(c)
104 | valid, errs := app.BindAndValid(c, ¶m)
105 | if !valid {
106 | global.Logger.Errorf(c, "app.BindAndValid errs: %v", errs)
107 | response.ToErrorResponse(errcode.InvalidParams.WithDetails(errs.Errors()...))
108 | return
109 | }
110 | result, err := service.GetLogList(¶m, &pager)
111 | if err != nil {
112 | global.Logger.Errorf(c, "service.GetLogList err: %v", err)
113 | response.ToErrorResponse(errcode.ErrorLogListFail)
114 | return
115 | }
116 | response.ToResponse(result, "获取日志列表成功", http.StatusOK)
117 | }
118 |
119 | func WorkerList(c *gin.Context) {
120 | response := app.NewResponse(c)
121 | workers, err := master.ListWorkers()
122 | if err != nil {
123 | global.Logger.Errorf(c, "appWorkerList err: %v", err)
124 | response.ToErrorResponse(errcode.ErrorWorkerListFail)
125 | return
126 | }
127 | response.ToResponse(workers, "获取worker列表成功", http.StatusOK)
128 | }
--------------------------------------------------------------------------------
/internal/routers/router.go:
--------------------------------------------------------------------------------
1 | package routers
2 |
3 | import (
4 | "github.com/gin-contrib/cors"
5 | "github.com/gin-gonic/gin"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/internal/middleware"
8 | "go-crawler-distributed/internal/routers/job"
9 | "go-crawler-distributed/internal/routers/sd"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2021-02-06 16:34
15 | * @Description:
16 | **/
17 |
18 | func NewRouter() *gin.Engine {
19 | r := gin.New()
20 | r.Use(cors.Default())
21 | if global.ServerSetting.RunMode == "debug" {
22 | r.Use(gin.Logger())
23 | r.Use(gin.Recovery())
24 | } else {
25 | r.Use(middleware.AccessLog())
26 | r.Use(middleware.Recovery())
27 | }
28 | r.Use(middleware.Tracing())
29 | r.Use(middleware.ContextTimeout(global.AppSetting.DefaultContextTimeout))
30 | r.Use(middleware.Translations())
31 |
32 | svcd := r.Group("/sd")
33 | {
34 | svcd.GET("/health", sd.HealthCheck)
35 | svcd.GET("/disk", sd.DiskCheck)
36 | svcd.GET("/cpu", sd.CPUCheck)
37 | svcd.GET("/ram", sd.RAMCheck)
38 | }
39 |
40 | jobGroup := r.Group("/job")
41 | {
42 | jobGroup.POST("/save", job.SaveJob)
43 | jobGroup.POST("/delete", job.DeleteJob)
44 | jobGroup.GET("/list", job.ListJobs)
45 | jobGroup.POST("/kill", job.KillJob)
46 | jobGroup.GET("/log", job.JobLog)
47 | }
48 | r.GET("/worker/list", job.WorkerList)
49 |
50 | return r
51 | }
52 |
--------------------------------------------------------------------------------
/internal/routers/sd/check.go:
--------------------------------------------------------------------------------
1 | package sd
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 |
7 | "github.com/gin-gonic/gin"
8 | "github.com/shirou/gopsutil/cpu"
9 | "github.com/shirou/gopsutil/disk"
10 | "github.com/shirou/gopsutil/load"
11 | "github.com/shirou/gopsutil/mem"
12 | )
13 |
14 | /**
15 | * @Author: super
16 | * @Date: 2020-08-26 15:14
17 | * @Description: 用于服务的健康检查
18 | **/
19 |
// Byte-size units; each one is 1024 times the previous one.
const (
	B  = 1
	KB = B << 10
	MB = KB << 10
	GB = MB << 10
)
26 |
27 | // HealthCheck shows `OK` as the ping-pong result.
28 | func HealthCheck(c *gin.Context) {
29 | message := "OK"
30 | c.String(http.StatusOK, "\n"+message)
31 | }
32 |
33 | // DiskCheck checks the disk usage.
34 | func DiskCheck(c *gin.Context) {
35 | u, _ := disk.Usage("/")
36 |
37 | usedMB := int(u.Used) / MB
38 | usedGB := int(u.Used) / GB
39 | totalMB := int(u.Total) / MB
40 | totalGB := int(u.Total) / GB
41 | usedPercent := int(u.UsedPercent)
42 |
43 | status := http.StatusOK
44 | text := "OK"
45 |
46 | if usedPercent >= 95 {
47 | status = http.StatusOK
48 | text = "CRITICAL"
49 | } else if usedPercent >= 90 {
50 | status = http.StatusTooManyRequests
51 | text = "WARNING"
52 | }
53 |
54 | message := fmt.Sprintf("%s - Free space: %dMB (%dGB) / %dMB (%dGB) | Used: %d%%", text, usedMB, usedGB, totalMB, totalGB, usedPercent)
55 | c.String(status, "\n"+message)
56 | }
57 |
58 | // CPUCheck checks the cpu usage.
59 | func CPUCheck(c *gin.Context) {
60 | cores, _ := cpu.Counts(false)
61 |
62 | a, _ := load.Avg()
63 | l1 := a.Load1
64 | l5 := a.Load5
65 | l15 := a.Load15
66 |
67 | status := http.StatusOK
68 | text := "OK"
69 |
70 | if l5 >= float64(cores-1) {
71 | status = http.StatusInternalServerError
72 | text = "CRITICAL"
73 | } else if l5 >= float64(cores-2) {
74 | status = http.StatusTooManyRequests
75 | text = "WARNING"
76 | }
77 |
78 | message := fmt.Sprintf("%s - Load average: %.2f, %.2f, %.2f | Cores: %d", text, l1, l5, l15, cores)
79 | c.String(status, "\n"+message)
80 | }
81 |
82 | // RAMCheck checks the disk usage.
83 | func RAMCheck(c *gin.Context) {
84 | u, _ := mem.VirtualMemory()
85 |
86 | usedMB := int(u.Used) / MB
87 | usedGB := int(u.Used) / GB
88 | totalMB := int(u.Total) / MB
89 | totalGB := int(u.Total) / GB
90 | usedPercent := int(u.UsedPercent)
91 |
92 | status := http.StatusOK
93 | text := "OK"
94 |
95 | if usedPercent >= 95 {
96 | status = http.StatusInternalServerError
97 | text = "CRITICAL"
98 | } else if usedPercent >= 90 {
99 | status = http.StatusTooManyRequests
100 | text = "WARNING"
101 | }
102 |
103 | message := fmt.Sprintf("%s - Free space: %dMB (%dGB) / %dMB (%dGB) | Used: %d%%", text, usedMB, usedGB, totalMB, totalGB, usedPercent)
104 | c.String(status, "\n"+message)
105 | }
106 |
--------------------------------------------------------------------------------
/internal/service/job.go:
--------------------------------------------------------------------------------
1 | package service
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/internal/crontab/common"
8 | "go-crawler-distributed/pkg/app"
9 | "go.mongodb.org/mongo-driver/mongo/options"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2021-02-06 19:18
15 | * @Description:
16 | **/
17 |
// SaveJobRequest is the payload for saving a job. Every field is required and
// validated for a minimum length of 2 by the binding tags.
type SaveJobRequest struct {
	Name     string `json:"name" form:"name" binding:"required,min=2,max=4294967295"`
	Command  string `json:"command" form:"command" binding:"required,min=2,max=4294967295"`
	CronExpr string `json:"cronExpr" form:"cronExpr" binding:"required,min=2,max=4294967295"`
}
23 |
// DeleteJobRequest is the payload for deleting a job; Name is required.
type DeleteJobRequest struct {
	Name string `json:"name" form:"name" binding:"required,min=2,max=4294967295"`
}
27 |
// KillJobRequest is the payload for killing a job; Name is required.
type KillJobRequest struct {
	Name string `json:"name" form:"name" binding:"required,min=2,max=4294967295"`
}
31 |
// JobLogRequest is the payload for listing a job's log entries; Name is
// required.
type JobLogRequest struct {
	Name string `json:"name" form:"name" binding:"required,min=2,max=4294967295"`
}
35 |
36 | type IJobLogService interface {
37 | GetLogList(param *JobLogRequest, pager *app.Pager) (*common.JobLog, error)
38 | }
39 |
40 | func GetLogList(param *JobLogRequest, pager *app.Pager) ([]*common.JobLog, error) {
41 | filter := &common.JobLogFilter{
42 | JobName: param.Name,
43 | }
44 | fmt.Println(param.Name)
45 | logSort := &common.SortLogByStartTime{
46 | SortOrder: -1,
47 | }
48 | if pager.PageSize == 0 {
49 | pager.PageSize = 20
50 | }
51 | collection := global.MongoDBEngine.Database("cron").Collection("log")
52 |
53 | skip := int64(pager.Page)
54 | limit := int64(pager.PageSize)
55 | op := &options.FindOptions{
56 | Sort: logSort,
57 | Skip: &skip,
58 | Limit: &limit,
59 | }
60 | cursor, err := collection.Find(context.TODO(), filter, op)
61 | if err != nil {
62 | return nil, err
63 | }
64 | defer cursor.Close(context.TODO())
65 |
66 | result := make([]*common.JobLog, 0)
67 | for cursor.Next(context.TODO()) {
68 | jobLog := &common.JobLog{}
69 | if err := cursor.Decode(jobLog); err != nil {
70 | continue
71 | }
72 | result = append(result, jobLog)
73 | }
74 | return result, nil
75 | }
76 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "flag"
7 | "go-crawler-distributed/global"
8 | "go-crawler-distributed/initConf"
9 | "go-crawler-distributed/internal/routers"
10 | "log"
11 | "net/http"
12 | "time"
13 | )
14 |
15 | /**
16 | * @Author: super
17 | * @Date: 2020-08-21 20:37
18 | * @Description:
19 | **/
// Command-line options; setupFlag binds each of them to a flag.
var port string // -port

var runMode string // -mode

var config string // -config, defaults to "configs/"

var isVersion bool // -version
26 |
27 | func init() {
28 | err := setupFlag()
29 | if err != nil {
30 | log.Printf("init setupSetting err: %v\n", err)
31 | }
32 | initConf.Init(config)
33 | }
34 |
35 | func main() {
36 | router := routers.NewRouter()
37 | s := &http.Server{
38 | Addr: ":" + global.ServerSetting.HttpPort,
39 | Handler: router,
40 | ReadTimeout: global.ServerSetting.ReadTimeout * time.Second,
41 | WriteTimeout: global.ServerSetting.WriteTimeout * time.Second,
42 | MaxHeaderBytes: 1 << 20,
43 | }
44 |
45 | go func() {
46 | if err := pingServer(); err != nil {
47 | global.Logger.Errorf(context.Background(), "The server has no response, or it might took too long to start up.")
48 | }
49 | global.Logger.Info(context.Background(), "The server has been deployed successfully.")
50 | }()
51 |
52 | global.Logger.Infof(context.Background(), "Start to listening the incoming requests on http address :%s", global.ServerSetting.HttpPort)
53 | err := s.ListenAndServe()
54 | if err != nil {
55 | global.Logger.Fatalf(context.Background(), "start listen server err: %v", err)
56 | }
57 | }
58 |
59 | func setupFlag() error {
60 | flag.StringVar(&port, "port", "", "启动端口")
61 | flag.StringVar(&runMode, "mode", "", "启动模式")
62 | flag.StringVar(&config, "config", "configs/", "指定要使用的配置文件路径")
63 | flag.BoolVar(&isVersion, "version", false, "编译信息")
64 | flag.Parse()
65 |
66 | return nil
67 | }
68 |
69 | // pingServer pings the http server to make sure the router is working.
70 | func pingServer() error {
71 | for i := 0; i < 3; i++ {
72 | time.Sleep(time.Second)
73 | // Ping the server by sending a GET request to `/health`.
74 | resp, err := http.Get(":" + global.ServerSetting.HttpPort + "/sd/health")
75 | if err == nil && resp.StatusCode == 200 {
76 | return nil
77 | }
78 | // Sleep for a second to continue the next ping.
79 | global.Logger.Info(context.Background(), "Waiting for the server, retry in 1 second.")
80 | }
81 | return errors.New("cannot connect to the server")
82 | }
83 |
--------------------------------------------------------------------------------
/pkg/app/app.go:
--------------------------------------------------------------------------------
1 | package app
2 |
3 | import (
4 | "github.com/gin-gonic/gin"
5 | "go-crawler-distributed/pkg/errcode"
6 |
7 | "net/http"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-09-18 10:11
13 | * @Description: 设置统一响应与分页
14 | **/
15 |
16 | type Response struct {
17 | Ctx *gin.Context
18 | }
19 |
20 | type Meta struct {
21 | Msg string `json:"msg"`
22 | Status int `json:"status"`
23 | }
24 |
25 | type Pager struct {
26 | // 页码
27 | Page int `json:"page"`
28 | // 每页数量
29 | PageSize int `json:"page_size"`
30 | // 总行数
31 | TotalRows int `json:"total_rows"`
32 | }
33 |
34 | func NewResponse(ctx *gin.Context) *Response {
35 | return &Response{
36 | Ctx: ctx,
37 | }
38 | }
39 |
40 | func (r *Response) ToResponse(data interface{}, msg string, status int) {
41 | if data == nil {
42 | data = gin.H{}
43 | } else {
44 | data = gin.H{
45 | "data": data,
46 | "meta": Meta{
47 | Msg: msg,
48 | Status: status,
49 | },
50 | }
51 | }
52 | r.Ctx.JSON(http.StatusOK, data)
53 | }
54 |
55 | func (r *Response) ToErrorResponse(err *errcode.Error) {
56 | response := gin.H{
57 | "data": gin.H{},
58 | "meta": Meta{
59 | Msg: err.Msg(),
60 | Status: err.Code(),
61 | },
62 | }
63 | details := err.Details()
64 | if len(details) > 0 {
65 | response["details"] = details
66 | }
67 |
68 | r.Ctx.JSON(err.StatusCode(), response)
69 | }
70 |
--------------------------------------------------------------------------------
/pkg/app/form.go:
--------------------------------------------------------------------------------
1 | package app
2 |
3 | import (
4 | "strings"
5 |
6 | "github.com/gin-gonic/gin"
7 | ut "github.com/go-playground/universal-translator"
8 | val "github.com/go-playground/validator/v10"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-09-18 15:26
14 | * @Description: 统一参数校验
15 | **/
16 |
// ValidError is one validation failure: the offending key and its message.
type ValidError struct {
	Key     string
	Message string
}

// ValidErrors is a list of validation failures that is itself an error.
type ValidErrors []*ValidError

// Error returns the failure message.
func (v *ValidError) Error() string {
	return v.Message
}

// Error returns all failure messages joined with commas.
func (v ValidErrors) Error() string {
	messages := v.Errors()
	return strings.Join(messages, ",")
}

// Errors returns the message of every failure in order, or nil when the list
// is empty.
func (v ValidErrors) Errors() []string {
	if len(v) == 0 {
		return nil
	}

	messages := make([]string, 0, len(v))
	for i := range v {
		messages = append(messages, v[i].Error())
	}
	return messages
}
40 |
41 | func BindAndValid(c *gin.Context, v interface{}) (bool, ValidErrors) {
42 | var errs ValidErrors
43 | err := c.ShouldBind(v)
44 | if err != nil {
45 | v := c.Value("trans")
46 | trans, _ := v.(ut.Translator)
47 | verrs, ok := err.(val.ValidationErrors)
48 | if !ok {
49 | return false, errs
50 | }
51 |
52 | for key, value := range verrs.Translate(trans) {
53 | errs = append(errs, &ValidError{
54 | Key: key,
55 | Message: value,
56 | })
57 | }
58 |
59 | return false, errs
60 | }
61 |
62 | return true, nil
63 | }
64 |
--------------------------------------------------------------------------------
/pkg/app/pagination.go:
--------------------------------------------------------------------------------
1 | package app
2 |
3 | import (
4 | "github.com/gin-gonic/gin"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/pkg/convert"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-09-18 10:11
12 | * @Description: 获取与分页相关的内容
13 | **/
14 |
15 | func GetPage(c *gin.Context) int {
16 | page := convert.StrTo(c.Query("page")).MustInt()
17 | if page <= 0 {
18 | return 1
19 | }
20 |
21 | return page
22 | }
23 |
24 | func GetPageSize(c *gin.Context) int {
25 | pageSize := convert.StrTo(c.Query("page_size")).MustInt()
26 | if pageSize <= 0 {
27 | return global.AppSetting.DefaultPageSize
28 | }
29 | if pageSize > global.AppSetting.MaxPageSize {
30 | return global.AppSetting.MaxPageSize
31 | }
32 |
33 | return pageSize
34 | }
35 |
// GetPageOffset converts a 1-based page number into a record offset:
// (page-1)*pageSize, or 0 when page is not positive.
func GetPageOffset(page, pageSize int) int {
	if page <= 0 {
		return 0
	}
	return (page - 1) * pageSize
}
44 |
--------------------------------------------------------------------------------
/pkg/cache/cache.go:
--------------------------------------------------------------------------------
1 | package cache
2 |
3 | import (
4 | "github.com/garyburd/redigo/redis"
5 |
6 | "go-crawler-distributed/pkg/setting"
7 |
8 | "time"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-11-18 11:35
14 | * @Description: 根据配置创建redis连接池
15 | **/
16 |
17 | func NewRedisEngine(cacheSetting *setting.CacheSettingS) (*redis.Pool, error) {
18 | return &redis.Pool{
19 | MaxIdle: cacheSetting.MaxIdle,
20 | MaxActive: cacheSetting.MaxActive,
21 | IdleTimeout: 300 * time.Second,
22 | // 如果空闲列表中没有可用的连接,且当前Active连接数 < MaxActive, 则等待
23 | Wait: true,
24 | Dial: func() (redis.Conn, error) {
25 | conn, err := redis.Dial("tcp", cacheSetting.Host)
26 | if err != nil {
27 | return nil, err
28 | }
29 | return conn, nil
30 | },
31 | TestOnBorrow: func(conn redis.Conn, t time.Time) error {
32 | if time.Since(t) < time.Minute {
33 | return nil
34 | }
35 | _, err := conn.Do("PING")
36 | return err
37 | },
38 | }, nil
39 | }
40 |
--------------------------------------------------------------------------------
/pkg/cache/cacheOperation.go:
--------------------------------------------------------------------------------
1 | package cache
2 |
3 | import (
4 | "github.com/garyburd/redigo/redis"
5 |
6 | "go-crawler-distributed/global"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-12-29 11:20
12 | * @Description:
13 | **/
14 |
15 | func SetString(key string, value string) (string, error) {
16 | c := global.RedisEngine.Get()
17 | defer c.Close()
18 | str, err := redis.String(c.Do("set", key, value))
19 | if err != nil {
20 | return "", err
21 | }
22 | return str, err
23 | }
24 |
25 | func GetString(key string) (string, error) {
26 | c := global.RedisEngine.Get()
27 | defer c.Close()
28 | str, err := redis.String(c.Do("get", key))
29 | if err != nil {
30 | return "", err
31 | }
32 | return str, err
33 | }
34 |
35 | func AddElementToSet(key string, value string) (int, error) {
36 | c := global.RedisEngine.Get()
37 | defer c.Close()
38 | result, err := redis.Int(c.Do("sadd", key, value))
39 | if err != nil {
40 | return -1, err
41 | }
42 | return result, err
43 | }
44 |
45 | func ElementIsInSet(key string, value string) (bool, error) {
46 | c := global.RedisEngine.Get()
47 | defer c.Close()
48 | result, err := redis.Int(c.Do("sismember", key, value))
49 | if err != nil {
50 | return false, err
51 | }
52 | if result == 1 {
53 | return true, err
54 | }
55 | return false, err
56 | }
57 |
58 | func GetAllElementFromSet(key string) ([]string, error) {
59 | c := global.RedisEngine.Get()
60 | defer c.Close()
61 | return redis.Strings(c.Do("smembers", key))
62 | }
63 |
64 | func DelAllElementFromSet(key string) (int, error) {
65 | c := global.RedisEngine.Get()
66 | defer c.Close()
67 | return redis.Int(c.Do("DEL", key))
68 | }
69 |
--------------------------------------------------------------------------------
/pkg/cache/cache_test.go:
--------------------------------------------------------------------------------
1 | package cache
2 |
3 | import (
4 | "fmt"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/pkg/setting"
7 | "strings"
8 | "testing"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-12-29 11:31
14 | * @Description:
15 | **/
16 |
17 | func TestAddElementToSet(t *testing.T) {
18 | newSetting, err := setting.NewSetting(strings.Split("/Users/super/develop/superTools-frontground-backend/configs", ",")...)
19 | if err != nil {
20 | t.Error(err)
21 | }
22 | err = newSetting.ReadSection("Cache", &global.CacheSetting)
23 | if err != nil {
24 | t.Error(err)
25 | }
26 | global.RedisEngine, err = NewRedisEngine(global.CacheSetting)
27 | if err != nil {
28 | t.Error(err)
29 | }
30 | result, err := AddElementToSet("hello", "1")
31 | if err != nil {
32 | t.Error(err)
33 | }
34 | fmt.Println(result)
35 | }
36 |
37 | func BenchmarkAddElementToSet(b *testing.B) {
38 | newSetting, err := setting.NewSetting(strings.Split("/Users/super/develop/superTools-frontground-backend/configs", ",")...)
39 | if err != nil {
40 | b.Error(err)
41 | }
42 | err = newSetting.ReadSection("Cache", &global.CacheSetting)
43 | if err != nil {
44 | b.Error(err)
45 | }
46 | global.RedisEngine, err = NewRedisEngine(global.CacheSetting)
47 | if err != nil {
48 | b.Error(err)
49 | }
50 | for i := 0; i < b.N; i++ {
51 | _, err := AddElementToSet("hello", "1")
52 | if err != nil {
53 | b.Error(err)
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/pkg/consistentHash/consistent.go:
--------------------------------------------------------------------------------
1 | package consistentHash
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-12-16 16:56
6 | * @Description: 一致性哈希算法实现
7 | **/
8 | import (
9 | "errors"
10 | "hash/crc32"
11 | "sort"
12 | "strconv"
13 | "sync"
14 | )
15 |
// Consistent-hashing ring.

// units is a slice of ring positions that implements sort.Interface.
type units []uint32

// Len returns the number of positions.
func (x units) Len() int {
	return len(x)
}

// Less reports whether position i sorts before position j.
func (x units) Less(i, j int) bool {
	return x[i] < x[j]
}

// Swap exchanges positions i and j.
func (x units) Swap(i, j int) {
	x[i], x[j] = x[j], x[i]
}

// errEmpty is returned by Get when the ring holds no nodes.
var errEmpty = errors.New("hash环没有数据")

// Consistent is a consistent-hashing ring guarded by an embedded RWMutex.
type Consistent struct {
	// circle maps a ring position (hash) to the node stored there.
	circle map[uint32]string
	// sortedHashes holds every key of circle in ascending order.
	sortedHashes units
	// VirtualNode is the number of ring positions created per node.
	VirtualNode int
	// RWMutex guards circle and sortedHashes.
	sync.RWMutex
}

// NewConsistent returns an empty ring with 20 virtual nodes per element.
func NewConsistent() *Consistent {
	return &Consistent{
		circle:      make(map[uint32]string),
		VirtualNode: 20,
	}
}

// generateKey builds the key of the index-th virtual node of element.
func (c *Consistent) generateKey(element string, index int) string {
	return element + strconv.Itoa(index)
}

// hashkey returns the CRC-32 (IEEE) checksum of key.
func (c *Consistent) hashkey(key string) uint32 {
	if len(key) < 64 {
		// Short keys are copied into a fixed-size scratch array first.
		var scratch [64]byte
		copy(scratch[:], key)
		return crc32.ChecksumIEEE(scratch[:len(key)])
	}
	return crc32.ChecksumIEEE([]byte(key))
}

// updateSortedHashes rebuilds sortedHashes from the keys of circle.
// The caller must hold the write lock.
func (c *Consistent) updateSortedHashes() {
	hashes := c.sortedHashes[:0]
	// Drop the old backing array when it is far larger than needed. The
	// VirtualNode > 0 guard avoids a divide-by-zero panic when the exported
	// field has been set to zero.
	if c.VirtualNode > 0 && cap(c.sortedHashes)/(c.VirtualNode*4) > len(c.circle) {
		hashes = nil
	}

	for k := range c.circle {
		hashes = append(hashes, k)
	}

	// Sorted order is what makes the binary search in search possible.
	sort.Sort(hashes)
	c.sortedHashes = hashes
}

// Add places element on the ring under the write lock.
func (c *Consistent) Add(element string) {
	c.Lock()
	defer c.Unlock()
	c.add(element)
}

// add inserts VirtualNode positions for element and re-sorts the ring.
// The caller must hold the write lock.
func (c *Consistent) add(element string) {
	// A zero-value Consistent has a nil map; create it on first use.
	if c.circle == nil {
		c.circle = make(map[uint32]string)
	}
	for i := 0; i < c.VirtualNode; i++ {
		c.circle[c.hashkey(c.generateKey(element, i))] = element
	}
	c.updateSortedHashes()
}

// remove deletes the positions of element and re-sorts the ring.
// The caller must hold the write lock.
func (c *Consistent) remove(element string) {
	for i := 0; i < c.VirtualNode; i++ {
		delete(c.circle, c.hashkey(c.generateKey(element, i)))
	}
	c.updateSortedHashes()
}

// Remove takes element off the ring under the write lock.
func (c *Consistent) Remove(element string) {
	c.Lock()
	defer c.Unlock()
	c.remove(element)
}

// search returns the index in sortedHashes of the first position greater than
// key, wrapping around to 0 when key is past the last position.
func (c *Consistent) search(key uint32) int {
	i := sort.Search(len(c.sortedHashes), func(x int) bool {
		return c.sortedHashes[x] > key
	})
	if i >= len(c.sortedHashes) {
		i = 0
	}
	return i
}

// Get returns the node responsible for name, or errEmpty when the ring has no
// nodes. It holds the read lock for the duration of the lookup.
func (c *Consistent) Get(name string) (string, error) {
	c.RLock()
	defer c.RUnlock()

	if len(c.circle) == 0 {
		return "", errEmpty
	}
	i := c.search(c.hashkey(name))
	return c.circle[c.sortedHashes[i]], nil
}
165 |
--------------------------------------------------------------------------------
/pkg/convert/convert.go:
--------------------------------------------------------------------------------
1 | package convert
2 |
3 | import "strconv"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-09-18 10:10
8 | * @Description: 用于处理数据转换
9 | **/
10 |
// StrTo is a string with helpers for converting it to numeric types.
type StrTo string

// String returns the underlying string.
func (s StrTo) String() string {
	return string(s)
}

// Int parses s as a base-10 int.
func (s StrTo) Int() (int, error) {
	return strconv.Atoi(s.String())
}

// MustInt parses s as an int and discards the parse error; unparsable input
// yields 0.
func (s StrTo) MustInt() int {
	v, _ := s.Int()
	return v
}

// UInt32 parses s as a base-10 unsigned 32-bit integer.
//
// Negative or out-of-range input returns 0 and an error. The previous
// Atoi-and-cast implementation silently wrapped such values (for example
// "-1" became 4294967295 with a nil error).
func (s StrTo) UInt32() (uint32, error) {
	v, err := strconv.ParseUint(s.String(), 10, 32)
	if err != nil {
		return 0, err
	}
	return uint32(v), nil
}

// MustUInt32 parses s as a uint32 and discards the parse error; invalid input
// yields 0.
func (s StrTo) MustUInt32() uint32 {
	v, _ := s.UInt32()
	return v
}

// MustInt64 parses s as a base-10 int64 and discards the parse error. It
// parses at 64 bits directly so the result is not limited by the platform
// int size.
func (s StrTo) MustInt64() int64 {
	v, _ := strconv.ParseInt(s.String(), 10, 64)
	return v
}
41 |
--------------------------------------------------------------------------------
/pkg/db/db.go:
--------------------------------------------------------------------------------
1 | package db
2 |
3 | import (
4 | "fmt"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/pkg/otgorm"
7 | "go-crawler-distributed/pkg/setting"
8 | "time"
9 |
10 | "github.com/jinzhu/gorm"
11 | _ "github.com/jinzhu/gorm/dialects/mysql"
12 | )
13 |
14 | /**
15 | * @Author: super
16 | * @Date: 2020-09-16 07:42
17 | * @Description: 统一定义数据库公共字段
18 | **/
19 |
// STATE_OPEN is the "open" state value.
// NOTE(review): no use of these constants is visible in this file.
const STATE_OPEN = 1

// STATE_CLOSE is the "closed" state value.
const STATE_CLOSE = 0
24 |
25 | //根据配置获取对应的db连接
26 | func NewDBEngine(databaseSetting *setting.DatabaseSettingS) (*gorm.DB, error) {
27 | db, err := gorm.Open(databaseSetting.DBType, fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=%s&parseTime=%t&loc=Local",
28 | databaseSetting.UserName,
29 | databaseSetting.Password,
30 | databaseSetting.Host,
31 | databaseSetting.DBName,
32 | databaseSetting.Charset,
33 | databaseSetting.ParseTime,
34 | ))
35 | if err != nil {
36 | return nil, err
37 | }
38 |
39 | if global.ServerSetting.RunMode == "debug" {
40 | db.LogMode(true)
41 | }
42 | db.SingularTable(true)
43 | db.Callback().Create().Replace("gorm:update_time_stamp", updateTimeStampForCreateCallback)
44 | db.Callback().Update().Replace("gorm:update_time_stamp", updateTimeStampForUpdateCallback)
45 | db.Callback().Delete().Replace("gorm:delete", deleteCallback)
46 | db.DB().SetMaxIdleConns(databaseSetting.MaxIdleConns)
47 | db.DB().SetMaxOpenConns(databaseSetting.MaxOpenConns)
48 | otgorm.AddGormCallbacks(db)
49 | return db, nil
50 | }
51 |
52 | //以下内容用于定义回调,统一填充公共字段
53 | func updateTimeStampForCreateCallback(scope *gorm.Scope) {
54 | if !scope.HasError() {
55 | nowTime := time.Now().Format("2006-01-02 15:04:05")
56 |
57 | if createTimeField, ok := scope.FieldByName("CreatedOn"); ok {
58 | if createTimeField.IsBlank {
59 | _ = createTimeField.Set(nowTime)
60 | }
61 | }
62 |
63 | if modifyTimeField, ok := scope.FieldByName("ModifiedOn"); ok {
64 | if modifyTimeField.IsBlank {
65 | _ = modifyTimeField.Set(nowTime)
66 | }
67 | }
68 | }
69 | }
70 |
71 | func updateTimeStampForUpdateCallback(scope *gorm.Scope) {
72 | if _, ok := scope.Get("gorm:update_column"); !ok {
73 | nowTime := time.Now().Format("2006-01-02 15:04:05")
74 | _ = scope.SetColumn("ModifiedOn", nowTime)
75 | }
76 | }
77 |
78 | func deleteCallback(scope *gorm.Scope) {
79 | if !scope.HasError() {
80 | var extraOption string
81 | if str, ok := scope.Get("gorm:delete_option"); ok {
82 | extraOption = fmt.Sprint(str)
83 | }
84 |
85 | deletedOnField, hasDeletedOnField := scope.FieldByName("DeletedOn")
86 | isDelField, hasIsDelField := scope.FieldByName("IsDel")
87 | if !scope.Search.Unscoped && hasDeletedOnField && hasIsDelField {
88 | nowTime := time.Now().Format("2006-01-02 15:04:05")
89 | scope.Raw(fmt.Sprintf(
90 | "UPDATE %v SET %v=%v,%v=%v%v%v",
91 | scope.QuotedTableName(),
92 | scope.Quote(deletedOnField.DBName),
93 | scope.AddToVars(nowTime),
94 | scope.Quote(isDelField.DBName),
95 | scope.AddToVars(1),
96 | addExtraSpaceIfExist(scope.CombinedConditionSql()),
97 | addExtraSpaceIfExist(extraOption),
98 | )).Exec()
99 | } else {
100 | scope.Raw(fmt.Sprintf(
101 | "DELETE FROM %v%v%v",
102 | scope.QuotedTableName(),
103 | addExtraSpaceIfExist(scope.CombinedConditionSql()),
104 | addExtraSpaceIfExist(extraOption),
105 | )).Exec()
106 | }
107 | }
108 | }
109 |
// addExtraSpaceIfExist returns str prefixed with a single space, or the empty
// string when str is empty.
func addExtraSpaceIfExist(str string) string {
	if str == "" {
		return ""
	}
	return " " + str
}
116 |
--------------------------------------------------------------------------------
/pkg/elastic/elastic.go:
--------------------------------------------------------------------------------
1 | package elastic
2 |
3 | import (
4 | "github.com/olivere/elastic/v7"
5 |
6 | "go-crawler-distributed/pkg/setting"
7 |
8 | "time"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-12-29 11:51
14 | * @Description:
15 | **/
16 |
17 | func NewElasticEngine(elasticSetting *setting.ElasticSettingS) (*elastic.Client, error) {
18 | var client *elastic.Client
19 | for i := 0; i < 10; i++ {
20 | // Ping the server by sending a GET request to `/health`.
21 | var err error
22 | client, err = elastic.NewClient(
23 | elastic.SetURL(elasticSetting.Url),
24 | elastic.SetSniff(false))
25 | if err == nil {
26 | return nil, err
27 | }
28 | time.Sleep(time.Second)
29 | }
30 | return client, nil
31 | }
32 |
--------------------------------------------------------------------------------
/pkg/elastic/elasticOpeartion.go:
--------------------------------------------------------------------------------
1 | package elastic
2 |
3 | import (
4 | "context"
5 | "github.com/olivere/elastic/v7"
6 | "go-crawler-distributed/internal/model"
7 | "reflect"
8 |
9 | "go-crawler-distributed/global"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2020-12-29 11:58
15 | * @Description:
16 | **/
17 | const Mapping = `
18 | {
19 | "mappings": {
20 | "properties": {
21 | "title": {
22 | "type": "text"
23 | },
24 | "url": {
25 | "type": "text"
26 | },
27 | "genres": {
28 | "type": "keyword"
29 | },
30 | "content": {
31 | "type": "text"
32 | }
33 | }
34 | }
35 | }`
36 |
37 | //判断index是否存在
38 | func IndexExist(index string) (bool, error) {
39 | client := global.ElasticEngine
40 |
41 | exist, err := client.IndexExists(index).Do(context.Background())
42 | if err != nil {
43 | return exist, err
44 | }
45 | if !exist {
46 | _, err := client.CreateIndex(index).BodyString(Mapping).Do(context.Background())
47 | if err != nil {
48 | return false, err
49 | }
50 | }
51 | return true, err
52 | }
53 |
54 | //保存信息
55 | func SaveInfo(table string, data interface{}) (string, error) {
56 | client := global.ElasticEngine
57 | // https://www.letianbiji.com/elasticsearch/es7-quick-start.html
58 | // 在v7中Type被注释
59 | // ES 实例:对应 MySQL 实例中的一个 Database。
60 | // Index 对应 MySQL 中的 Table
61 | // Document 对应 MySQL 中表的记录。
62 | response, err := client.Index().Index(table).BodyJson(data).Do(context.Background())
63 | if err != nil {
64 | return "", err
65 | }
66 | return response.Id, nil
67 | }
68 |
69 | //获取信息
70 | func GetInfo(table string, id string) (*model.Article, error) {
71 | client := global.ElasticEngine
72 | result, err := client.Get().Index(table).Id(id).Do(context.Background())
73 | if err != nil {
74 | return nil, err
75 | }
76 | article := &model.Article{}
77 | err = article.UnmarshalJSON(result.Source)
78 | if err != nil {
79 | return nil, err
80 | }
81 | return article, nil
82 | }
83 |
84 | //搜索信息
85 | func SearchInfo(table string, fieldName string, fieldValue string) ([]*model.Article, error) {
86 | query := elastic.NewTermQuery(fieldName, fieldValue)
87 | client := global.ElasticEngine
88 | result, err := client.Search().Index(table).Query(query).Do(context.Background())
89 | if err != nil {
90 | return nil, err
91 | }
92 | articles := make([]*model.Article, 0)
93 | article := model.Article{}
94 | total := result.TotalHits()
95 | if total > 0 {
96 | for _, item := range result.Each(reflect.TypeOf(article)) {
97 | if t, ok := item.(model.Article); ok {
98 | articles = append(articles, &t)
99 | }
100 | }
101 | }
102 | return articles, nil
103 | }
104 |
--------------------------------------------------------------------------------
/pkg/email/email.go:
--------------------------------------------------------------------------------
1 | package email
2 |
3 | import (
4 | "crypto/tls"
5 | "gopkg.in/gomail.v2"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2020-09-23 20:37
11 | * @Description: 使用gomail库发送邮件
12 | **/
13 |
type (
	// Email sends mail using the embedded SMTP settings.
	Email struct {
		*SMTPInfo
	}

	// SMTPInfo holds the SMTP server address, credentials and sender.
	SMTPInfo struct {
		Host     string
		Port     int
		IsSSL    bool
		UserName string
		Password string
		From     string
	}
)
26 |
27 | func NewEmail(info *SMTPInfo) *Email {
28 | return &Email{SMTPInfo: info}
29 | }
30 |
31 | func (e *Email) SendMail(to []string, subject, body string) error {
32 | m := gomail.NewMessage()
33 | m.SetHeader("From", e.From)
34 | m.SetHeader("To", to...)
35 | m.SetHeader("Subject", subject)
36 | m.SetBody("text/html", body)
37 |
38 | dialer := gomail.NewDialer(e.Host, e.Port, e.UserName, e.Password)
39 | dialer.TLSConfig = &tls.Config{InsecureSkipVerify: e.IsSSL}
40 | return dialer.DialAndSend(m)
41 | }
42 |
--------------------------------------------------------------------------------
/pkg/errcode/common_code.go:
--------------------------------------------------------------------------------
1 | package errcode
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-09-18 07:53
6 | * @Description: 统一错误代码
7 | **/
8 |
9 | var (
10 | Success = NewError(0, "成功")
11 | ServerError = NewError(10000000, "服务内部错误")
12 | InvalidParams = NewError(10000001, "入参错误")
13 | NotFound = NewError(10000002, "找不到")
14 | )
15 |
--------------------------------------------------------------------------------
/pkg/errcode/ercd_code.go:
--------------------------------------------------------------------------------
1 | package errcode
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2021-02-06 19:41
6 | * @Description:
7 | **/
8 |
9 | var (
10 | ErrorSaveFail = NewError(30060001, "存储数据到ETCD失败")
11 | ErrorDeleteFail = NewError(30060002, "ETCD删除数据失败")
12 | ErrorListFail = NewError(30060003, "ETCD获取数据列表失败")
13 | ErrorLogListFail = NewError(30060004, "获取日志列表失败")
14 | ErrorWorkerListFail = NewError(30060005, "获取worker列表失败")
15 | )
16 |
--------------------------------------------------------------------------------
/pkg/errcode/errcode.go:
--------------------------------------------------------------------------------
1 | package errcode
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2020-09-18 08:09
11 | * @Description: 统一错误代码
12 | **/
13 |
// Error is an application error made of a numeric code, a message and
// optional details. The fields are unexported and read through the accessors
// below. They carry no json tags: tags on unexported fields have no effect
// and are flagged by go vet.
type Error struct {
	code    int
	msg     string
	details []string
}

// codes records every registered code so duplicates are caught.
var codes = map[int]string{}

// NewError registers code with msg and returns the new Error.
// It panics when code has already been registered.
func NewError(code int, msg string) *Error {
	if _, ok := codes[code]; ok {
		panic(fmt.Sprintf("错误码%d已经存在,请更换一个", code))
	}
	codes[code] = msg
	return &Error{code: code, msg: msg}
}

// Error formats the code and the message.
func (e *Error) Error() string {
	return fmt.Sprintf("错误码:%d, 错误信息::%s", e.Code(), e.Msg())
}

// Code returns the numeric error code.
func (e *Error) Code() int {
	return e.code
}

// Msg returns the error message.
func (e *Error) Msg() string {
	return e.msg
}

// Msgf formats the message as a format string with args.
func (e *Error) Msgf(args []interface{}) string {
	return fmt.Sprintf(e.msg, args...)
}

// Details returns the details attached by WithDetails.
func (e *Error) Details() []string {
	return e.details
}

// WithDetails returns a copy of e that carries details; e itself is left
// unchanged. The copy's details are never nil, even without arguments.
func (e *Error) WithDetails(details ...string) *Error {
	newError := *e
	newError.details = append([]string{}, details...)

	return &newError
}
59 |
60 | func (e *Error) StatusCode() int {
61 | switch e.Code() {
62 | case Success.Code():
63 | return http.StatusOK
64 | case ServerError.Code():
65 | return http.StatusInternalServerError
66 | case InvalidParams.Code():
67 | return http.StatusBadRequest
68 | }
69 |
70 | return http.StatusInternalServerError
71 | }
72 |
--------------------------------------------------------------------------------
/pkg/errcode/user.go:
--------------------------------------------------------------------------------
1 | package errcode
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-11-24 18:45
6 | * @Description:
7 | **/
8 |
9 | var (
10 | ErrorUserSignInFail = NewError(20060001, "用户登录失败")
11 | ErrorUserUpdateFail = NewError(20060002, "用户信息更新失败")
12 | ErrorUserRegisterFail = NewError(20060003, "用户注册失败")
13 | ErrorUserCookieFail = NewError(20060004, "用户cookie验证失败")
14 | )
15 |
--------------------------------------------------------------------------------
/pkg/etcd/etcd.go:
--------------------------------------------------------------------------------
1 | package etcd
2 |
3 | import (
4 | "github.com/coreos/etcd/clientv3"
5 | "go-crawler-distributed/pkg/setting"
6 | "time"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2021-02-06 18:22
12 | * @Description:
13 | **/
14 |
15 | func NewEtcdEngine(etcdSetting *setting.EtcdSettingS) (client *clientv3.Client, kv clientv3.KV, lease clientv3.Lease, watcher clientv3.Watcher, err error) {
16 | config := clientv3.Config{
17 | Endpoints: []string{etcdSetting.Endpoint},
18 | DialTimeout: time.Duration(etcdSetting.DialTimeout) * time.Millisecond,
19 | }
20 | if client, err = clientv3.New(config); err != nil {
21 | return
22 | }
23 | kv = clientv3.NewKV(client)
24 | lease = clientv3.NewLease(client)
25 | watcher = clientv3.NewWatcher(client)
26 | return
27 | }
28 |
--------------------------------------------------------------------------------
/pkg/file/file.go:
--------------------------------------------------------------------------------
1 | package file
2 |
3 | import (
4 | "io/ioutil"
5 | "mime/multipart"
6 | "os"
7 | "path"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-08-22 10:45
13 | * @Description:
14 | **/
15 |
// GetSize reads f to the end and returns the number of bytes read, along with
// any read error. Reading starts at f's current position.
func GetSize(f multipart.File) (int, error) {
	data, readErr := ioutil.ReadAll(f)
	size := len(data)
	return size, readErr
}
21 |
// GetExt returns the extension of fileName as reported by path.Ext: the
// suffix starting at the final dot of the last path element, or "" if none.
func GetExt(fileName string) string {
	ext := path.Ext(fileName)
	return ext
}
25 |
// CheckNotExist reports whether os.Stat on src fails with a not-exist error.
func CheckNotExist(src string) bool {
	if _, statErr := os.Stat(src); statErr != nil {
		return os.IsNotExist(statErr)
	}
	return false
}
31 |
// CheckPermission reports whether os.Stat on src fails with a permission
// error.
func CheckPermission(src string) bool {
	if _, statErr := os.Stat(src); statErr != nil {
		return os.IsPermission(statErr)
	}
	return false
}
37 |
38 | func IsNotExistMkDir(src string) error {
39 | if notExist := CheckNotExist(src); notExist == true {
40 | if err := MkDir(src); err != nil {
41 | return err
42 | }
43 | }
44 |
45 | return nil
46 | }
47 |
// MkDir creates src and any missing parents with os.ModePerm permissions.
func MkDir(src string) error {
	return os.MkdirAll(src, os.ModePerm)
}
56 |
// Open opens the named file with the given flag and permissions; it is a thin
// wrapper over os.OpenFile.
func Open(name string, flag int, perm os.FileMode) (*os.File, error) {
	return os.OpenFile(name, flag, perm)
}
65 |
--------------------------------------------------------------------------------
/pkg/idGenerator/idGenerator.go:
--------------------------------------------------------------------------------
1 | package idGenerator
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-11-24 13:45
6 | * @Description:
7 | **/
8 |
9 | import (
10 | "github.com/bwmarrin/snowflake"
11 |
12 | "strconv"
13 | )
14 |
15 | /**
16 | * @Author: super
17 | * @Date: 2020-09-09 22:04
18 | * @Description: 雪花算法介绍:https://juejin.im/post/6844903562007314440
19 | **/
20 |
21 | var node *snowflake.Node
22 |
23 | // InitSnowflake initiate Snowflake node singleton.
24 | func InitSnowflake() error {
25 | // Get node number from env TIX_NODE_NO
26 | //key, ok := os.LookupEnv("TIX_NODE_NO")
27 | //if !ok {
28 | // return fmt.Errorf("TIX_NODE_NO is not set in system environment")
29 | //}
30 | // Parse node number
31 | key := "1"
32 | nodeNo, err := strconv.ParseInt(key, 10, 64)
33 | if err != nil {
34 | return err
35 | }
36 | // Create snowflake node
37 | n, err := snowflake.NewNode(nodeNo)
38 | if err != nil {
39 | return err
40 | }
41 | // Set node
42 | node = n
43 | return nil
44 | }
45 |
46 | // GenerateSnowflake generate Twitter Snowflake ID
47 | func GenerateID() string {
48 | return node.Generate().String()
49 | }
50 |
--------------------------------------------------------------------------------
/pkg/idGenerator/idGenerator_test.go:
--------------------------------------------------------------------------------
1 | package idGenerator
2 |
3 | import "testing"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-11-24 13:57
8 | * @Description:
9 | **/
10 |
11 | func TestGenerateSnowflake(t *testing.T) {
12 | err := InitSnowflake()
13 | if err != nil {
14 | t.Error(err)
15 | }
16 | id := GenerateID()
17 | t.Log(id)
18 | }
19 |
--------------------------------------------------------------------------------
/pkg/ipParser/ipParser.go:
--------------------------------------------------------------------------------
1 | package ipParser
2 |
3 | import (
4 | "errors"
5 | "github.com/kayon/iploc"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/pkg/setting"
8 | )
9 | /**
10 | * @Author: super
11 | * @Date: 2021-02-15 16:57
12 | * @Description:
13 | **/
14 |
15 | func NewIpParser(ipParserSetting *setting.IpParserSettingS)(locator *iploc.Locator, err error){
16 | locator, err = iploc.Open(ipParserSetting.FilePath)
17 | if err != nil {
18 | return
19 | }
20 | return
21 | }
22 |
23 | // 查询IP所属位置
24 | func GetIpLocationString(ip string) (string, error ){
25 | detail := global.IpParser.Find(ip)
26 | if detail != nil {
27 | return detail.String(), nil
28 | }else{
29 | return "", errors.New("can't find ip location")
30 | }
31 | }
--------------------------------------------------------------------------------
/pkg/ipParser/qqwry.utf8.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/pkg/ipParser/qqwry.utf8.dat
--------------------------------------------------------------------------------
/pkg/logger/logger.go:
--------------------------------------------------------------------------------
1 | package logger
2 |
3 | import (
4 | "context"
5 | "encoding/json"
6 | "fmt"
7 | "io"
8 | "log"
9 | "runtime"
10 | "time"
11 |
12 | "github.com/gin-gonic/gin"
13 | )
14 |
15 | /**
16 | * @Author: super
17 | * @Date: 2020-08-21 09:12
18 | * @Description: 方便后期进行统一配置
19 | **/
20 |
// Level is the severity of a log entry.
type Level int8

// Fields is a set of structured key/value pairs attached to a log entry.
type Fields map[string]interface{}

// Severity levels in increasing order.
const (
	LevelDebug Level = iota
	LevelInfo
	LevelWarn
	LevelError
	LevelFatal
	LevelPanic
)

// String returns the lower-case name of the level, or "" for a value that is
// not one of the declared levels.
func (l Level) String() string {
	switch l {
	case LevelDebug:
		return "debug"
	case LevelInfo:
		return "info"
	case LevelWarn:
		return "warn"
	case LevelError:
		return "error"
	case LevelFatal:
		return "fatal"
	case LevelPanic:
		return "panic"
	}
	return ""
}

// Logger wraps a standard library *log.Logger and carries per-entry state
// (context, structured fields and caller information). The With* methods
// never modify their receiver; each returns a modified copy.
type Logger struct {
	newLogger *log.Logger
	ctx       context.Context
	fields    Fields
	callers   []string
}

// NewLogger creates a Logger writing to w; prefix and flag are passed
// straight to log.New.
func NewLogger(w io.Writer, prefix string, flag int) *Logger {
	l := log.New(w, prefix, flag)
	return &Logger{newLogger: l}
}

// clone returns a shallow copy of l. Reference-typed members (fields,
// callers) are still shared with l, so callers must replace them rather than
// mutate them in place.
func (l *Logger) clone() *Logger {
	nl := *l
	return &nl
}

// WithFields returns a copy of l whose fields are l's fields overlaid with f.
// Keys in f win over existing keys.
func (l *Logger) WithFields(f Fields) *Logger {
	ll := l.clone()
	// Build a fresh map: clone is shallow, so writing into ll.fields directly
	// would also change the fields of l and of every logger derived from it.
	merged := make(Fields, len(l.fields)+len(f))
	for k, v := range l.fields {
		merged[k] = v
	}
	for k, v := range f {
		merged[k] = v
	}
	ll.fields = merged
	return ll
}

// WithContext returns a copy of l bound to ctx.
func (l *Logger) WithContext(ctx context.Context) *Logger {
	ll := l.clone()
	ll.ctx = ctx
	return ll
}

// WithCaller returns a copy of l whose callers hold a single entry describing
// the frame skip levels up the stack (as defined by runtime.Caller, relative
// to this method). If the frame cannot be resolved the copy keeps l's callers.
func (l *Logger) WithCaller(skip int) *Logger {
	ll := l.clone()
	pc, file, line, ok := runtime.Caller(skip)
	if ok {
		f := runtime.FuncForPC(pc)
		ll.callers = []string{fmt.Sprintf("%s: %d %s", file, line, f.Name())}
	}

	return ll
}

// WithCallersFrames returns a copy of l whose callers list every frame of the
// current call stack, starting at WithCallersFrames itself, up to a maximum
// of 25 frames.
func (l *Logger) WithCallersFrames() *Logger {
	maxCallerDepth := 25
	minCallerDepth := 1
	callers := []string{}
	pcs := make([]uintptr, maxCallerDepth)
	depth := runtime.Callers(minCallerDepth, pcs)
	if depth > 0 {
		frames := runtime.CallersFrames(pcs[:depth])
		// Frames.Next returns more == false together with the LAST valid
		// frame, so the frame must be recorded before more is tested.
		for {
			frame, more := frames.Next()
			s := fmt.Sprintf("%s: %d %s", frame.File, frame.Line, frame.Function)
			callers = append(callers, s)
			if !more {
				break
			}
		}
	}
	ll := l.clone()
	ll.callers = callers
	return ll
}
115 |
116 | func (l *Logger) WithTrace() *Logger {
117 | ginCtx, ok := l.ctx.(*gin.Context)
118 | if ok {
119 | return l.WithFields(Fields{
120 | "trace_id": ginCtx.MustGet("X-Trace-ID"),
121 | "span_id": ginCtx.MustGet("X-Span-ID"),
122 | })
123 | }
124 | return l
125 | }
126 |
127 | func (l *Logger) JSONFormat(level Level, message string) map[string]interface{} {
128 | data := make(Fields, len(l.fields)+4)
129 | data["level"] = level.String()
130 | data["time"] = time.Now().Local().UnixNano()
131 | data["message"] = message
132 | data["callers"] = l.callers
133 | if len(l.fields) > 0 {
134 | for k, v := range l.fields {
135 | if _, ok := data[k]; !ok {
136 | data[k] = v
137 | }
138 | }
139 | }
140 |
141 | return data
142 | }
143 |
144 | func (l *Logger) Output(level Level, message string) {
145 | body, _ := json.Marshal(l.JSONFormat(level, message))
146 | content := string(body)
147 | switch level {
148 | case LevelDebug:
149 | l.newLogger.Print(content)
150 | case LevelInfo:
151 | l.newLogger.Print(content)
152 | case LevelWarn:
153 | l.newLogger.Print(content)
154 | case LevelError:
155 | l.newLogger.Print(content)
156 | case LevelFatal:
157 | l.newLogger.Fatal(content)
158 | case LevelPanic:
159 | l.newLogger.Panic(content)
160 | }
161 | }
162 |
163 | func (l *Logger) Debug(ctx context.Context, v ...interface{}) {
164 | l.WithContext(ctx).WithTrace().Output(LevelDebug, fmt.Sprint(v...))
165 | }
166 |
167 | func (l *Logger) Debugf(ctx context.Context, format string, v ...interface{}) {
168 | l.WithContext(ctx).WithTrace().Output(LevelDebug, fmt.Sprintf(format, v...))
169 | }
170 |
171 | func (l *Logger) Info(ctx context.Context, v ...interface{}) {
172 | l.WithContext(ctx).WithTrace().Output(LevelInfo, fmt.Sprint(v...))
173 | }
174 |
175 | func (l *Logger) Infof(ctx context.Context, format string, v ...interface{}) {
176 | l.WithContext(ctx).WithTrace().Output(LevelInfo, fmt.Sprintf(format, v...))
177 | }
178 |
179 | func (l *Logger) Warn(ctx context.Context, v ...interface{}) {
180 | l.WithContext(ctx).WithTrace().Output(LevelWarn, fmt.Sprint(v...))
181 | }
182 |
183 | func (l *Logger) Warnf(ctx context.Context, format string, v ...interface{}) {
184 | l.WithContext(ctx).WithTrace().Output(LevelWarn, fmt.Sprintf(format, v...))
185 | }
186 |
187 | func (l *Logger) Error(ctx context.Context, v ...interface{}) {
188 | l.WithContext(ctx).WithTrace().Output(LevelError, fmt.Sprint(v...))
189 | }
190 |
191 | func (l *Logger) Errorf(ctx context.Context, format string, v ...interface{}) {
192 | l.WithContext(ctx).WithTrace().Output(LevelError, fmt.Sprintf(format, v...))
193 | }
194 |
195 | func (l *Logger) Fatal(ctx context.Context, v ...interface{}) {
196 | l.WithContext(ctx).WithTrace().Output(LevelFatal, fmt.Sprint(v...))
197 | }
198 |
199 | func (l *Logger) Fatalf(ctx context.Context, format string, v ...interface{}) {
200 | l.WithContext(ctx).WithTrace().Output(LevelFatal, fmt.Sprintf(format, v...))
201 | }
202 |
203 | func (l *Logger) Panic(ctx context.Context, v ...interface{}) {
204 | l.WithContext(ctx).WithTrace().Output(LevelPanic, fmt.Sprint(v...))
205 | }
206 |
207 | func (l *Logger) Panicf(ctx context.Context, format string, v ...interface{}) {
208 | l.WithContext(ctx).WithTrace().Output(LevelPanic, fmt.Sprintf(format, v...))
209 | }
210 |
--------------------------------------------------------------------------------
/pkg/mongoDB/mongo.go:
--------------------------------------------------------------------------------
1 | package mongoDB
2 |
3 | import (
4 | "context"
5 | "go-crawler-distributed/pkg/setting"
6 | "go.mongodb.org/mongo-driver/mongo"
7 | "go.mongodb.org/mongo-driver/mongo/options"
8 | "go.mongodb.org/mongo-driver/mongo/readpref"
9 | "time"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2021-02-02 11:46
15 | * @Description:
16 | **/
17 |
18 | func NewMongoDBEngine(mongoDbSetting *setting.MongoDBSettingS) (*mongo.Client, error) {
19 | var client *mongo.Client
20 | var err error
21 | ctx, _ := context.WithTimeout(context.Background(), time.Duration(mongoDbSetting.Timeout)*time.Second)
22 | opt := options.Client().ApplyURI(mongoDbSetting.Url)
23 | opt.SetMaxPoolSize(mongoDbSetting.MaxPoolSize)
24 | if client, err = mongo.Connect(ctx, opt); err != nil {
25 | return nil, err
26 | } else {
27 | ctx2, _ := context.WithTimeout(context.Background(), time.Duration(mongoDbSetting.Timeout)*time.Second)
28 | err := client.Ping(ctx2, readpref.Primary())
29 | if err != nil {
30 | return nil, err
31 | }
32 | }
33 | return client, nil
34 | }
35 |
--------------------------------------------------------------------------------
/pkg/mq/consumer.go:
--------------------------------------------------------------------------------
1 | package mq
2 |
3 | import (
4 | "github.com/streadway/amqp"
5 |
6 | "go-crawler-distributed/global"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-12-29 13:47
12 | * @Description:
13 | **/
14 |
15 | func Consume(queueName string) (<-chan amqp.Delivery, error) {
16 | //1. 申请队列,如果队列不存在则自动创建,如果存在则获取存在的队列
17 | //保证队列存在,使消息发送到队列中
18 | _, err := global.RabbitMQEngine.Channel.QueueDeclare(queueName,
19 | //是否持久化
20 | false,
21 | //是否自动删除
22 | false,
23 | //是否具有排他性,独占队列
24 | false,
25 | //是否阻塞
26 | false,
27 | //额外属性
28 | nil,
29 | )
30 | if err != nil {
31 | return nil, err
32 | }
33 |
34 | //接受消息
35 | msgs, err := global.RabbitMQEngine.Channel.Consume(
36 | queueName,
37 | //用于区分多个不同的消费者
38 | "",
39 | //是否自动应答,也就是消费者消费一个队列后是否主动告知rabbitmq当前的消息我已经消费完
40 | //rabbitmq会根据这个判断是否可以删除该消息
41 | //为false的话要手动实现
42 | false,
43 | //是否具有排他性
44 | false,
45 | //如果为true不能在同一个connection中发送消息传递给当前conn的消费者
46 | false,
47 | false,
48 | nil,
49 | )
50 | if err != nil {
51 | return nil, err
52 | }
53 | return msgs, nil
54 | }
55 |
--------------------------------------------------------------------------------
/pkg/mq/producer.go:
--------------------------------------------------------------------------------
1 | package mq
2 |
3 | import (
4 | "github.com/streadway/amqp"
5 |
6 | "go-crawler-distributed/global"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-12-29 13:43
12 | * @Description:
13 | **/
14 |
15 | //发布消息
16 | func Publish(queueName string, msg []byte) error {
17 | //检查channel是否正常
18 | if err := initChannel(global.RabbitMQEngine, global.RabbitMQSetting); err != nil {
19 | return err
20 | }
21 |
22 | //1. 申请队列,如果队列不存在则自动创建,如果存在则获取存在的队列
23 | //保证队列存在,使消息发送到队列中
24 | _, err := global.RabbitMQEngine.Channel.QueueDeclare(queueName,
25 | //是否持久化
26 | false,
27 | //是否自动删除
28 | false,
29 | //是否具有排他性,独占队列
30 | false,
31 | //是否阻塞
32 | false,
33 | //额外属性
34 | nil,
35 | )
36 | if err != nil {
37 | return err
38 | }
39 |
40 | //2. 发送消息到队列中
41 | err = global.RabbitMQEngine.Channel.Publish(
42 | "",
43 | queueName,
44 | // 如果为true, 则根据exchange类型和routkey规则,如果无法找到符合条件的队列
45 | // 那么会把发送的消息回退给publish
46 | false,
47 | //如果为true,当exchange发送消息到队列后发现没有consume,则会把发送的消息返回给发送者
48 | false,
49 | amqp.Publishing{
50 | ContentType: "text/plain",
51 | Body: msg,
52 | },
53 | )
54 | if err != nil {
55 | return err
56 | }
57 | return nil
58 | }
59 |
--------------------------------------------------------------------------------
/pkg/mq/rabbitmq.go:
--------------------------------------------------------------------------------
1 | package mq
2 |
3 | import (
4 | "github.com/streadway/amqp"
5 |
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/pkg/setting"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-11-18 11:50
13 | * @Description: rabbitMQ连接池
14 | **/
15 |
// notifyClose carries channel-close notifications: it is the channel handed
// to amqp's Channel.NotifyClose and the one the reconnect goroutine in
// NewRabbitMQEngine receives from. It is nil as declared here.
var notifyClose chan *amqp.Error
18 |
19 | // Init : 初始化MQ连接信息
20 | func NewRabbitMQEngine(rabbitMQSetting *setting.RabbitMQSettingS) (*global.RabbitMQ, error) {
21 | rabbit := &global.RabbitMQ{}
22 | if err := initChannel(rabbit, rabbitMQSetting); err != nil {
23 | if rabbit.Channel != nil {
24 | rabbit.Channel.NotifyClose(notifyClose)
25 | }
26 | return nil, err
27 | }
28 | // 断线自动重连
29 | go func(rabbitMQ *global.RabbitMQ, rabbitMQSetting *setting.RabbitMQSettingS) {
30 | for {
31 | select {
32 | case _ = <-notifyClose:
33 | rabbit.Conn = nil
34 | rabbit.Channel = nil
35 | _ = initChannel(rabbitMQ, rabbitMQSetting)
36 | }
37 | }
38 | }(rabbit, rabbitMQSetting)
39 | return rabbit, nil
40 | }
41 |
42 | //初始化channel
43 | func initChannel(rabbitMQ *global.RabbitMQ, rabbitMQSetting *setting.RabbitMQSettingS) error {
44 | if rabbitMQ.Channel != nil {
45 | return nil
46 | }
47 | var err error
48 | rabbitHost := "amqp://" + rabbitMQSetting.UserName + ":" + rabbitMQSetting.Password + "@" + rabbitMQSetting.Host + "/"
49 | rabbitMQ.Conn, err = amqp.Dial(rabbitHost)
50 | if err != nil {
51 | return err
52 | }
53 |
54 | rabbitMQ.Channel, err = rabbitMQ.Conn.Channel()
55 | if err != nil {
56 | return err
57 | }
58 |
59 | return nil
60 | }
61 |
--------------------------------------------------------------------------------
/pkg/mq/rabbitmq_test.go:
--------------------------------------------------------------------------------
1 | package mq
2 |
3 | import (
4 | "fmt"
5 | "go-crawler-distributed/global"
6 | "go-crawler-distributed/pkg/setting"
7 | "strings"
8 | "testing"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-12-29 13:58
14 | * @Description:
15 | **/
16 |
17 | func TestPublish(t *testing.T) {
18 | newSetting, err := setting.NewSetting(strings.Split("/Users/super/develop/superTools-frontground-backend/configs", ",")...)
19 | if err != nil {
20 | t.Error(err)
21 | }
22 | err = newSetting.ReadSection("RabbitMQ", &global.RabbitMQSetting)
23 | if err != nil {
24 | t.Error(err)
25 | }
26 | global.RabbitMQEngine, err = NewRabbitMQEngine(global.RabbitMQSetting)
27 | if err != nil {
28 | t.Error(err)
29 | }
30 | err = Publish("test.oss", []byte("dddddddwedad"))
31 | if err != nil {
32 | t.Log(err)
33 | }
34 | }
35 |
36 | func TestConsume(t *testing.T) {
37 | newSetting, err := setting.NewSetting(strings.Split("/Users/super/develop/superTools-frontground-backend/configs", ",")...)
38 | if err != nil {
39 | t.Error(err)
40 | }
41 | err = newSetting.ReadSection("RabbitMQ", &global.RabbitMQSetting)
42 | if err != nil {
43 | t.Error(err)
44 | }
45 | global.RabbitMQEngine, err = NewRabbitMQEngine(global.RabbitMQSetting)
46 | if err != nil {
47 | t.Error(err)
48 | }
49 | msgs, err := Consume("test.oss")
50 | forever := make(chan bool)
51 | go func() {
52 | for d := range msgs {
53 | fmt.Println(d.Body)
54 | //实现其他的逻辑函数
55 | }
56 | }()
57 | <-forever
58 | }
59 |
--------------------------------------------------------------------------------
/pkg/otgorm/otgorm.go:
--------------------------------------------------------------------------------
1 | package otgorm
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "strings"
7 |
8 | "github.com/jinzhu/gorm"
9 | "github.com/opentracing/opentracing-go"
10 | "github.com/opentracing/opentracing-go/ext"
11 | )
12 |
13 | /**
14 | * @Author: super
15 | * @Date: 2020-09-24 08:35
16 | * @Description: gorm调用链追踪
17 | **/
18 |
// Keys under which tracing spans are stored in gorm settings.
const (
	// parentSpanGormKey holds the caller's span, set by WithContext.
	parentSpanGormKey = "opentracing:parent.span"
	// spanGormKey holds the per-statement span, set by the before callback
	// and read by the after callback.
	spanGormKey = "opentracing:span"
)
23 |
24 | // SetSpanToGorm sets span to gorm settings, returns cloned DB
25 | func WithContext(ctx context.Context, db *gorm.DB) *gorm.DB {
26 | if ctx == nil {
27 | return db
28 | }
29 | parentSpan := opentracing.SpanFromContext(ctx)
30 | if parentSpan == nil {
31 | return db
32 | }
33 | return db.Set(parentSpanGormKey, parentSpan)
34 | }
35 |
36 | // AddGormCallbacks adds callbacks for tracing, you should call SetSpanToGorm to make them work
37 | func AddGormCallbacks(db *gorm.DB) {
38 | callbacks := newCallbacks()
39 | registerCallbacks(db, "create", callbacks)
40 | registerCallbacks(db, "query", callbacks)
41 | registerCallbacks(db, "update", callbacks)
42 | registerCallbacks(db, "delete", callbacks)
43 | registerCallbacks(db, "row_query", callbacks)
44 | }
45 |
// callbacks groups the gorm hooks used for tracing; it carries no state.
type callbacks struct{}

// newCallbacks returns a new, empty callbacks value.
func newCallbacks() *callbacks {
	return &callbacks{}
}

// Per-operation hooks. Each before* hook delegates to before, and each after*
// hook delegates to after with the SQL verb for its operation; row queries
// pass "" so that after derives the verb from the executed SQL.
func (c *callbacks) beforeCreate(scope *gorm.Scope)   { c.before(scope) }
func (c *callbacks) afterCreate(scope *gorm.Scope)    { c.after(scope, "INSERT") }
func (c *callbacks) beforeQuery(scope *gorm.Scope)    { c.before(scope) }
func (c *callbacks) afterQuery(scope *gorm.Scope)     { c.after(scope, "SELECT") }
func (c *callbacks) beforeUpdate(scope *gorm.Scope)   { c.before(scope) }
func (c *callbacks) afterUpdate(scope *gorm.Scope)    { c.after(scope, "UPDATE") }
func (c *callbacks) beforeDelete(scope *gorm.Scope)   { c.before(scope) }
func (c *callbacks) afterDelete(scope *gorm.Scope)    { c.after(scope, "DELETE") }
func (c *callbacks) beforeRowQuery(scope *gorm.Scope) { c.before(scope) }
func (c *callbacks) afterRowQuery(scope *gorm.Scope)  { c.after(scope, "") }
62 |
63 | func (c *callbacks) before(scope *gorm.Scope) {
64 | val, ok := scope.Get(parentSpanGormKey)
65 | if !ok {
66 | return
67 | }
68 | parentSpan := val.(opentracing.Span)
69 | tr := parentSpan.Tracer()
70 | sp := tr.StartSpan("sql", opentracing.ChildOf(parentSpan.Context()))
71 | ext.DBType.Set(sp, "sql")
72 | scope.Set(spanGormKey, sp)
73 | }
74 |
75 | func (c *callbacks) after(scope *gorm.Scope, operation string) {
76 | val, ok := scope.Get(spanGormKey)
77 | if !ok {
78 | return
79 | }
80 | sp := val.(opentracing.Span)
81 | if operation == "" {
82 | operation = strings.ToUpper(strings.Split(scope.SQL, " ")[0])
83 | }
84 | ext.Error.Set(sp, scope.HasError())
85 | ext.DBStatement.Set(sp, scope.SQL)
86 | sp.SetTag("db.table", scope.TableName())
87 | sp.SetTag("db.method", operation)
88 | sp.SetTag("db.err", scope.HasError())
89 | sp.SetTag("db.count", scope.DB().RowsAffected)
90 | sp.Finish()
91 | }
92 |
// registerCallbacks hooks c's before/after handlers around gorm's built-in
// "gorm:<name>" callback for the operation called name. The handlers are
// registered as "tracing:<name>_before" and "tracing:<name>_after". Names
// other than create, query, update, delete and row_query are ignored.
func registerCallbacks(db *gorm.DB, name string, c *callbacks) {
	beforeName := fmt.Sprintf("tracing:%v_before", name)
	afterName := fmt.Sprintf("tracing:%v_after", name)
	gormCallbackName := fmt.Sprintf("gorm:%v", name)
	// gorm does some magic, if you pass CallbackProcessor here - nothing works
	// (hence the explicit per-operation chains instead of a shared helper).
	switch name {
	case "create":
		db.Callback().Create().Before(gormCallbackName).Register(beforeName, c.beforeCreate)
		db.Callback().Create().After(gormCallbackName).Register(afterName, c.afterCreate)
	case "query":
		db.Callback().Query().Before(gormCallbackName).Register(beforeName, c.beforeQuery)
		db.Callback().Query().After(gormCallbackName).Register(afterName, c.afterQuery)
	case "update":
		db.Callback().Update().Before(gormCallbackName).Register(beforeName, c.beforeUpdate)
		db.Callback().Update().After(gormCallbackName).Register(afterName, c.afterUpdate)
	case "delete":
		db.Callback().Delete().Before(gormCallbackName).Register(beforeName, c.beforeDelete)
		db.Callback().Delete().After(gormCallbackName).Register(afterName, c.afterDelete)
	case "row_query":
		db.Callback().RowQuery().Before(gormCallbackName).Register(beforeName, c.beforeRowQuery)
		db.Callback().RowQuery().After(gormCallbackName).Register(afterName, c.afterRowQuery)
	}
}
116 |
--------------------------------------------------------------------------------
/pkg/setting/section.go:
--------------------------------------------------------------------------------
1 | package setting
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-09-18 08:32
6 | * @Description: 系统设置
7 | **/
8 |
9 | import (
10 | "time"
11 | )
12 |
// ServerSettingS holds the HTTP server settings: run mode, listen port and
// read/write timeouts.
type ServerSettingS struct {
	RunMode      string
	HttpPort     string
	ReadTimeout  time.Duration
	WriteTimeout time.Duration
}

// AppSettingS holds application-level settings: paging limits, the default
// context timeout, log file location and upload constraints.
type AppSettingS struct {
	DefaultPageSize       int
	MaxPageSize           int
	DefaultContextTimeout time.Duration
	LogSavePath           string
	LogFileName           string
	LogFileExt            string
	UploadSavePath        string
	UploadServerUrl       string
	// UploadImageMaxSize is multiplied by 1024*1024 by the upload package,
	// i.e. it is expressed in MiB there.
	UploadImageMaxSize   int
	UploadImageAllowExts []string
}

// EmailSettingS holds the mail server connection and addressing settings.
type EmailSettingS struct {
	Host     string
	Port     int
	UserName string
	Password string
	IsSSL    bool
	From     string
	To       []string
}

// JWTSettingS holds the JWT secret, issuer and expiry.
type JWTSettingS struct {
	Secret string
	Issuer string
	Expire time.Duration
}

// DatabaseSettingS holds the relational database connection settings.
type DatabaseSettingS struct {
	DBType       string
	UserName     string
	Password     string
	Host         string
	DBName       string
	TablePrefix  string
	Charset      string
	ParseTime    bool
	MaxIdleConns int
	MaxOpenConns int
}

// CacheSettingS holds the cache server credentials, host and pool limits.
type CacheSettingS struct {
	UserName  string
	Password  string
	Host      string
	MaxIdle   int
	MaxActive int
}

// RabbitMQSettingS holds the RabbitMQ credentials and host; the mq package
// combines them into an amqp:// URL.
type RabbitMQSettingS struct {
	UserName string
	Password string
	Host     string
}

// ElasticSettingS holds the Elasticsearch URL and index name.
type ElasticSettingS struct {
	Url   string
	Index string
}

// ConsulSettingS holds the Consul URL and configuration path.
type ConsulSettingS struct {
	Url        string
	ConfigPath string
}

// TracerSettingS holds the tracing service name and agent host.
type TracerSettingS struct {
	ServiceName string
	Host        string
}

// MongoDBSettingS holds the MongoDB URI, the maximum connection pool size and
// the timeout, which the mongoDB package interprets as seconds.
type MongoDBSettingS struct {
	Url         string
	MaxPoolSize uint64
	Timeout     int
}

// EtcdSettingS holds the etcd endpoint and dial timeout.
type EtcdSettingS struct {
	Endpoint    string
	DialTimeout int
}

// IpParserSettingS holds the path of the IP location database file.
type IpParserSettingS struct {
	FilePath string
}
105 |
106 | var sections = make(map[string]interface{})
107 |
108 | func (s *Setting) ReadSection(k string, v interface{}) error {
109 | err := s.vp.UnmarshalKey(k, v)
110 | if err != nil {
111 | return err
112 | }
113 |
114 | if _, ok := sections[k]; !ok {
115 | sections[k] = v
116 | }
117 | return nil
118 | }
119 |
120 | func (s *Setting) ReloadAllSection() error {
121 | for k, v := range sections {
122 | err := s.ReadSection(k, v)
123 | if err != nil {
124 | return err
125 | }
126 | }
127 |
128 | return nil
129 | }
130 |
--------------------------------------------------------------------------------
/pkg/setting/setting.go:
--------------------------------------------------------------------------------
1 | package setting
2 |
3 | import (
4 | "github.com/fsnotify/fsnotify"
5 | "github.com/spf13/viper"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2020-09-18 08:28
11 | * @Description: 监听系统配置与section.go结合实现热更新
12 | **/
13 |
// Setting wraps the viper instance that holds the loaded configuration.
type Setting struct {
	vp *viper.Viper
}
17 |
18 | func NewSetting(configs ...string) (*Setting, error) {
19 | vp := viper.New()
20 | vp.SetConfigName("config")
21 | for _, config := range configs {
22 | if config != "" {
23 | vp.AddConfigPath(config)
24 | }
25 | }
26 | vp.SetConfigType("yaml")
27 | err := vp.ReadInConfig()
28 | if err != nil {
29 | return nil, err
30 | }
31 |
32 | s := &Setting{vp}
33 | s.WatchSettingChange()
34 | return s, nil
35 | }
36 |
37 | func (s *Setting) WatchSettingChange() {
38 | go func() {
39 | s.vp.WatchConfig()
40 | s.vp.OnConfigChange(func(in fsnotify.Event) {
41 | _ = s.ReloadAllSection()
42 | })
43 | }()
44 | }
45 |
--------------------------------------------------------------------------------
/pkg/tracer/tracer.go:
--------------------------------------------------------------------------------
1 | package tracer
2 |
3 | import (
4 | "io"
5 | "time"
6 |
7 | opentracing "github.com/opentracing/opentracing-go"
8 | "github.com/uber/jaeger-client-go/config"
9 | )
10 |
11 | /**
12 | * @Author: super
13 | * @Date: 2020-09-24 08:08
14 | * @Description: 调用链追踪
15 | **/
16 |
17 | func NewJaegerTracer(serviceName, agentHostPort string) (opentracing.Tracer, io.Closer, error) {
18 | cfg := &config.Configuration{
19 | ServiceName: serviceName,
20 | Sampler: &config.SamplerConfig{
21 | Type: "const",
22 | Param: 1,
23 | },
24 | Reporter: &config.ReporterConfig{
25 | LogSpans: true,
26 | BufferFlushInterval: 1 * time.Second,
27 | LocalAgentHostPort: agentHostPort,
28 | },
29 | }
30 | tracer, closer, err := cfg.NewTracer()
31 | if err != nil {
32 | return nil, nil, err
33 | }
34 | opentracing.SetGlobalTracer(tracer)
35 | return tracer, closer, nil
36 | }
37 |
--------------------------------------------------------------------------------
/pkg/upload/file.go:
--------------------------------------------------------------------------------
1 | package upload
2 |
3 | import (
4 | "go-crawler-distributed/global"
5 | "go-crawler-distributed/pkg/util"
6 |
7 | "io"
8 | "io/ioutil"
9 | "mime/multipart"
10 | "os"
11 | "path"
12 | "strings"
13 | )
14 |
15 | /**
16 | * @Author: super
17 | * @Date: 2020-09-23 19:02
18 | * @Description: 用于处理文件上传
19 | **/
20 |
// FileType identifies the category of an uploaded file.
type FileType int

// TypeImage marks an image upload; its value is 1 (iota + 1).
const TypeImage FileType = iota + 1
24 |
25 | func GetFileName(name string) string {
26 | ext := GetFileExt(name)
27 | fileName := strings.TrimSuffix(name, ext)
28 | fileName = util.EncodeMD5(fileName)
29 |
30 | return fileName + ext
31 | }
32 |
// GetFileExt returns the extension of name as reported by path.Ext, including
// the leading dot, or "" when name has no extension.
func GetFileExt(name string) string {
	ext := path.Ext(name)
	return ext
}
36 |
37 | func GetSavePath() string {
38 | return global.AppSetting.UploadSavePath
39 | }
40 |
41 | func GetServerUrl() string {
42 | return global.AppSetting.UploadServerUrl
43 | }
44 |
// CheckSavePath reports whether dst is MISSING: it returns true when os.Stat
// fails with a "does not exist" error and false otherwise.
func CheckSavePath(dst string) bool {
	_, statErr := os.Stat(dst)
	missing := os.IsNotExist(statErr)
	return missing
}
50 |
51 | func CheckContainExt(t FileType, name string) bool {
52 | ext := GetFileExt(name)
53 | ext = strings.ToUpper(ext)
54 | switch t {
55 | case TypeImage:
56 | for _, allowExt := range global.AppSetting.UploadImageAllowExts {
57 | if strings.ToUpper(allowExt) == ext {
58 | return true
59 | }
60 | }
61 | }
62 | return false
63 | }
64 |
65 | func CheckMaxSize(t FileType, f multipart.File) bool {
66 | content, _ := ioutil.ReadAll(f)
67 | size := len(content)
68 | switch t {
69 | case TypeImage:
70 | if size >= global.AppSetting.UploadImageMaxSize*1024*1024 {
71 | return true
72 | }
73 | }
74 | return false
75 | }
76 |
// CheckPermission reports whether os.Stat on dst fails with a permission
// error. It returns false when the stat succeeds or fails for another reason.
func CheckPermission(dst string) bool {
	_, statErr := os.Stat(dst)
	denied := os.IsPermission(statErr)
	return denied
}
82 |
// CreateSavePath creates dst and any missing parent directories with
// permission bits perm, returning the error from os.MkdirAll (nil on
// success).
func CreateSavePath(dst string, perm os.FileMode) error {
	return os.MkdirAll(dst, perm)
}
91 |
// SaveFile copies the uploaded file described by file to the path dst,
// creating or truncating dst. It returns the first error hit while opening
// the upload, creating dst, copying the content, or closing dst.
func SaveFile(file *multipart.FileHeader, dst string) error {
	src, err := file.Open()
	if err != nil {
		return err
	}
	defer src.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}

	if _, err := io.Copy(out, src); err != nil {
		// The copy error is the one worth reporting; Close is best effort.
		_ = out.Close()
		return err
	}
	// Close can be where a buffered write failure finally surfaces, so its
	// error must reach the caller instead of being dropped by a defer.
	return out.Close()
}
108 |
--------------------------------------------------------------------------------
/pkg/util/aes.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "bytes"
5 | "crypto/aes"
6 | "crypto/cipher"
7 | "encoding/base64"
8 | "errors"
9 |
10 | "go-crawler-distributed/configs"
11 | )
12 |
13 | /**
14 | * @Author: super
15 | * @Date: 2020-12-06 14:02
16 | * @Description:
17 | **/
18 |
// PwdKey is the AES key used by EnPwdCode and DePwdCode, taken from
// configs.PWD_KEY. A key of 16, 24 or 32 bytes selects AES-128, AES-192 or
// AES-256 respectively.
var PwdKey = []byte(configs.PWD_KEY)
21 |
// PKCS7Padding appends PKCS#7 padding to ciphertext so that its length
// becomes a multiple of blockSize. Between 1 and blockSize bytes are added,
// each holding the number of bytes added; input that is already aligned gets
// a whole extra block. The result is produced with append, so it may share
// backing storage with the argument.
func PKCS7Padding(ciphertext []byte, blockSize int) []byte {
	padLen := blockSize - len(ciphertext)%blockSize
	// bytes.Repeat yields padLen copies of the single byte padLen.
	pad := bytes.Repeat([]byte{byte(padLen)}, padLen)
	padded := append(ciphertext, pad...)
	return padded
}
29 |
// PKCS7UnPadding is the inverse of PKCS7Padding: it reads the pad length from
// the last byte of origData and returns origData without that many trailing
// bytes. It returns an error when origData is empty or when the pad length is
// zero or larger than the data, which means the input was not produced by
// PKCS7Padding (for example after decrypting with the wrong key).
func PKCS7UnPadding(origData []byte) ([]byte, error) {
	length := len(origData)
	if length == 0 {
		return nil, errors.New("加密字符串错误!")
	}

	// The last byte states how many padding bytes were appended.
	unpadding := int(origData[length-1])
	// Reject impossible pad lengths instead of slicing out of range, which
	// would panic on corrupted or foreign input.
	if unpadding == 0 || unpadding > length {
		return nil, errors.New("加密字符串错误!")
	}

	// Drop the padding and return the plaintext.
	return origData[:(length - unpadding)], nil
}
43 |
44 | //实现加密
45 | func AesEcrypt(origData []byte, key []byte) ([]byte, error) {
46 | //创建加密算法实例
47 | block, err := aes.NewCipher(key)
48 | if err != nil {
49 | return nil, err
50 | }
51 | //获取块的大小
52 | blockSize := block.BlockSize()
53 | //对数据进行填充,让数据长度满足需求
54 | origData = PKCS7Padding(origData, blockSize)
55 | //采用AES加密方法中CBC加密模式
56 | blocMode := cipher.NewCBCEncrypter(block, key[:blockSize])
57 | crypted := make([]byte, len(origData))
58 | //执行加密
59 | blocMode.CryptBlocks(crypted, origData)
60 | return crypted, nil
61 | }
62 |
63 | //实现解密
64 | func AesDeCrypt(cypted []byte, key []byte) ([]byte, error) {
65 | //创建加密算法实例
66 | block, err := aes.NewCipher(key)
67 | if err != nil {
68 | return nil, err
69 | }
70 | //获取块大小
71 | blockSize := block.BlockSize()
72 | //创建加密客户端实例
73 | blockMode := cipher.NewCBCDecrypter(block, key[:blockSize])
74 | origData := make([]byte, len(cypted))
75 | //这个函数也可以用来解密
76 | blockMode.CryptBlocks(origData, cypted)
77 | //去除填充字符串
78 | origData, err = PKCS7UnPadding(origData)
79 | if err != nil {
80 | return nil, err
81 | }
82 | return origData, err
83 | }
84 |
85 | //加密base64
86 | func EnPwdCode(pwd []byte) (string, error) {
87 | result, err := AesEcrypt(pwd, PwdKey)
88 | if err != nil {
89 | return "", err
90 | }
91 | return base64.StdEncoding.EncodeToString(result), err
92 | }
93 |
94 | //解密
95 | func DePwdCode(pwd string) ([]byte, error) {
96 | //解密base64字符串
97 | pwdByte, err := base64.StdEncoding.DecodeString(pwd)
98 | if err != nil {
99 | return nil, err
100 | }
101 | //执行AES解密
102 | return AesDeCrypt(pwdByte, PwdKey)
103 | }
104 |
--------------------------------------------------------------------------------
/pkg/util/base64.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "bytes"
5 | "compress/gzip"
6 | "encoding/base64"
7 | "io/ioutil"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-09-24 20:04
13 | * @Description: base64编码与解码
14 | **/
15 |
// EncodeBase64 gzip-compresses value and returns the compressed bytes encoded
// as standard base64. The writer is flushed and then closed, in that order;
// the explicit Flush is part of the produced byte stream and is kept so that
// the output stays identical.
func EncodeBase64(value string) (string, error) {
	var compressed bytes.Buffer
	zw := gzip.NewWriter(&compressed)

	_, err := zw.Write([]byte(value))
	if err == nil {
		err = zw.Flush()
	}
	if err == nil {
		err = zw.Close()
	}
	if err != nil {
		return "", err
	}

	return base64.StdEncoding.EncodeToString(compressed.Bytes()), nil
}
33 |
// DecodeBase64 reverses EncodeBase64: it decodes value from standard base64,
// gunzips the result and returns the original text. Because the signature has
// no error result, any failure (invalid base64, invalid or truncated gzip
// data) yields the empty string.
func DecodeBase64(value string) string {
	compressed, err := base64.StdEncoding.DecodeString(value)
	if err != nil {
		return ""
	}

	// gzip.NewReader returns a nil reader on error; reading from it would
	// panic, so the error must be checked before use.
	zr, err := gzip.NewReader(bytes.NewReader(compressed))
	if err != nil {
		return ""
	}
	defer zr.Close()

	plain, err := ioutil.ReadAll(zr)
	if err != nil {
		// Do not hand back silently truncated text.
		return ""
	}
	return string(plain)
}
42 |
--------------------------------------------------------------------------------
/pkg/util/base64_test.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import "testing"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-09-24 20:08
8 | * @Description:
9 | **/
10 |
11 | func TestEncodeBase64(t *testing.T) {
12 | s, err := EncodeBase64("hello world")
13 | if err != nil {
14 | t.Error(err)
15 | }
16 | t.Log(s)
17 | }
18 |
19 | func TestDecodeBase64(t *testing.T) {
20 | s := DecodeBase64("H4sIAAAAAAAA/8pIzcnJVyjPL8pJAQAAAP//AQAA//+FEUoNCwAAAA==")
21 | t.Log(s)
22 | }
23 |
24 | func BenchmarkEncodeBase64(b *testing.B) {
25 | for i := 0; i < b.N; i++ {
26 | s, err := EncodeBase64("helloworldasdafsdfasfsdgadfgadfweaweterteggdfsgdsbdfbvxvczxvfasdfasdfasdfsadfsadfsadfsd")
27 | if err != nil {
28 | b.Error(err)
29 | }
30 | b.Log(s)
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/pkg/util/json.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import "encoding/json"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-12-06 17:43
8 | * @Description:
9 | **/
10 |
// EncodeToJson marshals object with encoding/json and returns the JSON text.
// If marshalling fails, the string is empty and the error is returned as is.
func EncodeToJson(object interface{}) (string, error) {
	raw, err := json.Marshal(object)
	if err == nil {
		return string(raw), nil
	}
	return "", err
}
18 |
// DecodeToStruct parses input as JSON into an untyped value.
//
// Despite the name, no struct is produced: the target is an empty interface,
// so encoding/json decides the concrete type of the result. Invalid JSON
// yields a nil value and the unmarshalling error.
func DecodeToStruct(input string) (interface{}, error) {
	var decoded interface{}
	if err := json.Unmarshal([]byte(input), &decoded); err != nil {
		return nil, err
	}
	return decoded, nil
}
27 |
--------------------------------------------------------------------------------
/pkg/util/json_test.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | /**
8 | * @Author: super
9 | * @Date: 2020-12-06 17:45
10 | * @Description:
11 | **/
12 |
// LoginUser is a fixture type for the JSON helper tests in this file. Its
// fields are serialised under the snake_case keys given in the struct tags.
type LoginUser struct {
	ID        string `json:"id"`
	UserName  string `json:"user_name"`
	IPAddress string `json:"ip_address"`
}
18 |
19 | func TestEncodeToJson(t *testing.T) {
20 | loginUser := LoginUser{
21 | ID: "123",
22 | UserName: "username",
23 | IPAddress: "192.1.1.222",
24 | }
25 | result, err := EncodeToJson(loginUser)
26 | if err != nil {
27 | t.Error(err)
28 | }
29 | t.Log(result)
30 | }
31 |
32 | func BenchmarkEncodeToJson(b *testing.B) {
33 | loginUser := LoginUser{
34 | ID: "123",
35 | UserName: "username",
36 | IPAddress: "192.1.1.222",
37 | }
38 | for i := 0; i < b.N; i++ {
39 | _, err := EncodeToJson(loginUser)
40 | if err != nil {
41 | b.Error(err)
42 | }
43 | }
44 | }
45 |
46 | func TestDecodeToStruct(t *testing.T) {
47 | input := `{"id":"123","user_name":"username","ip_address":"192.1.1.222"}`
48 | result, err := DecodeToStruct(input)
49 | if err != nil {
50 | t.Error(err)
51 | }
52 | t.Log(result)
53 | }
54 |
--------------------------------------------------------------------------------
/pkg/util/jwt.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-08-22 09:13
6 | * @Description:
7 | **/
8 |
9 | import (
10 | "github.com/dgrijalva/jwt-go"
11 |
12 | "time"
13 | )
14 |
// Key material shared by GenerateToken and ParseToken.
var (
	// secret is the HMAC signing secret in string form.
	// NOTE(review): it is empty in this file and nothing visible here assigns
	// it; confirm a real secret is supplied before tokens are trusted.
	secret = ""

	// jwtSecret is the byte form of secret, captured once at package
	// initialisation.
	jwtSecret = []byte(secret)
)
18 |
// Claims is the JWT payload used by GenerateToken and ParseToken: the
// application fields plus the embedded registered claims.
type Claims struct {
	Username string `json:"username"`
	// NOTE(review): the password travels inside the token payload, which is
	// only encoded, not encrypted; confirm this is intended.
	Password string `json:"password"`
	jwt.StandardClaims
}
24 |
25 | func GenerateToken(username, password string) (string, error) {
26 | nowTime := time.Now()
27 | expireTime := nowTime.Add(3 * time.Hour)
28 |
29 | claims := Claims{
30 | username,
31 | password,
32 | jwt.StandardClaims{
33 | ExpiresAt: expireTime.Unix(),
34 | Issuer: "gin-blog",
35 | },
36 | }
37 |
38 | tokenClaims := jwt.NewWithClaims(jwt.SigningMethodHS256, claims)
39 | token, err := tokenClaims.SignedString(jwtSecret)
40 |
41 | return token, err
42 | }
43 |
44 | func ParseToken(token string) (*Claims, error) {
45 | tokenClaims, err := jwt.ParseWithClaims(token, &Claims{}, func(token *jwt.Token) (interface{}, error) {
46 | return jwtSecret, nil
47 | })
48 |
49 | if tokenClaims != nil {
50 | if claims, ok := tokenClaims.Claims.(*Claims); ok && tokenClaims.Valid {
51 | return claims, nil
52 | }
53 | }
54 |
55 | return nil, err
56 | }
57 |
--------------------------------------------------------------------------------
/pkg/util/md5.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "crypto/md5"
5 | "encoding/hex"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2020-09-23 18:59
11 | * @Description:
12 | **/
13 |
// EncodeMD5 returns the MD5 digest of value as a 32-character lowercase
// hexadecimal string.
func EncodeMD5(value string) string {
	digest := md5.Sum([]byte(value))
	return hex.EncodeToString(digest[:])
}
20 |
--------------------------------------------------------------------------------
/pkg/util/md5_test.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import "testing"
4 |
5 | /**
6 | * @Author: super
7 | * @Date: 2020-09-24 19:54
8 | * @Description:
9 | **/
10 |
// TestEncodeMD5 is a table-driven check of EncodeMD5 against fixed digests for
// an empty string, a short ASCII string and a long multi-byte text.
func TestEncodeMD5(t *testing.T) {
	var tests = []struct {
		in  string
		out string
	}{
		{"", "d41d8cd98f00b204e9800998ecf8427e"},
		{"hello", "5d41402abc4b2a76b9719d911017c592"},
		{"小肥猫告别了小瘦猫,去大城市闯荡了。“再见了,小瘦猫,我一只猫也能过得很好。”小肥猫抹下最后一颗泪珠,赌着气给小瘦猫写下了这封诀别信。小瘦猫还在熟睡着,连小肥猫用力关门的“咣当”声都没能把它吵醒。小肥猫一步三回头,却始终没有猫来追它回去。小肥猫叹口气,只能自己一个人去讨生活。她向来是胆小的,面对未知有诸多恐惧。恰逢大雨,航班居然延误十二个小时。坐在旁边的旅客兔小姐对同伴说:“按照我往常的经验,这趟航班应该被取消了”。小肥猫本来坚定下来的心又开始犹犹豫豫打起了鼓,要不要回去呢,可是回去就意味着自己的妥协。就在小肥猫犹豫不决的时候,小瘦猫突然出现了,没有责怪,也没有问小肥猫想离开它之后去哪里,小瘦猫只是把小肥猫拥抱在怀里。小肥猫退了票,像个跟屁虫一样,被小瘦猫牵回了家。小肥猫余气未消,走着走着她甩开手:每次你都是这样,一句多余的话都不说,连我要去哪里都不问吗?小瘦猫当然知道它想赌气离开自己,虽然不知道自己哪里做错了,可能在对待女朋友这里还是觉悟不够高吧。小瘦猫回答说:我不会问的。因为我会难过,无论你去到哪里,那个地方都没有我。", "80e99dc093cafed983ef6428a27ed645"},
	}

	// Report every mismatching case instead of stopping at the first one.
	for i, tt := range tests {
		s := EncodeMD5(tt.in)
		if s != tt.out {
			t.Errorf("%d. %q => %q, wanted: %q", i, tt.in, s, tt.out)
		}
	}
}
28 |
--------------------------------------------------------------------------------
/pkg/util/morse.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "errors"
5 | "strings"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2020-08-24 09:33
11 | * @Description:
12 | **/
13 |
// Sentinel errors returned by GenerateMorse. Callers compare against these
// values directly.
var (
	// lengthError reports input that is empty once surrounding whitespace has
	// been trimmed.
	lengthError = errors.New("length must be > 0")
	// unsupportedError reports a byte that has no entry in morseMap.
	unsupportedError = errors.New("unsupported byte")
)

// morseMap holds the Morse pattern for each supported byte: lowercase ASCII
// letters and the digits 0-9.
var morseMap = map[byte]string{
	'a': ".-",
	'b': "-...",
	'c': "-.-.",
	'd': "-..",
	'e': ".",
	'f': "..-.",
	'g': "--.",
	'h': "....",
	'i': "..",
	'j': ".---",
	'k': "-.-",
	'l': ".-..",
	'm': "--",
	'n': "-.",
	'o': "---",
	'p': ".--.",
	'q': "--.-",
	'r': ".-.",
	's': "...",
	't': "-",
	'u': "..-",
	'v': "...-",
	'w': ".--",
	'x': "-..-",
	'y': "-.--",
	'z': "--..",
	'1': ".----",
	'2': "..---",
	'3': "...--",
	'4': "....-",
	'5': ".....",
	'6': "-....",
	'7': "--...",
	'8': "---..",
	'9': "----.",
	'0': "-----",
}

// GenerateMorse converts str to Morse code.
//
// Leading and trailing whitespace is trimmed first. The remaining bytes are
// looked up one by one in morseMap and their patterns are concatenated with no
// separator. It returns lengthError when nothing is left after trimming, and
// unsupportedError as soon as a byte has no mapping (uppercase letters,
// punctuation, inner spaces and every byte of a multi-byte character).
func GenerateMorse(str string) (string, error) {
	str = strings.TrimSpace(str)
	if len(str) == 0 {
		return "", lengthError
	}

	var builder strings.Builder
	for i := 0; i < len(str); i++ {
		pattern, ok := morseMap[str[i]]
		if !ok {
			return "", unsupportedError
		}
		builder.WriteString(pattern)
	}
	return builder.String(), nil
}
73 |
--------------------------------------------------------------------------------
/pkg/util/morse_test.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | /**
8 | * @Author: super
9 | * @Date: 2020-08-24 10:04
10 | * @Description:
11 | **/
12 |
// Morse pairs an input string with an expected result and error.
// NOTE(review): nothing visible in this file references this type; the test
// below declares its own anonymous struct. Confirm whether it is still needed.
type Morse struct {
	str  string
	real string
	err  error
}
18 |
19 | func TestGenerateMorse(t *testing.T) {
20 | var morses = []struct {
21 | str string
22 | code string
23 | err error
24 | }{
25 | {"aa11", ".-.-.----.----", nil},
26 | {"11aa", ".----.----.-.-", nil},
27 | {"", "", lengthError},
28 | {"111,as", "", unsupportedError},
29 | {"中文", "", unsupportedError},
30 | {"1a12 ", ".----.-.----..---", nil},
31 | {" ", "", lengthError},
32 | {"asdj$%#, 441", "", unsupportedError},
33 | {"!@#$", "", unsupportedError},
34 | }
35 |
36 | for i, v := range morses {
37 | code, e := GenerateMorse(v.str)
38 | if code != v.code {
39 | t.Errorf("%d. %s morse code %s, wanted: %s, error= %v", i, v.str, code, v.code, e)
40 | } else if e != v.err {
41 | t.Errorf("%d. %s morse code %s, wanted: %s, error= %v", i, v.str, code, v.code, e)
42 | }
43 | }
44 | }
45 |
46 | func BenchmarkGenerateMorse(b *testing.B) {
47 | for i := 0; i < b.N; i++ {
48 | _, _ = GenerateMorse("asasd12454")
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/pkg/util/qrcode.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-08-21 22:08
6 | * @Description:
7 | **/
8 |
9 | import "github.com/skip2/go-qrcode"
10 |
11 | func GenerateQRCodeByte(str string) ([]byte, error) {
12 | return qrcode.Encode(str, qrcode.Highest, 256)
13 | }
14 |
--------------------------------------------------------------------------------
/pkg/util/reb2hex_test.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2020-11-30 20:21
11 | * @Description:
12 | **/
13 |
14 | func TestRgbToHex(t *testing.T) {
15 | fmt.Println(RgbToHex(`{
16 | "red": 12,
17 | "green": 255,
18 | "blue": 255
19 | }`))
20 | }
21 |
22 | func BenchmarkRgbToHex(b *testing.B) {
23 | for i := 0; i < b.N; i++ {
24 | _, _ = RgbToHex(`{
25 | "red": 255,
26 | "green": 255,
27 | "blue": 255
28 | }`)
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/pkg/util/regularExpression.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | /**
4 | * @Author: super
5 | * @Date: 2020-08-24 09:45
6 | * @Description:
7 | **/
8 |
// StringMatching is an empty placeholder: it takes no arguments, returns
// nothing and currently does nothing.
func StringMatching() {}
12 |
--------------------------------------------------------------------------------
/pkg/util/rgb2hex.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "encoding/json"
5 | "fmt"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2020-11-27 16:37
11 | * @Description:
12 | **/
// RGB is a colour given as three integer channels, decoded from JSON keys
// "red", "green" and "blue".
type RGB struct {
	Red   int `json:"red"`
	Green int `json:"green"`
	Blue  int `json:"blue"`
}

// ToString renders the colour as "#rrggbb" in lowercase hexadecimal, two
// digits per channel.
//
// Channels outside 0-255 are clamped to that range, so the result is always a
// well-formed seven-character colour string.
func (rgb *RGB) ToString() string {
	return fmt.Sprintf("#%02x%02x%02x",
		clampChannel(rgb.Red),
		clampChannel(rgb.Green),
		clampChannel(rgb.Blue),
	)
}

// clampChannel limits v to the 0-255 range of a single colour channel.
func clampChannel(v int) int {
	switch {
	case v < 0:
		return 0
	case v > 255:
		return 255
	default:
		return v
	}
}
38 |
39 | func RgbToHex(rgb string) (string, error) {
40 | rgbStruct := &RGB{}
41 | err := json.Unmarshal([]byte(rgb), rgbStruct)
42 | if err != nil {
43 | return "", err
44 | }
45 | return rgbStruct.ToString(), nil
46 | }
47 |
--------------------------------------------------------------------------------
/pkg/util/stringCode.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "bytes"
5 | "compress/gzip"
6 | "encoding/base64"
7 | "io/ioutil"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-09-01 20:06
13 | * @Description: 字符串压缩
14 | **/
15 |
// ZipString gzip-compresses s and returns the compressed bytes encoded with
// standard (padded) base64.
//
// The gzip writer is flushed and then closed before the buffer is encoded. On
// any write, flush or close error the returned string is empty and the error
// is passed back unchanged.
func ZipString(s []byte) (string, error) {
	packed := new(bytes.Buffer)
	zw := gzip.NewWriter(packed)

	_, err := zw.Write(s)
	if err == nil {
		err = zw.Flush()
	}
	if err == nil {
		err = zw.Close()
	}
	if err != nil {
		return "", err
	}
	return base64.StdEncoding.EncodeToString(packed.Bytes()), nil
}
31 |
// UnzipString reverses ZipString: it base64-decodes str, gunzips the result
// and returns the original text.
//
// The signature has no error return, so any failure (invalid base64, missing
// or corrupt gzip header, truncated or checksum-mismatched stream) yields the
// empty string instead of a panic or partially decoded text.
func UnzipString(str string) string {
	packed, err := base64.StdEncoding.DecodeString(str)
	if err != nil {
		return ""
	}

	zr, err := gzip.NewReader(bytes.NewReader(packed))
	if err != nil {
		// NewReader returns a nil reader on a bad header; reading from it
		// would dereference nil.
		return ""
	}
	defer zr.Close()

	plain, err := ioutil.ReadAll(zr)
	if err != nil {
		return ""
	}
	return string(plain)
}
39 |
--------------------------------------------------------------------------------
/pkg/util/structMapping.go:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import (
4 | "go-crawler-distributed/internal/model"
5 | "go-crawler-distributed/service/elastic/proto"
6 | )
7 |
8 | /**
9 | * @Author: super
10 | * @Date: 2021-01-05 19:27
11 | * @Description:
12 | **/
13 |
14 | func ProtoToArticle(article *proto.Article) *model.Article {
15 | result := &model.Article{}
16 | result.Title = article.Title
17 | result.Genres = article.Genres
18 | result.Url = article.Url
19 | result.Content = article.Content
20 | return result
21 | }
22 |
23 | func ArticleToProto(article *model.Article) *proto.Article {
24 | result := &proto.Article{}
25 | result.Title = article.Title
26 | result.Genres = article.Genres
27 | result.Url = article.Url
28 | result.Content = article.Content
29 | return result
30 | }
31 |
--------------------------------------------------------------------------------
/service/cache/client/client.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import (
4 | "context"
5 | "github.com/micro/go-micro/v2"
6 | "github.com/micro/go-micro/v2/registry"
7 | "github.com/micro/go-plugins/registry/consul/v2"
8 | "go-crawler-distributed/global"
9 | "go-crawler-distributed/service/cache/proto"
10 | )
11 |
12 | /**
13 | * @Author: super
14 | * @Date: 2021-01-05 19:33
15 | * @Description:
16 | **/
17 |
// redisOP is the RPC client for the cache service; it is assigned once in
// init and used by the wrapper functions in this file.
var redisOP proto.RedisOperationService

// init builds the go-micro client for the cache service at package load time.
// It points a Consul registry at global.ConsulSetting.Url, creates a service
// named "go.micro.service.redis.client" and binds redisOP to the remote
// service "go.micro.service.redis".
// NOTE(review): this reads global.ConsulSetting during package initialisation;
// confirm the setting is populated before this package is initialised.
func init() {
	reg := consul.NewRegistry(func(options *registry.Options) {
		options.Addrs = []string{
			global.ConsulSetting.Url,
		}
	})

	service := micro.NewService(
		micro.Registry(reg),
		micro.Name("go.micro.service.redis.client"),
	)
	service.Init()
	redisOP = proto.NewRedisOperationService("go.micro.service.redis", service.Client())
}
34 |
35 | func AddElementToSet(key string, value string) (int32, error) {
36 | res, err := redisOP.AddElementToSet(context.TODO(), &proto.Request{Key: key, Value: value})
37 | if err != nil {
38 | global.Logger.Error(context.Background(), err)
39 | return -1, err
40 | }
41 | return res.Result, nil
42 | }
43 |
44 | func ElementIsInSet(key string, value string) (bool, error) {
45 | rsp, err := redisOP.ElementIsInSet(context.TODO(), &proto.Request{Key: key, Value: value})
46 | if err != nil {
47 | global.Logger.Error(context.Background(), err)
48 | return false, err
49 | }
50 | return rsp.Result, err
51 | }
52 |
--------------------------------------------------------------------------------
/service/cache/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "time"
6 |
7 | "github.com/micro/go-micro/v2"
8 | "github.com/micro/go-micro/v2/registry"
9 | "github.com/micro/go-plugins/registry/consul/v2"
10 |
11 | "go-crawler-distributed/global"
12 | "go-crawler-distributed/service/cache/proto"
13 | "go-crawler-distributed/service/cache/server"
14 | )
15 |
16 | /**
17 | * @Author: super
18 | * @Date: 2020-08-17 20:21
19 | * @Description:
20 | **/
21 |
22 | func main() {
23 | reg := consul.NewRegistry(func(options *registry.Options) {
24 | options.Addrs = []string{
25 | global.ConsulSetting.Url,
26 | }
27 | })
28 |
29 | service := micro.NewService(
30 | micro.Registry(reg),
31 | micro.Name("go.micro.service.redis"),
32 | micro.RegisterTTL(time.Second*10),
33 | micro.RegisterInterval(time.Second*5),
34 | )
35 | service.Init()
36 |
37 | // 注册处理器
38 | err := proto.RegisterRedisOperationHandler(service.Server(), new(server.CacheStruct))
39 | if err != nil {
40 | global.Logger.Error(context.Background(), err)
41 | }
42 |
43 | // 运行服务
44 | if err := service.Run(); err != nil {
45 | global.Logger.Error(context.Background(), err)
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/service/cache/proto/redis.pb.micro.go:
--------------------------------------------------------------------------------
1 | // Code generated by protoc-gen-micro. DO NOT EDIT.
2 | // source: redis.proto
3 |
4 | package proto
5 |
6 | import (
7 | fmt "fmt"
8 | proto "github.com/golang/protobuf/proto"
9 | math "math"
10 | )
11 |
12 | import (
13 | context "context"
14 | api "github.com/micro/go-micro/v2/api"
15 | client "github.com/micro/go-micro/v2/client"
16 | server "github.com/micro/go-micro/v2/server"
17 | )
18 |
19 | // Reference imports to suppress errors if they are not otherwise used.
20 | var _ = proto.Marshal
21 | var _ = fmt.Errorf
22 | var _ = math.Inf
23 |
24 | // This is a compile-time assertion to ensure that this generated file
25 | // is compatible with the proto package it is being compiled against.
26 | // A compilation error at this line likely means your copy of the
27 | // proto package needs to be updated.
28 | const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
29 |
30 | // Reference imports to suppress errors if they are not otherwise used.
31 | var _ api.Endpoint
32 | var _ context.Context
33 | var _ client.Option
34 | var _ server.Option
35 |
36 | // Api Endpoints for RedisOperation service
37 |
38 | func NewRedisOperationEndpoints() []*api.Endpoint {
39 | return []*api.Endpoint{}
40 | }
41 |
42 | // Client API for RedisOperation service
43 |
44 | type RedisOperationService interface {
45 | SetString(ctx context.Context, in *Request, opts ...client.CallOption) (*StringResponse, error)
46 | GetString(ctx context.Context, in *Request, opts ...client.CallOption) (*StringResponse, error)
47 | AddElementToSet(ctx context.Context, in *Request, opts ...client.CallOption) (*IntResponse, error)
48 | ElementIsInSet(ctx context.Context, in *Request, opts ...client.CallOption) (*BoolResponse, error)
49 | GetAllElementFromSet(ctx context.Context, in *Request, opts ...client.CallOption) (*StringsResponse, error)
50 | }
51 |
52 | type redisOperationService struct {
53 | c client.Client
54 | name string
55 | }
56 |
57 | func NewRedisOperationService(name string, c client.Client) RedisOperationService {
58 | return &redisOperationService{
59 | c: c,
60 | name: name,
61 | }
62 | }
63 |
64 | func (c *redisOperationService) SetString(ctx context.Context, in *Request, opts ...client.CallOption) (*StringResponse, error) {
65 | req := c.c.NewRequest(c.name, "RedisOperation.SetString", in)
66 | out := new(StringResponse)
67 | err := c.c.Call(ctx, req, out, opts...)
68 | if err != nil {
69 | return nil, err
70 | }
71 | return out, nil
72 | }
73 |
74 | func (c *redisOperationService) GetString(ctx context.Context, in *Request, opts ...client.CallOption) (*StringResponse, error) {
75 | req := c.c.NewRequest(c.name, "RedisOperation.GetString", in)
76 | out := new(StringResponse)
77 | err := c.c.Call(ctx, req, out, opts...)
78 | if err != nil {
79 | return nil, err
80 | }
81 | return out, nil
82 | }
83 |
84 | func (c *redisOperationService) AddElementToSet(ctx context.Context, in *Request, opts ...client.CallOption) (*IntResponse, error) {
85 | req := c.c.NewRequest(c.name, "RedisOperation.AddElementToSet", in)
86 | out := new(IntResponse)
87 | err := c.c.Call(ctx, req, out, opts...)
88 | if err != nil {
89 | return nil, err
90 | }
91 | return out, nil
92 | }
93 |
94 | func (c *redisOperationService) ElementIsInSet(ctx context.Context, in *Request, opts ...client.CallOption) (*BoolResponse, error) {
95 | req := c.c.NewRequest(c.name, "RedisOperation.ElementIsInSet", in)
96 | out := new(BoolResponse)
97 | err := c.c.Call(ctx, req, out, opts...)
98 | if err != nil {
99 | return nil, err
100 | }
101 | return out, nil
102 | }
103 |
104 | func (c *redisOperationService) GetAllElementFromSet(ctx context.Context, in *Request, opts ...client.CallOption) (*StringsResponse, error) {
105 | req := c.c.NewRequest(c.name, "RedisOperation.GetAllElementFromSet", in)
106 | out := new(StringsResponse)
107 | err := c.c.Call(ctx, req, out, opts...)
108 | if err != nil {
109 | return nil, err
110 | }
111 | return out, nil
112 | }
113 |
114 | // Server API for RedisOperation service
115 |
116 | type RedisOperationHandler interface {
117 | SetString(context.Context, *Request, *StringResponse) error
118 | GetString(context.Context, *Request, *StringResponse) error
119 | AddElementToSet(context.Context, *Request, *IntResponse) error
120 | ElementIsInSet(context.Context, *Request, *BoolResponse) error
121 | GetAllElementFromSet(context.Context, *Request, *StringsResponse) error
122 | }
123 |
124 | func RegisterRedisOperationHandler(s server.Server, hdlr RedisOperationHandler, opts ...server.HandlerOption) error {
125 | type redisOperation interface {
126 | SetString(ctx context.Context, in *Request, out *StringResponse) error
127 | GetString(ctx context.Context, in *Request, out *StringResponse) error
128 | AddElementToSet(ctx context.Context, in *Request, out *IntResponse) error
129 | ElementIsInSet(ctx context.Context, in *Request, out *BoolResponse) error
130 | GetAllElementFromSet(ctx context.Context, in *Request, out *StringsResponse) error
131 | }
132 | type RedisOperation struct {
133 | redisOperation
134 | }
135 | h := &redisOperationHandler{hdlr}
136 | return s.Handle(s.NewHandler(&RedisOperation{h}, opts...))
137 | }
138 |
139 | type redisOperationHandler struct {
140 | RedisOperationHandler
141 | }
142 |
143 | func (h *redisOperationHandler) SetString(ctx context.Context, in *Request, out *StringResponse) error {
144 | return h.RedisOperationHandler.SetString(ctx, in, out)
145 | }
146 |
147 | func (h *redisOperationHandler) GetString(ctx context.Context, in *Request, out *StringResponse) error {
148 | return h.RedisOperationHandler.GetString(ctx, in, out)
149 | }
150 |
151 | func (h *redisOperationHandler) AddElementToSet(ctx context.Context, in *Request, out *IntResponse) error {
152 | return h.RedisOperationHandler.AddElementToSet(ctx, in, out)
153 | }
154 |
155 | func (h *redisOperationHandler) ElementIsInSet(ctx context.Context, in *Request, out *BoolResponse) error {
156 | return h.RedisOperationHandler.ElementIsInSet(ctx, in, out)
157 | }
158 |
159 | func (h *redisOperationHandler) GetAllElementFromSet(ctx context.Context, in *Request, out *StringsResponse) error {
160 | return h.RedisOperationHandler.GetAllElementFromSet(ctx, in, out)
161 | }
162 |
--------------------------------------------------------------------------------
/service/cache/proto/redis.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 |
3 | option go_package = ".;proto";
4 |
// RedisOperation exposes string and set operations of the cache service over
// RPC. Every call takes a Request carrying a key and, where needed, a value.
service RedisOperation {
  // SetString stores value under key.
  rpc SetString(Request) returns (StringResponse) {}
  // GetString returns the string stored under key.
  rpc GetString(Request) returns (StringResponse) {}
  // AddElementToSet adds value to the set stored under key.
  rpc AddElementToSet(Request) returns (IntResponse) {}
  // ElementIsInSet reports whether value is a member of the set under key.
  rpc ElementIsInSet(Request) returns (BoolResponse) {}
  // GetAllElementFromSet returns every member of the set under key.
  rpc GetAllElementFromSet(Request) returns (StringsResponse){}
}
12 |
// Request is the shared input of every RedisOperation call.
message Request {
  // Key the operation applies to.
  string key = 1;
  // Value to store or test; not read by the read-only lookups.
  string value = 2;
}

// StringResponse carries a single string result.
message StringResponse{
  string result = 1;
}

// IntResponse carries a single 32-bit integer result.
message IntResponse{
  int32 result = 1;
}

// BoolResponse carries a single boolean result.
message BoolResponse{
  bool result = 1;
}

// StringsResponse carries a list of string results.
message StringsResponse{
  repeated string result = 1;
}
--------------------------------------------------------------------------------
/service/cache/server/server.go:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import (
4 | "context"
5 | "github.com/garyburd/redigo/redis"
6 | "go-crawler-distributed/global"
7 | "go-crawler-distributed/service/cache/proto"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-08-17 20:08
13 | * @Description: redis微服务化
14 | **/
15 |
// CacheStruct is the stateless receiver for the RedisOperation handler
// methods in this file; each method takes its connection from
// global.RedisEngine.
type CacheStruct struct {
}
18 |
19 | func (cache *CacheStruct) SetString(ctx context.Context, req *proto.Request, res *proto.StringResponse) error {
20 | c := global.RedisEngine.Get()
21 |
22 | str, err := redis.String(c.Do("set", req.Key, req.Value))
23 | if err != nil {
24 | return err
25 | }
26 | res.Result = str
27 | return nil
28 | }
29 |
30 | func (cache *CacheStruct) GetString(ctx context.Context, req *proto.Request, res *proto.StringResponse) error {
31 | c := global.RedisEngine.Get()
32 |
33 | str, err := redis.String(c.Do("get", req.Key))
34 | if err != nil {
35 | return err
36 | }
37 | res.Result = str
38 | return nil
39 | }
40 |
41 | func (cache *CacheStruct) AddElementToSet(ctx context.Context, req *proto.Request, res *proto.IntResponse) error {
42 | c := global.RedisEngine.Get()
43 | defer c.Close()
44 |
45 | result, err := redis.Int(c.Do("sadd", req.Key, req.Value))
46 | if err != nil {
47 | return err
48 | }
49 | res.Result = int32(result)
50 | return nil
51 | }
52 |
53 | func (cache *CacheStruct) ElementIsInSet(ctx context.Context, req *proto.Request, res *proto.BoolResponse) error {
54 | c := global.RedisEngine.Get()
55 | defer c.Close()
56 |
57 | result, err := redis.Int(c.Do("sismember", req.Key, req.Value))
58 | if err != nil {
59 | return err
60 | }
61 | if result == 1 {
62 | res.Result = true
63 | } else {
64 | res.Result = false
65 | }
66 | return nil
67 | }
68 |
69 | func (cache *CacheStruct) GetAllElementFromSet(ctx context.Context, req *proto.Request, res *proto.StringsResponse) error {
70 | c := global.RedisEngine.Get()
71 | strs, err := redis.Strings(c.Do("smembers", req.Key))
72 | if err != nil {
73 | return err
74 | }
75 | res.Result = strs
76 | return nil
77 | }
78 |
--------------------------------------------------------------------------------
/service/douban/crawl_detail/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go-crawler-distributed/internal/crawler"
5 | "go-crawler-distributed/internal/crawler/crawerConfig"
6 | "go-crawler-distributed/internal/crawler/douban/parser"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-08-31 16:52
12 | * @Description:
13 | **/
14 |
15 | func main() {
16 | crawler.Crawl(crawerConfig.BookDetailUrl, crawerConfig.BookDetail, "BookDetail", parser.ParseBookDetail)
17 | }
18 |
--------------------------------------------------------------------------------
/service/douban/crawl_list/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go-crawler-distributed/internal/crawler"
5 | "go-crawler-distributed/internal/crawler/crawerConfig"
6 | "go-crawler-distributed/internal/crawler/douban/parser"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-08-31 16:51
12 | * @Description:
13 | **/
14 |
15 | func main() {
16 | crawler.Crawl(crawerConfig.TagUrl, crawerConfig.BookDetailUrl, "tagList", parser.ParseBookList)
17 | }
18 |
--------------------------------------------------------------------------------
/service/douban/crawl_tags/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go-crawler-distributed/internal/crawler"
5 | "go-crawler-distributed/internal/crawler/crawerConfig"
6 | "go-crawler-distributed/internal/crawler/douban/parser"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-08-31 16:50
12 | * @Description:
13 | **/
14 |
15 | func main() {
16 | crawler.Crawl("", crawerConfig.TagUrl, "tags", parser.ParseTagList)
17 | }
18 |
--------------------------------------------------------------------------------
/service/douban/storage_detail/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go-crawler-distributed/internal/crawler"
5 | "go-crawler-distributed/internal/crawler/crawerConfig"
6 | "go-crawler-distributed/internal/crawler/douban/storage"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-08-31 16:52
12 | * @Description:
13 | **/
14 |
15 | func main() {
16 | crawler.Crawl(crawerConfig.BookDetail, "", "storageBookDetail", storage.ParseAndStorage)
17 | }
18 |
--------------------------------------------------------------------------------
/service/elastic/client/client.go:
--------------------------------------------------------------------------------
1 | package client
2 |
3 | import (
4 | "context"
5 | "github.com/micro/go-micro/v2"
6 | "github.com/micro/go-micro/v2/registry"
7 | "github.com/micro/go-plugins/registry/consul/v2"
8 | "go-crawler-distributed/global"
9 | "go-crawler-distributed/internal/model"
10 | "go-crawler-distributed/pkg/util"
11 | "go-crawler-distributed/service/elastic/proto"
12 | )
13 |
14 | /**
15 | * @Author: super
16 | * @Date: 2021-01-05 19:34
17 | * @Description:
18 | **/
19 |
// elasticOP is the RPC client for the elastic service; it is assigned once in
// init and used by the wrapper functions in this file.
var elasticOP proto.ElasticOperationService

// init builds the go-micro client for the elastic service at package load
// time. It points a Consul registry at global.ConsulSetting.Url, creates a
// service named "go.micro.service.elastic.client" and binds elasticOP to the
// remote service "go.micro.service.elastic".
// NOTE(review): this reads global.ConsulSetting during package initialisation;
// confirm the setting is populated before this package is initialised.
func init() {
	reg := consul.NewRegistry(func(options *registry.Options) {
		options.Addrs = []string{
			global.ConsulSetting.Url,
		}
	})

	service := micro.NewService(
		micro.Registry(reg),
		micro.Name("go.micro.service.elastic.client"),
	)
	service.Init()
	elasticOP = proto.NewElasticOperationService("go.micro.service.elastic", service.Client())
}
36 |
37 | func IndexExist(index string) (bool, error) {
38 | res, err := elasticOP.IndexExist(context.TODO(), &proto.IndexExistRequest{Index: index})
39 | if err != nil {
40 | global.Logger.Error(context.Background(), err)
41 | return false, err
42 | }
43 | return res.Exist, nil
44 | }
45 |
46 | func SaveInfo(table string, data *model.Article) (string, error) {
47 | article := util.ArticleToProto(data)
48 | res, err := elasticOP.SaveInfo(context.TODO(), &proto.SaveInfoRequest{Table: table, Article: article})
49 | if err != nil {
50 | global.Logger.Error(context.Background(), err)
51 | return "", err
52 | }
53 | return res.Result, nil
54 | }
55 |
56 | func GetInfo(table string, id string) (*model.Article, error) {
57 | res, err := elasticOP.GetInfo(context.TODO(), &proto.GetInfoRequest{Table: table, Id: id})
58 | if err != nil {
59 | global.Logger.Error(context.Background(), err)
60 | return nil, err
61 | }
62 | article := util.ProtoToArticle(res.Article)
63 | return article, nil
64 | }
65 |
66 | func SearchInfo(table string, fieldName string, fieldValue string) ([]*model.Article, error) {
67 | res, err := elasticOP.SearchInfo(context.TODO(), &proto.SearchInfoRequest{Table: table, FieldName: fieldName, FieldValue: fieldValue})
68 | if err != nil {
69 | global.Logger.Error(context.Background(), err)
70 | return nil, err
71 | }
72 | l := len(res.Article)
73 | result := make([]*model.Article, l)
74 |
75 | for i := 0; i < l; i++ {
76 | temp := util.ProtoToArticle(res.Article[i])
77 | result = append(result, temp)
78 | }
79 | return result, nil
80 | }
81 |
--------------------------------------------------------------------------------
/service/elastic/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "github.com/micro/go-micro/v2"
6 | "github.com/micro/go-micro/v2/registry"
7 | "github.com/micro/go-plugins/registry/consul/v2"
8 | "go-crawler-distributed/global"
9 | "go-crawler-distributed/service/elastic/proto"
10 | "go-crawler-distributed/service/elastic/server"
11 | "time"
12 | )
13 |
14 | /**
15 | * @Author: super
16 | * @Date: 2020-09-01 20:44
17 | * @Description:
18 | **/
19 |
20 | func main() {
21 | reg := consul.NewRegistry(func(options *registry.Options) {
22 | options.Addrs = []string{
23 | global.ConsulSetting.Url,
24 | }
25 | })
26 |
27 | service := micro.NewService(
28 | micro.Registry(reg),
29 | micro.Name("go.micro.service.elastic"),
30 | micro.RegisterTTL(time.Second*10),
31 | micro.RegisterInterval(time.Second*5),
32 | )
33 | service.Init()
34 |
35 | // 注册处理器
36 | err := proto.RegisterElasticOperationHandler(service.Server(), new(server.Elastic))
37 | if err != nil {
38 | global.Logger.Error(context.Background(), err)
39 | }
40 |
41 | // 运行服务
42 | if err := service.Run(); err != nil {
43 | global.Logger.Error(context.Background(), err)
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/service/elastic/proto/elastic.pb.micro.go:
--------------------------------------------------------------------------------
1 | // Code generated by protoc-gen-micro. DO NOT EDIT.
2 | // source: elastic.proto
3 |
4 | package proto
5 |
6 | import (
7 | fmt "fmt"
8 | proto "github.com/golang/protobuf/proto"
9 | math "math"
10 | )
11 |
12 | import (
13 | context "context"
14 | api "github.com/micro/go-micro/v2/api"
15 | client "github.com/micro/go-micro/v2/client"
16 | server "github.com/micro/go-micro/v2/server"
17 | )
18 |
19 | // Reference imports to suppress errors if they are not otherwise used.
20 | var _ = proto.Marshal
21 | var _ = fmt.Errorf
22 | var _ = math.Inf
23 |
24 | // This is a compile-time assertion to ensure that this generated file
25 | // is compatible with the proto package it is being compiled against.
26 | // A compilation error at this line likely means your copy of the
27 | // proto package needs to be updated.
28 | const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
29 |
30 | // Reference imports to suppress errors if they are not otherwise used.
31 | var _ api.Endpoint
32 | var _ context.Context
33 | var _ client.Option
34 | var _ server.Option
35 |
36 | // Api Endpoints for ElasticOperation service
37 |
// NewElasticOperationEndpoints returns the API endpoints of the
// ElasticOperation service. The returned slice is always empty and non-nil.
func NewElasticOperationEndpoints() []*api.Endpoint {
	return []*api.Endpoint{}
}
41 |
42 | // Client API for ElasticOperation service
43 |
// ElasticOperationService is the client-side view of the ElasticOperation
// service: one method per RPC, each taking a context, the request message and
// optional per-call client options, and returning the response message.
type ElasticOperationService interface {
	// IndexExist calls the ElasticOperation.IndexExist endpoint.
	IndexExist(ctx context.Context, in *IndexExistRequest, opts ...client.CallOption) (*IndexExistResponse, error)
	// SaveInfo calls the ElasticOperation.SaveInfo endpoint.
	SaveInfo(ctx context.Context, in *SaveInfoRequest, opts ...client.CallOption) (*SaveInfoResponse, error)
	// GetInfo calls the ElasticOperation.GetInfo endpoint.
	GetInfo(ctx context.Context, in *GetInfoRequest, opts ...client.CallOption) (*GetInfoResponse, error)
	// SearchInfo calls the ElasticOperation.SearchInfo endpoint.
	SearchInfo(ctx context.Context, in *SearchInfoRequest, opts ...client.CallOption) (*SearchInfoResponse, error)
}
50 |
// elasticOperationService is the ElasticOperationService implementation
// returned by NewElasticOperationService.
type elasticOperationService struct {
	// c is the go-micro client used to build and send requests.
	c client.Client
	// name is the service name passed to c.NewRequest for every call.
	name string
}
55 |
// NewElasticOperationService returns an ElasticOperationService that sends
// its requests to the service called name through the client c.
// Neither argument is validated here.
func NewElasticOperationService(name string, c client.Client) ElasticOperationService {
	return &elasticOperationService{
		c:    c,
		name: name,
	}
}
62 |
// IndexExist sends in to the "ElasticOperation.IndexExist" endpoint of the
// configured service and returns the decoded response. When the call fails
// the response is nil and the error from the client is returned unchanged.
func (c *elasticOperationService) IndexExist(ctx context.Context, in *IndexExistRequest, opts ...client.CallOption) (*IndexExistResponse, error) {
	req := c.c.NewRequest(c.name, "ElasticOperation.IndexExist", in)
	out := new(IndexExistResponse)
	err := c.c.Call(ctx, req, out, opts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
72 |
// SaveInfo sends in to the "ElasticOperation.SaveInfo" endpoint of the
// configured service and returns the decoded response. When the call fails
// the response is nil and the error from the client is returned unchanged.
func (c *elasticOperationService) SaveInfo(ctx context.Context, in *SaveInfoRequest, opts ...client.CallOption) (*SaveInfoResponse, error) {
	req := c.c.NewRequest(c.name, "ElasticOperation.SaveInfo", in)
	out := new(SaveInfoResponse)
	err := c.c.Call(ctx, req, out, opts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
82 |
// GetInfo sends in to the "ElasticOperation.GetInfo" endpoint of the
// configured service and returns the decoded response. When the call fails
// the response is nil and the error from the client is returned unchanged.
func (c *elasticOperationService) GetInfo(ctx context.Context, in *GetInfoRequest, opts ...client.CallOption) (*GetInfoResponse, error) {
	req := c.c.NewRequest(c.name, "ElasticOperation.GetInfo", in)
	out := new(GetInfoResponse)
	err := c.c.Call(ctx, req, out, opts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
92 |
// SearchInfo sends in to the "ElasticOperation.SearchInfo" endpoint of the
// configured service and returns the decoded response. When the call fails
// the response is nil and the error from the client is returned unchanged.
func (c *elasticOperationService) SearchInfo(ctx context.Context, in *SearchInfoRequest, opts ...client.CallOption) (*SearchInfoResponse, error) {
	req := c.c.NewRequest(c.name, "ElasticOperation.SearchInfo", in)
	out := new(SearchInfoResponse)
	err := c.c.Call(ctx, req, out, opts...)
	if err != nil {
		return nil, err
	}
	return out, nil
}
102 |
103 | // Server API for ElasticOperation service
104 |
// ElasticOperationHandler is the server-side contract of the ElasticOperation
// service. Each method receives the request message and a response message to
// fill in, and reports failure through the returned error.
type ElasticOperationHandler interface {
	// IndexExist handles the IndexExist RPC.
	IndexExist(context.Context, *IndexExistRequest, *IndexExistResponse) error
	// SaveInfo handles the SaveInfo RPC.
	SaveInfo(context.Context, *SaveInfoRequest, *SaveInfoResponse) error
	// GetInfo handles the GetInfo RPC.
	GetInfo(context.Context, *GetInfoRequest, *GetInfoResponse) error
	// SearchInfo handles the SearchInfo RPC.
	SearchInfo(context.Context, *SearchInfoRequest, *SearchInfoResponse) error
}
111 |
// RegisterElasticOperationHandler registers hdlr on the server s and returns
// the error from s.Handle. opts are forwarded to s.NewHandler.
//
// hdlr is wrapped in elasticOperationHandler and then embedded in a locally
// declared ElasticOperation struct before being handed to the server.
// NOTE(review): presumably the server derives the "ElasticOperation.<Method>"
// endpoint names used by the client from this local type's name — confirm
// against go-micro before renaming anything here.
func RegisterElasticOperationHandler(s server.Server, hdlr ElasticOperationHandler, opts ...server.HandlerOption) error {
	// Method set exposed to the server; mirrors ElasticOperationHandler with
	// named parameters.
	type elasticOperation interface {
		IndexExist(ctx context.Context, in *IndexExistRequest, out *IndexExistResponse) error
		SaveInfo(ctx context.Context, in *SaveInfoRequest, out *SaveInfoResponse) error
		GetInfo(ctx context.Context, in *GetInfoRequest, out *GetInfoResponse) error
		SearchInfo(ctx context.Context, in *SearchInfoRequest, out *SearchInfoResponse) error
	}
	// Exported-name wrapper that is actually registered with the server.
	type ElasticOperation struct {
		elasticOperation
	}
	h := &elasticOperationHandler{hdlr}
	return s.Handle(s.NewHandler(&ElasticOperation{h}, opts...))
}
125 |
// elasticOperationHandler wraps a user-supplied ElasticOperationHandler; its
// methods forward every call to the embedded handler.
type elasticOperationHandler struct {
	ElasticOperationHandler
}
129 |
// IndexExist forwards the call unchanged to the wrapped handler's IndexExist.
func (h *elasticOperationHandler) IndexExist(ctx context.Context, in *IndexExistRequest, out *IndexExistResponse) error {
	return h.ElasticOperationHandler.IndexExist(ctx, in, out)
}
133 |
// SaveInfo forwards the call unchanged to the wrapped handler's SaveInfo.
func (h *elasticOperationHandler) SaveInfo(ctx context.Context, in *SaveInfoRequest, out *SaveInfoResponse) error {
	return h.ElasticOperationHandler.SaveInfo(ctx, in, out)
}
137 |
// GetInfo forwards the call unchanged to the wrapped handler's GetInfo.
func (h *elasticOperationHandler) GetInfo(ctx context.Context, in *GetInfoRequest, out *GetInfoResponse) error {
	return h.ElasticOperationHandler.GetInfo(ctx, in, out)
}
141 |
// SearchInfo forwards the call unchanged to the wrapped handler's SearchInfo.
func (h *elasticOperationHandler) SearchInfo(ctx context.Context, in *SearchInfoRequest, out *SearchInfoResponse) error {
	return h.ElasticOperationHandler.SearchInfo(ctx, in, out)
}
145 |
--------------------------------------------------------------------------------
/service/elastic/proto/elastic.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 |
3 | option go_package = ".;proto";
4 |
// ElasticOperation is the RPC surface of the elastic service: an index
// existence check plus save, get-by-id and field search of Article documents.
service ElasticOperation {
  // IndexExist reports whether the index named in the request exists.
  rpc IndexExist(IndexExistRequest) returns (IndexExistResponse) {}
  // SaveInfo stores one Article under the given table.
  rpc SaveInfo(SaveInfoRequest) returns (SaveInfoResponse) {}
  // GetInfo fetches one Article by table and id.
  rpc GetInfo(GetInfoRequest) returns (GetInfoResponse) {}
  // SearchInfo returns the Articles matching a field name/value pair.
  rpc SearchInfo(SearchInfoRequest) returns (SearchInfoResponse) {}
}
11 |
// Article is the document payload carried by the SaveInfo, GetInfo and
// SearchInfo messages.
message Article{
  string title = 1;
  // presumably the address of the crawled page — confirm against the crawler.
  string url = 2;
  // Zero or more genre labels.
  repeated string genres = 3;
  string content = 4;
}
18 |
// IndexExistRequest names the index whose existence is being checked.
message IndexExistRequest {
  string index = 1;
}
22 |
// IndexExistResponse carries the result of the existence check.
message IndexExistResponse{
  bool exist = 1;
}
26 |
// SaveInfoRequest asks the service to store article under table.
message SaveInfoRequest{
  // presumably the Elasticsearch index name — confirm against pkg/elastic.
  string table = 1;
  Article article = 2;
}
31 |
// SaveInfoResponse carries the outcome of a save. The Elastic server handler
// fills result with the id returned by the storage call.
message SaveInfoResponse{
  string result = 1;
}
35 |
// GetInfoRequest identifies a single stored Article by table and id.
message GetInfoRequest{
  string table = 1;
  string id = 2;
}
40 |
// GetInfoResponse carries the Article that was looked up.
message GetInfoResponse{
  Article article = 1;
}
44 |
// SearchInfoRequest describes a search within table for documents whose
// field fieldName matches fieldValue. The exact matching rules are defined by
// the storage layer, not by this message.
message SearchInfoRequest{
  string table = 1;
  string fieldName = 2;
  string fieldValue = 3;
}
50 |
// SearchInfoResponse carries the Articles found by a search; the list may be
// empty.
message SearchInfoResponse{
  repeated Article article = 1;
}
--------------------------------------------------------------------------------
/service/elastic/server/server.go:
--------------------------------------------------------------------------------
1 | package server
2 |
3 | import (
4 | "context"
5 | "go-crawler-distributed/pkg/elastic"
6 | "go-crawler-distributed/pkg/util"
7 | "go-crawler-distributed/service/elastic/proto"
8 | )
9 |
10 | /**
11 | * @Author: super
12 | * @Date: 2020-09-01 21:33
13 | * @Description:
14 | **/
15 |
// Elastic implements the ElasticOperation RPC handler methods. It holds no
// state of its own; every method delegates to the elastic package.
type Elastic struct{}
18 |
19 | func (e *Elastic) IndexExist(ctx context.Context, req *proto.IndexExistRequest, res *proto.IndexExistResponse) error {
20 | exist, err := elastic.IndexExist(req.Index)
21 | if err != nil {
22 | return err
23 | }
24 | res.Exist = exist
25 | return nil
26 | }
27 |
28 | func (e *Elastic) SaveInfo(ctx context.Context, req *proto.SaveInfoRequest, res *proto.SaveInfoResponse) error {
29 |
30 | article := util.ProtoToArticle(req.Article)
31 |
32 | id, err := elastic.SaveInfo(req.Table, article)
33 | if err != nil {
34 | return err
35 | }
36 | res.Result = id
37 | return nil
38 | }
39 |
40 | func (e *Elastic) GetInfo(ctx context.Context, req *proto.GetInfoRequest, res *proto.GetInfoResponse) error {
41 | article, err := elastic.GetInfo(req.Table, req.Id)
42 | if err != nil {
43 | return err
44 | }
45 | result := util.ArticleToProto(article)
46 | res.Article = result
47 | return nil
48 | }
49 |
50 | func (e *Elastic) SearchInfo(ctx context.Context, req *proto.SearchInfoRequest, res *proto.SearchInfoResponse) error {
51 | articles, err := elastic.SearchInfo(req.Table, req.FieldName, req.FieldValue)
52 | if err != nil {
53 | return err
54 | }
55 | l := len(articles)
56 | result := make([]*proto.Article, l)
57 |
58 | for i := 0; i < l; i++ {
59 | temp := util.ArticleToProto(articles[i])
60 | result = append(result, temp)
61 | }
62 | res.Article = result
63 | return nil
64 | }
65 |
--------------------------------------------------------------------------------
/service/meituan/crawl_detail/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go-crawler-distributed/internal/crawler"
5 | "go-crawler-distributed/internal/crawler/crawerConfig"
6 | "go-crawler-distributed/internal/crawler/meituan/parser"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-09-01 19:10
12 | * @Description:
13 | **/
14 |
15 | func main() {
16 | crawler.Crawl(crawerConfig.ArticleUrlList, crawerConfig.ArticleDetail, "ArticleDetail", parser.ParseArticleDetail)
17 | }
18 |
--------------------------------------------------------------------------------
/service/meituan/crawl_list/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go-crawler-distributed/internal/crawler"
5 | "go-crawler-distributed/internal/crawler/crawerConfig"
6 | "go-crawler-distributed/internal/crawler/meituan/parser"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-09-01 18:30
12 | * @Description:
13 | **/
14 |
15 | func main() {
16 | crawler.Crawl("", crawerConfig.ArticleList, "ArticleList", parser.ParseArticleList)
17 | }
18 |
--------------------------------------------------------------------------------
/service/meituan/crawl_urllist/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go-crawler-distributed/internal/crawler"
5 | "go-crawler-distributed/internal/crawler/crawerConfig"
6 | "go-crawler-distributed/internal/crawler/meituan/parser"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-09-01 19:03
12 | * @Description:
13 | **/
14 |
15 | func main() {
16 | crawler.Crawl(crawerConfig.ArticleList, crawerConfig.ArticleUrlList, "ArticleUrlList", parser.ParseArticleUrlList)
17 | }
18 |
--------------------------------------------------------------------------------
/service/meituan/storage_detail/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "go-crawler-distributed/internal/crawler"
5 | "go-crawler-distributed/internal/crawler/crawerConfig"
6 | "go-crawler-distributed/internal/crawler/meituan/storage"
7 | )
8 |
9 | /**
10 | * @Author: super
11 | * @Date: 2020-09-01 19:37
12 | * @Description:
13 | **/
14 |
15 | func main() {
16 | crawler.Crawl(crawerConfig.ArticleDetail, "", "storageArticleDetail", storage.StorageArticle)
17 | }
18 |
--------------------------------------------------------------------------------