├── .DS_Store ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── dependencies ├── docker-compose.yml ├── init-mongo.js ├── nginx │ ├── conf │ │ └── nginx.conf │ ├── docker-compose.yml │ └── html │ │ └── index.html └── traefik.toml ├── deploy ├── buildScript │ ├── linux_build.sh │ ├── mac_build.sh │ └── win_build.sh ├── deploy │ ├── start-douban-direct.sh │ └── start-meituan-direct.sh ├── dockerBuildScript │ └── docker_build.sh └── service │ ├── cache │ └── Dockerfile │ ├── docker-compose.yml │ ├── douban │ ├── crawl_detail │ │ └── Dockerfile │ ├── crawl_list │ │ └── Dockerfile │ ├── crawl_tags │ │ └── Dockerfile │ ├── docker-compose.yml │ └── storage_detail │ │ └── Dockerfile │ ├── elastic │ └── Dockerfile │ └── meituan │ ├── crawl_detail │ └── Dockerfile │ ├── crawl_list │ └── Dockerfile │ ├── crawl_urllist │ └── Dockerfile │ ├── docker-compose.yml │ └── storage_detail │ └── Dockerfile ├── global ├── data.go ├── setting.go └── tracer.go ├── go.mod ├── go.sum ├── img ├── consul.png ├── consul_config.png ├── consul_service.png ├── douban.png ├── elasticsearch.png ├── flow.png ├── framework.png ├── meituan.png ├── rabbitmq.png └── swagger.png ├── initConf └── init.go ├── internal ├── crawler │ ├── crawerConfig │ │ ├── articleMQConfig.go │ │ ├── bookMQConfig.go │ │ └── start.go │ ├── crawlOperation.go │ ├── cronJob │ │ └── main.go │ ├── douban │ │ ├── parser │ │ │ ├── bookDetail.go │ │ │ ├── booklist.go │ │ │ └── tagList.go │ │ └── storage │ │ │ └── bookDetail.go │ ├── fetcher │ │ └── fetcher.go │ ├── meituan │ │ ├── conf │ │ │ └── mapping.go │ │ ├── parser │ │ │ ├── articleDetail.go │ │ │ ├── articleList.go │ │ │ └── articleUrlList.go │ │ └── storage │ │ │ └── articleDetail.go │ ├── persistence │ │ └── persistence.go │ └── worker │ │ ├── types.go │ │ └── worker.go ├── crontab │ ├── common │ │ ├── constants.go │ │ ├── job.go │ │ ├── job_easyjson.go │ │ └── log.go │ ├── master │ │ └── etcd.go │ └── worker │ │ ├── etcd.go │ │ ├── executor.go │ │ ├── jobLock.go │ 
│ ├── logSink.go │ │ ├── main │ │ └── main.go │ │ └── scheduler.go ├── dao │ ├── article.go │ ├── book.go │ ├── dao.go │ └── forbes.go ├── middleware │ ├── access_log.go │ ├── context_timeout.go │ ├── recovery.go │ ├── tracer.go │ └── translations.go ├── model │ ├── article.go │ ├── article_easyjson.go │ ├── book.go │ ├── book_easyjson.go │ ├── db.go │ └── forbes.go ├── routers │ ├── job │ │ └── job.go │ ├── router.go │ └── sd │ │ └── check.go └── service │ └── job.go ├── main.go ├── pkg ├── app │ ├── app.go │ ├── form.go │ └── pagination.go ├── cache │ ├── cache.go │ ├── cacheOperation.go │ └── cache_test.go ├── consistentHash │ └── consistent.go ├── convert │ └── convert.go ├── db │ └── db.go ├── elastic │ ├── elastic.go │ └── elasticOpeartion.go ├── email │ └── email.go ├── errcode │ ├── common_code.go │ ├── ercd_code.go │ ├── errcode.go │ └── user.go ├── etcd │ └── etcd.go ├── file │ └── file.go ├── idGenerator │ ├── idGenerator.go │ └── idGenerator_test.go ├── ipParser │ ├── ipParser.go │ └── qqwry.utf8.dat ├── logger │ └── logger.go ├── mongoDB │ └── mongo.go ├── mq │ ├── consumer.go │ ├── producer.go │ ├── rabbitmq.go │ └── rabbitmq_test.go ├── otgorm │ └── otgorm.go ├── setting │ ├── section.go │ └── setting.go ├── tracer │ └── tracer.go ├── upload │ └── file.go └── util │ ├── aes.go │ ├── base64.go │ ├── base64_test.go │ ├── json.go │ ├── json_test.go │ ├── jwt.go │ ├── md5.go │ ├── md5_test.go │ ├── morse.go │ ├── morse_test.go │ ├── qrcode.go │ ├── reb2hex_test.go │ ├── regularExpression.go │ ├── rgb2hex.go │ ├── stringCode.go │ └── structMapping.go ├── service ├── cache │ ├── client │ │ └── client.go │ ├── main.go │ ├── proto │ │ ├── redis.pb.go │ │ ├── redis.pb.micro.go │ │ └── redis.proto │ └── server │ │ └── server.go ├── douban │ ├── crawl_detail │ │ └── main.go │ ├── crawl_list │ │ └── main.go │ ├── crawl_tags │ │ └── main.go │ └── storage_detail │ │ └── main.go ├── elastic │ ├── client │ │ └── client.go │ ├── main.go │ ├── proto │ │ ├── 
elastic.pb.go │ │ ├── elastic.pb.micro.go │ │ └── elastic.proto │ └── server │ │ └── server.go └── meituan │ ├── crawl_detail │ └── main.go │ ├── crawl_list │ └── main.go │ ├── crawl_urllist │ └── main.go │ └── storage_detail │ └── main.go └── storage └── logs └── app.log /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | .idea 8 | 9 | # Test binary, built with `go test -c` 10 | *.test 11 | 12 | # Output of the go coverage tool, specifically when used with LiteIDE 13 | *.out 14 | 15 | # Dependency directories (remove the comment below to include it) 16 | # vendor/ 17 | 18 | bin/ 19 | 20 | configs/ 21 | .DS_Store 22 | log/ 23 | config.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Knowledge-Precipitation-Tribe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/Makefile -------------------------------------------------------------------------------- /dependencies/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | 5 | redis: 6 | image: redis 7 | restart: always 8 | ports: 9 | - "6379:6379" 10 | volumes: 11 | - redis-data:/data 12 | 13 | mysql: 14 | image: mysql 15 | command: --default-authentication-plugin=mysql_native_password 16 | restart: always 17 | environment: 18 | MYSQL_ROOT_PASSWORD: example 19 | 20 | mongo: 21 | image: mongo 22 | environment: 23 | - MONGO_INITDB_DATABASE=cron 24 | - MONGO_INITDB_ROOT_USERNAME=root 25 | - MONGO_INITDB_ROOT_PASSWORD=password 26 | volumes: 27 | - ./init-mongo.js:/docker-entrypoint-initdb.d/init-mongo.js:ro 28 | - mongo-data:/data/db 29 | ports: 30 | - '27017-27019:27017-27019' 31 | 32 | elastic: 33 | image: docker.elastic.co/elasticsearch/elasticsearch:7.8.0 34 | ports: 35 | - "9200:9200" 36 | - "9300:9300" 37 | volumes: 38 | - elastic-data:/data 39 | environment: 40 | - discovery.type=single-node 41 | 42 | rabbitmq: 43 | image: rabbitmq:management 44 | hostname: myrabbitmq 45 | ports: 46 | - "5672:5672" 47 | - 
"15672:15672" 48 | volumes: 49 | - rabbitmq-data:/var/lib/rabbitmq 50 | 51 | consul1: 52 | image: consul 53 | restart: always 54 | ports: 55 | - "8500:8500" 56 | - "8300:8300" 57 | - "8301:8301" 58 | - "8302:8302" 59 | - "8600:8600" 60 | command: agent -server -bootstrap-expect 2 -ui -bind=0.0.0.0 -client=0.0.0.0 61 | 62 | consul2: 63 | image: consul 64 | restart: always 65 | ports: 66 | - "8501:8500" 67 | command: agent -server -ui -bind=0.0.0.0 -client=0.0.0.0 -join consul1 68 | 69 | consul3: 70 | image: consul 71 | restart: always 72 | ports: 73 | - "8502:8500" 74 | command: agent -server -ui -bind=0.0.0.0 -client=0.0.0.0 -join consul1 75 | 76 | # proxy: 77 | # image: traefik 78 | # command: --api --docker --docker.domain=docker.localhost --logLevel=DEBUG 79 | # ports: 80 | # - "80:80" 81 | # - "8080:8080" 82 | # volumes: 83 | # - /var/run/docker.sock:/var/run/docker.sock 84 | # - ./traefik.toml:/root/go-crawler/traefik.toml 85 | 86 | jaeger: 87 | image: jaegertracing/all-in-one:1.16 88 | ports: 89 | - "5775:5775/udp" 90 | - "6831:6831/udp" 91 | - "6832:6832/udp" 92 | - "5778:5778" 93 | - "16686:16686" 94 | - "14268:14268" 95 | - "9411:9411" 96 | environment: 97 | - "COLLECTOR_ZIPKIN_HTTP_PORT=9411" 98 | 99 | etcd: 100 | image: quay.io/coreos/etcd:v3.3.12 101 | ports: 102 | - "2379:2379" 103 | - "2380:2380" 104 | environment: 105 | ETCDCTL_API: 3 106 | volumes: 107 | - etcd-data:/etcd-data 108 | command: 109 | - "/usr/local/bin/etcd" 110 | - "--name" 111 | - "s1" 112 | - "--data-dir" 113 | - "/etcd-data" 114 | - "--advertise-client-urls" 115 | - "http://0.0.0.0:2379" 116 | - --listen-client-urls 117 | - "http://0.0.0.0:2379" 118 | - "--initial-advertise-peer-urls" 119 | - "http://0.0.0.0:2380" 120 | - "--listen-peer-urls" 121 | - "http://0.0.0.0:2380" 122 | - "--initial-cluster-token" 123 | - "tkn" 124 | - "--initial-cluster" 125 | - "s1=http://0.0.0.0:2380" 126 | - "--initial-cluster-state" 127 | - "new" 128 | 129 | volumes: 130 | elastic-data: 131 | 
rabbitmq-data: 132 | redis-data: 133 | mongo-data: 134 | etcd-data: -------------------------------------------------------------------------------- /dependencies/init-mongo.js: -------------------------------------------------------------------------------- 1 | db.createUser( 2 | { 3 | user:"root", 4 | pwd:"password", 5 | roles:[ 6 | { 7 | role:"readWrite", 8 | db:"cron" 9 | } 10 | ] 11 | } 12 | ) 13 | -------------------------------------------------------------------------------- /dependencies/nginx/conf/nginx.conf: -------------------------------------------------------------------------------- 1 | #user nobody; 2 | worker_processes 1; 3 | 4 | #error_log logs/error.log; 5 | #error_log logs/error.log notice; 6 | #error_log logs/error.log info; 7 | 8 | #pid logs/nginx.pid; 9 | 10 | 11 | events { 12 | worker_connections 1024; 13 | } 14 | 15 | 16 | http { 17 | include mime.types; 18 | default_type application/octet-stream; 19 | 20 | #log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 21 | # '$status $body_bytes_sent "$http_referer" ' 22 | # '"$http_user_agent" "$http_x_forwarded_for"'; 23 | 24 | #access_log logs/access.log main; 25 | 26 | sendfile on; 27 | #tcp_nopush on; 28 | 29 | #keepalive_timeout 0; 30 | keepalive_timeout 65; 31 | 32 | #gzip on; 33 | 34 | server { 35 | listen 80; 36 | server_name localhost; 37 | 38 | #charset koi8-r; 39 | 40 | #access_log logs/dig.log main; 41 | 42 | location / { 43 | root /html; 44 | index index.html index.htm; 45 | } 46 | 47 | #error_page 404 /404.html; 48 | 49 | # redirect server error pages to the static page /50x.html 50 | # 51 | error_page 500 502 503 504 /50x.html; 52 | location = /50x.html { 53 | root html; 54 | } 55 | 56 | # proxy the PHP scripts to Apache listening on 127.0.0.1:80 57 | # 58 | #location ~ \.php$ { 59 | # proxy_pass http://127.0.0.1; 60 | #} 61 | 62 | # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000 63 | # 64 | #location ~ \.php$ { 65 | # root html; 66 | # fastcgi_pass 
127.0.0.1:9000; 67 | # fastcgi_index index.php; 68 | # fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name; 69 | # include fastcgi_params; 70 | #} 71 | 72 | # deny access to .htaccess files, if Apache's document root 73 | # concurs with nginx's one 74 | # 75 | #location ~ /\.ht { 76 | # deny all; 77 | #} 78 | } 79 | 80 | 81 | # another virtual host using mix of IP-, name-, and port-based configuration 82 | # 83 | #server { 84 | # listen 8000; 85 | # listen somename:8080; 86 | # server_name somename alias another.alias; 87 | 88 | # location / { 89 | # root html; 90 | # index index.html index.htm; 91 | # } 92 | #} 93 | 94 | 95 | # HTTPS server 96 | # 97 | #server { 98 | # listen 443 ssl; 99 | # server_name localhost; 100 | 101 | # ssl_certificate cert.pem; 102 | # ssl_certificate_key cert.key; 103 | 104 | # ssl_session_cache shared:SSL:1m; 105 | # ssl_session_timeout 5m; 106 | 107 | # ssl_ciphers HIGH:!aNULL:!MD5; 108 | # ssl_prefer_server_ciphers on; 109 | 110 | # location / { 111 | # root html; 112 | # index index.html index.htm; 113 | # } 114 | #} 115 | 116 | } -------------------------------------------------------------------------------- /dependencies/nginx/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | nginx: 5 | image: nginx:stable-alpine-perl 6 | restart: always 7 | ports: 8 | - 80:80 9 | volumes: 10 | - ./html:/html 11 | - ./logs:/etc/nginx/logs 12 | - ./conf.d:/etc/nginx/conf.d 13 | - ./conf/nginx.conf:/etc/nginx/nginx.conf -------------------------------------------------------------------------------- /dependencies/traefik.toml: -------------------------------------------------------------------------------- 1 | defaultEntryPoints = ["http"] 2 | insecureSkipVerify = true 3 | [entryPoints] 4 | [entryPoints.http] 5 | address = ":80" -------------------------------------------------------------------------------- /deploy/buildScript/linux_build.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed 3 | 4 | douban_services=" 5 | storage_detail 6 | crawl_detail 7 | crawl_list 8 | crawl_tags 9 | " 10 | 11 | meituan_services=" 12 | storage_detail 13 | crawl_detail 14 | crawl_list 15 | crawl_urllist 16 | " 17 | 18 | common_services=" 19 | cache 20 | elastic 21 | " 22 | 23 | build_common_service() { 24 | rm -f ${ROOT_DIR}/deploy/service/$1/bin/$1 25 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -o ${ROOT_DIR}/deploy/service/$1/bin/$1 ${ROOT_DIR}/service/$1/main.go 26 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/service/$1/bin/$1" 27 | } 28 | 29 | build_douban_service() { 30 | rm -f ${ROOT_DIR}/deploy/service/douban/$1/bin/$1 31 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -o ${ROOT_DIR}/deploy/service/douban/$1/bin/$1 ${ROOT_DIR}/service/douban/$1/main.go 32 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/service/douban/$1/bin/$1" 33 | } 34 | 35 | build_meituan_service() { 36 | rm -f ${ROOT_DIR}/deploy/service/meituan/$1/bin/$1 37 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -o ${ROOT_DIR}/deploy/service/meituan/$1/bin/$1 ${ROOT_DIR}/service/meituan/$1/main.go 38 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/service/meituan/$1/bin/$1" 39 | } 40 | 41 | for service in $common_services 42 | do 43 | build_common_service $service 44 | done 45 | echo -e "\033[32m编译完成: \033[0m common" 46 | 47 | for service in $douban_services 48 | do 49 | build_douban_service $service 50 | done 51 | echo -e "\033[32m编译完成: \033[0m douban_service" 52 | 53 | for service in $meituan_services 54 | do 55 | build_meituan_service $service 56 | done 57 | echo -e "\033[32m编译完成: \033[0m meituan_service" -------------------------------------------------------------------------------- /deploy/buildScript/mac_build.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed 3 | 4 | services=" 5 | cache 6 | storage_detail 7 | crawl_detail 8 | crawl_list 9 | crawl_tags 10 | " 11 | 12 | # 编译service可执行文件 13 | build_service() { 14 | go build -o ${ROOT_DIR}/deploy/mac/bin/$1 ${ROOT_DIR}/service/$1/main.go 15 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/linux/bin/" 16 | } 17 | 18 | # 执行编译service 19 | mkdir -p ${ROOT_DIR}/deploy/mac/bin && rm -f ${ROOT_DIR}/deploy/mac/bin/* 20 | for service in $services 21 | do 22 | build_service $service 23 | done -------------------------------------------------------------------------------- /deploy/buildScript/win_build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed 3 | 4 | services=" 5 | cache 6 | storage_detail 7 | crawl_detail 8 | crawl_list 9 | crawl_tags 10 | " 11 | 12 | # 编译service可执行文件 13 | build_service() { 14 | rm -f ${ROOT_DIR}/deploy/win/bin/ 15 | CGO_ENABLED=0 GOOS=windows GOARCH=amd64 go build -a -installsuffix cgo -o ${ROOT_DIR}/deploy/win/bin/$1.exe ${ROOT_DIR}/service/$1/main.go 16 | echo -e "\033[32m编译完成: \033[0m ${ROOT_DIR}/deploy/win/bin/" 17 | } 18 | 19 | #cache service 20 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo -o ${ROOT_DIR}/deploy/win/bin/$1.exe ${ROOT_DIR}/service/cache/main.go 21 | 22 | 23 | # 执行编译service 24 | mkdir -p ${ROOT_DIR}/deploy/win/bin && rm -f ${ROOT_DIR}/deploy/win/bin/* 25 | for service in $services 26 | do 27 | build_service $service 28 | done -------------------------------------------------------------------------------- /deploy/deploy/start-douban-direct.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed 3 | 4 | services=" 5 | 
storage_detail 6 | crawl_detail 7 | crawl_list 8 | crawl_tags 9 | " 10 | 11 | cd ${ROOT_DIR} 12 | 13 | # 编译service可执行文件 14 | run_service() { 15 | go run ${ROOT_DIR}/service/douban/$1/main.go 16 | echo -e "\033[32m启动完成: \033[0m $1" 17 | } 18 | 19 | go run ${ROOT_DIR}/service/cache/main.go 20 | echo -e "\033[32m启动完成: \033[0m cache" 21 | 22 | # 执行编译service 23 | for service in $services 24 | do 25 | run_service $service 26 | done 27 | 28 | -------------------------------------------------------------------------------- /deploy/deploy/start-meituan-direct.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | ROOT_DIR=/Users/super/develop/go-crawler-distributed 3 | 4 | services=" 5 | storage_detail 6 | crawl_detail 7 | crawl_list 8 | crawl_urllist 9 | " 10 | 11 | cd ${ROOT_DIR} 12 | 13 | # 编译service可执行文件 14 | run_service() { 15 | go run ${ROOT_DIR}/service/meituan/$1/main.go 16 | echo -e "\033[32m启动完成: \033[0m $1" 17 | } 18 | 19 | go run ${ROOT_DIR}/service/elastic/main.go 20 | echo -e "\033[32m启动完成: \033[0m elastic" 21 | 22 | # 执行编译service 23 | for service in $services 24 | do 25 | run_service $service 26 | done 27 | 28 | -------------------------------------------------------------------------------- /deploy/dockerBuildScript/docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ROOT_DIR=/Users/super/develop/go-crawler-distributed 4 | 5 | services=" 6 | cache 7 | storage_detail 8 | crawl_detail 9 | crawl_list 10 | crawl_tags 11 | " 12 | 13 | # 打包镜像 14 | build_image() { 15 | sudo docker build -t superssssss/crawler/$1 -f ./service/$1/Dockerfile . 16 | echo -e "\033[32m镜像打包完成: \033[0m superssssss/crawler/$1\n" 17 | } 18 | 19 | # 切换到工程根目录 20 | cd ${ROOT_DIR} 21 | 22 | echo -e "\033[32m开始构建docker镜像... 
\033[0m" 23 | 24 | # 打包微服务镜像 25 | cd ${ROOT_DIR}/deploy/ 26 | for service in $services 27 | do 28 | build_image $service 29 | done 30 | 31 | echo -e "\033[32mdocker镜像构建完毕.\033[0m" -------------------------------------------------------------------------------- /deploy/service/cache/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | ADD bin/cache /app/ 4 | ADD config.json /app/config/ 5 | 6 | RUN chmod 777 /app/cache 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./cache"] -------------------------------------------------------------------------------- /deploy/service/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | 5 | cache: 6 | build: 7 | context: cache 8 | dockerfile: Dockerfile 9 | networks: 10 | - crawler 11 | 12 | elastic: 13 | build: 14 | context: elastic 15 | dockerfile: Dockerfile 16 | depends_on: 17 | - elastic_server 18 | networks: 19 | - crawler 20 | 21 | redis: 22 | image: redis 23 | restart: always 24 | ports: 25 | - "6379:6379" 26 | volumes: 27 | - redis-data:/data 28 | networks: 29 | - crawler 30 | 31 | mysql: 32 | image: mysql 33 | command: --default-authentication-plugin=mysql_native_password 34 | restart: always 35 | ports: 36 | - "3306:3306" 37 | environment: 38 | MYSQL_ROOT_PASSWORD: example 39 | networks: 40 | - crawler 41 | 42 | elastic_server: 43 | image: docker.elastic.co/elasticsearch/elasticsearch:7.8.0 44 | ports: 45 | - "9200:9200" 46 | - "9300:9300" 47 | volumes: 48 | - elastic-data:/data 49 | environment: 50 | - discovery.type=single-node 51 | networks: 52 | - crawler 53 | 54 | rabbitmq: 55 | image: rabbitmq:management 56 | hostname: myrabbitmq 57 | ports: 58 | - "5672:5672" 59 | - "15672:15672" 60 | volumes: 61 | - rabbitmq-data:/var/lib/rabbitmq 62 | networks: 63 | - crawler 64 | 65 | consul1: 66 | image: consul 67 | restart: always 68 | ports: 69 | - "8500:8500" 70 | - 
"8300:8300" 71 | - "8301:8301" 72 | - "8302:8302" 73 | - "8600:8600" 74 | command: agent -server -bootstrap-expect 2 -ui -bind=0.0.0.0 -client=0.0.0.0 75 | networks: 76 | - crawler 77 | 78 | consul2: 79 | image: consul 80 | restart: always 81 | ports: 82 | - "8501:8500" 83 | command: agent -server -ui -bind=0.0.0.0 -client=0.0.0.0 -join consul1 84 | networks: 85 | - crawler 86 | 87 | consul3: 88 | image: consul 89 | restart: always 90 | ports: 91 | - "8502:8500" 92 | command: agent -server -ui -bind=0.0.0.0 -client=0.0.0.0 -join consul1 93 | networks: 94 | - crawler 95 | 96 | 97 | volumes: 98 | elastic-data: 99 | rabbitmq-data: 100 | redis-data: 101 | 102 | networks: 103 | crawler: 104 | external: true -------------------------------------------------------------------------------- /deploy/service/douban/crawl_detail/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | COPY bin/crawl_detail /app/ 4 | COPY config.json /app/config/ 5 | 6 | RUN chmod 777 /app/crawl_detail 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./crawl_detail"] -------------------------------------------------------------------------------- /deploy/service/douban/crawl_list/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | COPY bin/crawl_list /app/ 4 | COPY config.json /app/config/ 5 | 6 | RUN chmod 777 /app/crawl_list 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./crawl_list"] -------------------------------------------------------------------------------- /deploy/service/douban/crawl_tags/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | COPY bin/crawl_tags /app/ 4 | COPY config.json /app/config/ 5 | 6 | RUN chmod 777 /app/crawl_tags 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./crawl_tags"] -------------------------------------------------------------------------------- 
/deploy/service/douban/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | 5 | crawl_list: 6 | build: 7 | context: crawl_list 8 | dockerfile: Dockerfile 9 | 10 | crawl_tags: 11 | build: 12 | context: crawl_tags 13 | dockerfile: Dockerfile 14 | 15 | crawl_detail: 16 | build: 17 | context: crawl_detail 18 | dockerfile: Dockerfile 19 | 20 | storage_detail: 21 | build: 22 | context: storage_detail 23 | dockerfile: Dockerfile -------------------------------------------------------------------------------- /deploy/service/douban/storage_detail/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | COPY bin/storage_detail /app/ 4 | COPY config.json /app/config/ 5 | 6 | RUN chmod 777 /app/storage_detail 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./storage_detail"] -------------------------------------------------------------------------------- /deploy/service/elastic/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | ADD bin/elastic /app/ 4 | ADD config.json /app/config/ 5 | 6 | RUN chmod 777 /app/elastic 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./elastic"] -------------------------------------------------------------------------------- /deploy/service/meituan/crawl_detail/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | COPY bin/crawl_detail /app/ 4 | COPY config.json /app/config/ 5 | 6 | RUN chmod 777 /app/crawl_detail 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./crawl_detail"] -------------------------------------------------------------------------------- /deploy/service/meituan/crawl_list/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | COPY bin/crawl_list /app/ 4 | COPY config.json /app/config/ 5 | 6 | RUN chmod 
777 /app/crawl_list 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./crawl_list"] -------------------------------------------------------------------------------- /deploy/service/meituan/crawl_urllist/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | COPY bin/crawl_urllist /app/ 4 | COPY config.json /app/config/ 5 | 6 | RUN chmod 777 /app/crawl_urllist 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./crawl_urllist"] -------------------------------------------------------------------------------- /deploy/service/meituan/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | 5 | crawl_list: 6 | build: 7 | context: crawl_list 8 | dockerfile: Dockerfile 9 | networks: 10 | - crawler 11 | 12 | crawl_tags: 13 | build: 14 | context: crawl_urllist 15 | dockerfile: Dockerfile 16 | networks: 17 | - crawler 18 | 19 | crawl_detail: 20 | build: 21 | context: crawl_detail 22 | dockerfile: Dockerfile 23 | networks: 24 | - crawler 25 | 26 | storage_detail: 27 | build: 28 | context: storage_detail 29 | dockerfile: Dockerfile 30 | networks: 31 | - crawler 32 | 33 | networks: 34 | crawler: 35 | external: true -------------------------------------------------------------------------------- /deploy/service/meituan/storage_detail/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | COPY bin/storage_detail /app/ 4 | COPY config.json /app/config/ 5 | 6 | RUN chmod 777 /app/storage_detail 7 | 8 | WORKDIR /app 9 | 10 | ENTRYPOINT ["./storage_detail"] -------------------------------------------------------------------------------- /global/data.go: -------------------------------------------------------------------------------- 1 | package global 2 | 3 | import ( 4 | "github.com/coreos/etcd/clientv3" 5 | "github.com/garyburd/redigo/redis" 6 | "github.com/jinzhu/gorm" 7 | 
"github.com/kayon/iploc" 8 | "github.com/olivere/elastic/v7" 9 | "github.com/streadway/amqp" 10 | "go.mongodb.org/mongo-driver/mongo" 11 | ) 12 | 13 | /** 14 | * @Author: super 15 | * @Date: 2020-09-18 08:51 16 | * @Description: 全局配置DB 17 | **/ 18 | 19 | type RabbitMQ struct { 20 | Conn *amqp.Connection 21 | Channel *amqp.Channel 22 | } 23 | 24 | var ( 25 | DBEngine *gorm.DB 26 | RedisEngine *redis.Pool 27 | RabbitMQEngine *RabbitMQ 28 | ElasticEngine *elastic.Client 29 | MongoDBEngine *mongo.Client 30 | EtcdEngine *clientv3.Client 31 | EtcdKV clientv3.KV 32 | EtcdLease clientv3.Lease 33 | EtcdWatcher clientv3.Watcher 34 | IpParser *iploc.Locator 35 | ) 36 | -------------------------------------------------------------------------------- /global/setting.go: -------------------------------------------------------------------------------- 1 | package global 2 | 3 | import ( 4 | "go-crawler-distributed/pkg/logger" 5 | "go-crawler-distributed/pkg/setting" 6 | ) 7 | 8 | /** 9 | * @Author: super 10 | * @Date: 2020-09-18 08:32 11 | * @Description: 全局配置包括:服务,数据库,Email,JWT和日志 12 | **/ 13 | 14 | var ( 15 | ServerSetting *setting.ServerSettingS 16 | AppSetting *setting.AppSettingS 17 | DatabaseSetting *setting.DatabaseSettingS 18 | CacheSetting *setting.CacheSettingS 19 | RabbitMQSetting *setting.RabbitMQSettingS 20 | ElasticSetting *setting.ElasticSettingS 21 | EmailSetting *setting.EmailSettingS 22 | JWTSetting *setting.JWTSettingS 23 | Logger *logger.Logger 24 | ConsulSetting *setting.ConsulSettingS 25 | MongoDBSetting *setting.MongoDBSettingS 26 | EtcdSetting *setting.EtcdSettingS 27 | TracerSetting *setting.TracerSettingS 28 | IpParserSetting *setting.IpParserSettingS 29 | ) 30 | -------------------------------------------------------------------------------- /global/tracer.go: -------------------------------------------------------------------------------- 1 | package global 2 | 3 | import "github.com/opentracing/opentracing-go" 4 | 5 | /** 6 | * @Author: super 7 | * 
@Date: 2020-09-24 08:10 8 | * @Description: 配置全局统一的调用链追踪 9 | **/ 10 | 11 | var ( 12 | Tracer opentracing.Tracer 13 | ) 14 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module go-crawler-distributed 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/HdrHistogram/hdrhistogram-go v1.0.0 // indirect 7 | github.com/PuerkitoBio/goquery v1.5.1 8 | github.com/bwmarrin/snowflake v0.3.0 9 | github.com/coreos/etcd v3.3.18+incompatible 10 | github.com/dgrijalva/jwt-go v3.2.0+incompatible 11 | github.com/fsnotify/fsnotify v1.4.9 12 | github.com/garyburd/redigo v1.6.2 13 | github.com/gin-contrib/cors v1.3.1 14 | github.com/gin-gonic/gin v1.9.1 15 | github.com/go-acme/lego/v3 v3.4.0 16 | github.com/go-playground/locales v0.14.1 17 | github.com/go-playground/universal-translator v0.18.1 18 | github.com/go-playground/validator/v10 v10.14.0 19 | github.com/golang/protobuf v1.5.0 20 | github.com/jinzhu/gorm v1.9.16 21 | github.com/kayon/iploc v0.0.0-20200312105652-bda3e968a794 22 | github.com/mailru/easyjson v0.7.6 23 | github.com/micro/go-micro/v2 v2.9.1 24 | github.com/micro/go-plugins/registry/consul/v2 v2.9.1 25 | github.com/olivere/elastic/v7 v7.0.22 26 | github.com/opentracing/opentracing-go v1.2.0 27 | github.com/robfig/cron/v3 v3.0.1 28 | github.com/shirou/gopsutil v0.0.0-20190901111213-e4ec7b275ada 29 | github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e 30 | github.com/spf13/viper v1.7.1 31 | github.com/streadway/amqp v1.0.0 32 | github.com/uber/jaeger-client-go v2.25.0+incompatible 33 | github.com/uber/jaeger-lib v2.4.0+incompatible // indirect 34 | go.mongodb.org/mongo-driver v1.5.1 35 | go.uber.org/atomic v1.6.0 // indirect 36 | golang.org/x/net v0.17.0 37 | golang.org/x/text v0.13.0 38 | google.golang.org/protobuf v1.23.0 39 | gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect 40 | 
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df 41 | gopkg.in/natefinch/lumberjack.v2 v2.0.0 42 | ) 43 | -------------------------------------------------------------------------------- /img/consul.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/consul.png -------------------------------------------------------------------------------- /img/consul_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/consul_config.png -------------------------------------------------------------------------------- /img/consul_service.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/consul_service.png -------------------------------------------------------------------------------- /img/douban.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/douban.png -------------------------------------------------------------------------------- /img/elasticsearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/elasticsearch.png -------------------------------------------------------------------------------- /img/flow.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/flow.png -------------------------------------------------------------------------------- /img/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/framework.png -------------------------------------------------------------------------------- /img/meituan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/meituan.png -------------------------------------------------------------------------------- /img/rabbitmq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/rabbitmq.png -------------------------------------------------------------------------------- /img/swagger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/img/swagger.png -------------------------------------------------------------------------------- /initConf/init.go: -------------------------------------------------------------------------------- 1 | package initConf 2 | 3 | import ( 4 | "go-crawler-distributed/pkg/etcd" 5 | "go-crawler-distributed/pkg/ipParser" 6 | "go-crawler-distributed/pkg/mongoDB" 7 | "log" 8 | "strings" 9 | "time" 10 | 11 | "go-crawler-distributed/global" 12 | "go-crawler-distributed/pkg/cache" 13 | "go-crawler-distributed/pkg/db" 14 | "go-crawler-distributed/pkg/elastic" 15 | "go-crawler-distributed/pkg/idGenerator" 16 | 
"go-crawler-distributed/pkg/logger" 17 | "go-crawler-distributed/pkg/mq" 18 | "go-crawler-distributed/pkg/setting" 19 | "go-crawler-distributed/pkg/tracer" 20 | 21 | "gopkg.in/natefinch/lumberjack.v2" 22 | ) 23 | 24 | /** 25 | * @Author: super 26 | * @Date: 2021-01-05 14:25 27 | * @Description: 28 | **/ 29 | func Init(config string) { 30 | //初始化配置 31 | err := setupSetting(config) 32 | if err != nil { 33 | log.Printf("init setupSetting err: %v\n", err) 34 | } else { 35 | log.Printf("初始化配置信息成功") 36 | } 37 | //初始化日志 38 | err = setupLogger() 39 | if err != nil { 40 | log.Printf("init setupLogger err: %v\n", err) 41 | } else { 42 | log.Printf("初始化logger成功") 43 | } 44 | //初始化数据库 45 | err = setupDBEngine() 46 | if err != nil { 47 | log.Printf("init setupDBEngine err: %v\n", err) 48 | } else { 49 | log.Printf("初始化数据库成功") 50 | } 51 | //初始化redis 52 | err = setupCacheEngine() 53 | if err != nil { 54 | log.Printf("init setupCacheEngine err: %v\n", err) 55 | } else { 56 | log.Printf("初始化cache成功") 57 | } 58 | //初始化RabbitMQ 59 | err = setupRabbitMQEngine() 60 | if err != nil { 61 | log.Printf("init setupRabbitMQEngine err: %v\n", err) 62 | } else { 63 | log.Printf("初始化消息队列成功") 64 | } 65 | //初始化elastic 66 | //err = setupElasticEngine() 67 | //if err != nil { 68 | // log.Printf("init setupElasticEngine err: %v\n", err) 69 | //}else{ 70 | // log.Printf("初始化elastic成功") 71 | //} 72 | // 73 | //初始化mongoDB 74 | err = setupMongoDBEngine() 75 | if err != nil { 76 | log.Printf("init setupMongoDBEngine err: %v\n", err) 77 | } else { 78 | log.Printf("初始化mongoDb成功") 79 | } 80 | //初始化etcd 81 | err = setupEtcdEngine() 82 | if err != nil { 83 | log.Printf("init setupEtcdEngine err: %v\n", err) 84 | } else { 85 | log.Printf("初始化etcd成功") 86 | } 87 | //初始化ipParser 88 | err = setupIpParser() 89 | if err != nil { 90 | log.Printf("init setupIpParser err: %v\n", err) 91 | } else { 92 | log.Printf("初始化ipParser成功") 93 | } 94 | //初始化追踪 95 | err = setupTracer() 96 | if err != nil { 97 | 
log.Printf("init.setupTracer err: %v\n", err) 98 | } else { 99 | log.Printf("初始化Tracer成功") 100 | } 101 | //初始化ID生成器 102 | err = idGenerator.InitSnowflake() 103 | if err != nil { 104 | log.Printf("init.snowflak err: %v\n", err) 105 | } else { 106 | log.Printf("初始化idGenerator成功") 107 | } 108 | } 109 | 110 | func setupSetting(config string) error { 111 | newSetting, err := setting.NewSetting(strings.Split(config, ",")...) 112 | if err != nil { 113 | return err 114 | } 115 | err = newSetting.ReadSection("Server", &global.ServerSetting) 116 | if err != nil { 117 | return err 118 | } 119 | err = newSetting.ReadSection("App", &global.AppSetting) 120 | if err != nil { 121 | return err 122 | } 123 | err = newSetting.ReadSection("Database", &global.DatabaseSetting) 124 | if err != nil { 125 | return err 126 | } 127 | err = newSetting.ReadSection("Cache", &global.CacheSetting) 128 | if err != nil { 129 | return err 130 | } 131 | err = newSetting.ReadSection("RabbitMQ", &global.RabbitMQSetting) 132 | if err != nil { 133 | return err 134 | } 135 | err = newSetting.ReadSection("Elastic", &global.ElasticSetting) 136 | if err != nil { 137 | return err 138 | } 139 | err = newSetting.ReadSection("JWT", &global.JWTSetting) 140 | if err != nil { 141 | return err 142 | } 143 | err = newSetting.ReadSection("Email", &global.EmailSetting) 144 | if err != nil { 145 | return err 146 | } 147 | err = newSetting.ReadSection("Consul", &global.ConsulSetting) 148 | if err != nil { 149 | return err 150 | } 151 | err = newSetting.ReadSection("MongoDB", &global.MongoDBSetting) 152 | if err != nil { 153 | return err 154 | } 155 | err = newSetting.ReadSection("Etcd", &global.EtcdSetting) 156 | if err != nil { 157 | return err 158 | } 159 | err = newSetting.ReadSection("IpParser", &global.IpParserSetting) 160 | if err != nil { 161 | return err 162 | } 163 | err = newSetting.ReadSection("Tracer", &global.TracerSetting) 164 | if err != nil { 165 | return err 166 | } 167 | 168 | 
global.AppSetting.DefaultContextTimeout *= time.Second 169 | global.ServerSetting.ReadTimeout *= time.Second 170 | global.ServerSetting.WriteTimeout *= time.Second 171 | global.JWTSetting.Expire *= time.Second 172 | 173 | return nil 174 | } 175 | 176 | func setupDBEngine() error { 177 | var err error 178 | global.DBEngine, err = db.NewDBEngine(global.DatabaseSetting) 179 | if err != nil { 180 | return err 181 | } 182 | return nil 183 | } 184 | 185 | func setupCacheEngine() error { 186 | var err error 187 | global.RedisEngine, err = cache.NewRedisEngine(global.CacheSetting) 188 | if err != nil { 189 | return err 190 | } 191 | return nil 192 | } 193 | 194 | func setupRabbitMQEngine() error { 195 | var err error 196 | global.RabbitMQEngine, err = mq.NewRabbitMQEngine(global.RabbitMQSetting) 197 | if err != nil { 198 | return err 199 | } 200 | return nil 201 | } 202 | 203 | func setupElasticEngine() error { 204 | var err error 205 | global.ElasticEngine, err = elastic.NewElasticEngine(global.ElasticSetting) 206 | if err != nil { 207 | return err 208 | } 209 | return nil 210 | } 211 | 212 | func setupMongoDBEngine() error { 213 | var err error 214 | global.MongoDBEngine, err = mongoDB.NewMongoDBEngine(global.MongoDBSetting) 215 | if err != nil { 216 | return err 217 | } 218 | return nil 219 | } 220 | 221 | func setupEtcdEngine() error { 222 | var err error 223 | global.EtcdEngine, global.EtcdKV, global.EtcdLease, global.EtcdWatcher, err = etcd.NewEtcdEngine(global.EtcdSetting) 224 | if err != nil { 225 | return err 226 | } 227 | return nil 228 | } 229 | 230 | func setupIpParser() error { 231 | var err error 232 | global.IpParser, err = ipParser.NewIpParser(global.IpParserSetting) 233 | if err != nil { 234 | return err 235 | } 236 | return nil 237 | } 238 | 239 | func setupLogger() error { 240 | fileName := global.AppSetting.LogSavePath + "/" + global.AppSetting.LogFileName + global.AppSetting.LogFileExt 241 | log.Println("log file name ", fileName) 242 | global.Logger = 
logger.NewLogger(&lumberjack.Logger{ 243 | Filename: fileName, 244 | MaxSize: 500, 245 | MaxAge: 10, 246 | LocalTime: true, 247 | }, "", log.LstdFlags).WithCaller(2) 248 | 249 | return nil 250 | } 251 | 252 | func setupTracer() error { 253 | jaegerTracer, _, err := tracer.NewJaegerTracer(global.TracerSetting.ServiceName, global.TracerSetting.Host) 254 | if err != nil { 255 | return err 256 | } 257 | global.Tracer = jaegerTracer 258 | return nil 259 | } 260 | -------------------------------------------------------------------------------- /internal/crawler/crawerConfig/articleMQConfig.go: -------------------------------------------------------------------------------- 1 | package crawerConfig 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-09-01 19:04 6 | * @Description: 7 | **/ 8 | 9 | const ArticleList = "articleList" 10 | const ArticleUrlList = "articleUrlList" 11 | const ArticleDetail = "articleDetail" 12 | -------------------------------------------------------------------------------- /internal/crawler/crawerConfig/bookMQConfig.go: -------------------------------------------------------------------------------- 1 | package crawerConfig 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-08-14 16:08 6 | * @Description: 可以放入配置文件,通过viper统一读取 7 | **/ 8 | const BookDetailUrl = "bookDetailURL" 9 | const BookDetail = "bookDetail" 10 | const TagUrl = "tagURL" 11 | -------------------------------------------------------------------------------- /internal/crawler/crawerConfig/start.go: -------------------------------------------------------------------------------- 1 | package crawerConfig 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-09-01 19:05 6 | * @Description: 7 | **/ 8 | 9 | const StartUrl = "https://book.douban.com/tag/" 10 | 11 | //const StartUrl = "https://tech.meituan.com/" 12 | 13 | const StopTAG = "finish" 14 | -------------------------------------------------------------------------------- /internal/crawler/crawlOperation.go: 
-------------------------------------------------------------------------------- 1 | package crawler 2 | 3 | import ( 4 | "context" 5 | "go-crawler-distributed/global" 6 | "go-crawler-distributed/internal/crawler/crawerConfig" 7 | "go-crawler-distributed/internal/crawler/worker" 8 | "go-crawler-distributed/pkg/mq" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | /** 14 | * @Author: super 15 | * @Date: 2020-08-31 15:20 16 | * @Description: 17 | **/ 18 | 19 | //sourceMQ: 配置从哪里读取消息 20 | //targetMQ: 配置将解析好的消息发送到什么位置 21 | //name: 当前工作节点的名称 22 | //function: 页面的具体解析函数 23 | func Crawl(sourceMQ string, targetMQ string, name string, function worker.ParserFunc) { 24 | funcParser := worker.NewFuncParser(function, targetMQ, name) 25 | if sourceMQ == "" { 26 | //代表开始模块 27 | url := crawerConfig.StartUrl 28 | doCrawler(url, funcParser) 29 | } else if targetMQ == "" { 30 | //存储模块 31 | getMessage(sourceMQ, funcParser, true) 32 | } else { 33 | getMessage(sourceMQ, funcParser, false) 34 | } 35 | } 36 | 37 | func getMessage(sourceMQ string, funcParser *worker.FuncParser, isStorage bool) { 38 | messages, err := mq.Consume(sourceMQ) 39 | if err != nil { 40 | global.Logger.Error(context.Background(), err) 41 | return 42 | } 43 | global.Logger.Infof(context.Background(), "parser name: %s", funcParser.Name) 44 | 45 | var wg sync.WaitGroup 46 | for d := range messages { 47 | d.Ack(false) 48 | if string(d.Body) == crawerConfig.StopTAG { 49 | break 50 | } else { 51 | wg.Add(1) 52 | go func(data []byte) { 53 | defer wg.Done() 54 | //是否是保存操作 55 | if isStorage { 56 | doStorage(data, funcParser) 57 | } else { 58 | d := string(data) 59 | doCrawler(d, funcParser) 60 | } 61 | }(d.Body) 62 | } 63 | time.Sleep(time.Second * 2) 64 | } 65 | wg.Wait() 66 | global.Logger.Infof(context.Background(), "finish fetching parser name: %s", funcParser.Name) 67 | } 68 | 69 | func doCrawler(url string, funcParser *worker.FuncParser) { 70 | global.Logger.Infof(context.Background(), "fetching: %s", url) 71 | r := 
worker.Request{ 72 | Url: url, 73 | Parser: funcParser, 74 | } 75 | 76 | worker.Worker(r) 77 | } 78 | 79 | func doStorage(data []byte, funcParser *worker.FuncParser) { 80 | global.Logger.Infof(context.Background(), "saving: %s", data) 81 | funcParser.Parse(data, "") 82 | } 83 | -------------------------------------------------------------------------------- /internal/crawler/cronJob/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/robfig/cron/v3" 6 | "time" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2021-01-19 13:52 12 | * @Description: 定时任务 13 | **/ 14 | 15 | func main() { 16 | c := cron.New() 17 | //c.AddFunc("1 * * * *", func() { fmt.Println("Every hour on the half hour") }) 18 | //c.AddFunc("30 3-6,20-23 * * *", func() { fmt.Println(".. in the range 3-6am, 8-11pm") }) 19 | //c.AddFunc("CRON_TZ=Asia/Tokyo 30 04 * * *", func() { fmt.Println("Runs at 04:30 Tokyo time every day") }) 20 | //c.AddFunc("@hourly", func() { fmt.Println("Every hour, starting an hour from now") }) 21 | //c.AddFunc("@every 1h30m", func() { fmt.Println("Every hour thirty, starting an hour thirty from now") }) 22 | c.AddFunc("@every 2s", func() { fmt.Println("Every hour thirty, starting an hour thirty from now") }) 23 | c.Start() 24 | 25 | t1 := time.NewTimer(time.Second * 10) 26 | for { 27 | select { 28 | case <-t1.C: 29 | t1.Reset(time.Second * 10) 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /internal/crawler/douban/parser/bookDetail.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "context" 5 | "github.com/PuerkitoBio/goquery" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/internal/model" 8 | "go-crawler-distributed/pkg/mq" 9 | "regexp" 10 | "strconv" 11 | "strings" 12 | ) 13 | 14 | /** 15 | * @Author: super 16 | * @Date: 
2020-08-14 14:22 17 | * @Description: 18 | **/ 19 | var re = regexp.MustCompile(`]*>([^<]+)]*>([^<]+)<`) 20 | var re1 = regexp.MustCompile(`]*>([^<]+)[^>]*>([^<]+)<`) 21 | var DateRe = regexp.MustCompile(`([0-9]{3}[1-9]|[0-9]{2}[1-9][0-9]{1}|[0-9]{1}[1-9][0-9]{2}|[1-9][0-9]{3})-(((0[13578]|1[02])-(0[1-9]|[12][0-9]|3[01]))|((0[469]|11)-(0[1-9]|[12][0-9]|30))|(02-(0[1-9]|[1][0-9]|2[0-8])))`) 22 | var priceRe = regexp.MustCompile(`[0-9]+[.]?[0-9]*`) 23 | 24 | func ParseBookDetail(contents []byte, queueName string, url string) { 25 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents))) 26 | if err != nil { 27 | global.Logger.Error(context.Background(), err) 28 | } 29 | 30 | book := &model.Book{} 31 | book.Url = url 32 | 33 | //封面图片 34 | result := dom.Find("img[title]") 35 | img, _ := result.Attr("src") 36 | book.Img = img 37 | //书名 38 | title, _ := result.Attr("alt") 39 | book.Title = title 40 | 41 | //图书信息 42 | allSubmatch := re.FindAllSubmatch(contents, -1) 43 | for _, m := range allSubmatch { 44 | k := string(m[1]) 45 | k = strings.TrimSpace(k) 46 | v := string(m[2]) 47 | v = strings.TrimSpace(v) 48 | switch { 49 | case k == "ISBN:": 50 | book.ISBN = v 51 | case k == "出版年:": 52 | dateMatch := DateRe.FindAllSubmatch([]byte(v), -1) 53 | if len(dateMatch) == 0 { 54 | v = "2006-01-02" 55 | } 56 | if v == "" { 57 | v = "2006-01-02" 58 | } 59 | book.PublishYear = v 60 | case k == "副标题:": 61 | book.SubTitle = v 62 | case k == "原作名:": 63 | book.OriginalName = v 64 | case k == "定价:": 65 | priceMatch := priceRe.Find([]byte(v)) 66 | if len(priceMatch) == 0 { 67 | v = "0" 68 | } else { 69 | v = string(priceMatch) 70 | } 71 | p, _ := strconv.ParseFloat(v, 64) 72 | book.Price = p 73 | case k == "装帧:": 74 | book.Layout = v 75 | case k == "页数:": 76 | p, _ := strconv.Atoi(v) 77 | book.Pages = p 78 | case k == "出版社:": 79 | book.Publish = v 80 | } 81 | } 82 | allSubmatch = re1.FindAllSubmatch(contents, -1) 83 | for _, m := range allSubmatch { 84 | k := 
string(m[1]) 85 | k = strings.TrimSpace(k) 86 | v := string(m[2]) 87 | v = strings.TrimSpace(v) 88 | switch { 89 | case k == "丛书:": 90 | book.Series = v 91 | case k == "作者:": 92 | book.Author = v 93 | case k == "出品方:": 94 | book.Producer = v 95 | } 96 | } 97 | 98 | //评分 99 | result = dom.Find("strong") 100 | score, _ := strconv.ParseFloat(strings.TrimSpace(result.Text()), 64) 101 | book.Score = score 102 | 103 | //评价人数 104 | result = dom.Find("a[class=rating_people]") 105 | length := len(result.Text()) 106 | if length <= 9 { 107 | book.Comments = 0 108 | } else { 109 | comment := result.Text()[:length-9] 110 | comments, _ := strconv.Atoi(comment) 111 | book.Comments = comments 112 | } 113 | 114 | //短评 115 | result = dom.Find("div[class=indent]+p") 116 | commentUrl, _ := result.Children().Attr("href") 117 | book.CommentUrl = commentUrl 118 | 119 | //Book结构体转json 120 | bytes, err := book.MarshalJSON() 121 | if err != nil { 122 | global.Logger.Error(context.Background(), err) 123 | } else { 124 | //将解析到的图书详细信息URL放到消息队列 125 | err = mq.Publish(queueName, bytes) 126 | if err != nil { 127 | global.Logger.Error(context.Background(), err) 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /internal/crawler/douban/parser/booklist.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "context" 5 | "github.com/PuerkitoBio/goquery" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/pkg/mq" 8 | "go-crawler-distributed/service/cache/client" 9 | "strings" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 2020-08-14 13:54 15 | * @Description: 16 | **/ 17 | 18 | func ParseBookList(contents []byte, queueName string, url string) { 19 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents))) 20 | if err != nil { 21 | global.Logger.Error(context.Background(), err) 22 | } 23 | 24 | result := dom.Find("a[title]") 25 | 
result.Each(func(i int, selection *goquery.Selection) { 26 | href, _ := selection.Attr("href") 27 | global.Logger.Infof(context.Background(), "url: %s", href) 28 | 29 | //redis去重 30 | boolean, _ := client.ElementIsInSet(queueName, href) 31 | if !boolean { 32 | //不再redis中就添加 33 | _, _ = client.AddElementToSet(queueName, href) 34 | //将解析到的图书详细信息URL放到消息队列 35 | err = mq.Publish(queueName, []byte(href)) 36 | if err != nil { 37 | global.Logger.Error(context.Background(), err) 38 | } 39 | } 40 | }) 41 | } 42 | -------------------------------------------------------------------------------- /internal/crawler/douban/parser/tagList.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "context" 5 | "github.com/PuerkitoBio/goquery" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/internal/crawler/crawerConfig" 8 | "go-crawler-distributed/pkg/mq" 9 | "strconv" 10 | "strings" 11 | "sync" 12 | "time" 13 | ) 14 | 15 | /** 16 | * @Author: super 17 | * @Date: 2020-08-14 20:49 18 | * @Description: 19 | **/ 20 | func ParseTagList(contents []byte, queueName string, url string) { 21 | 22 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents))) 23 | if err != nil { 24 | global.Logger.Error(context.Background(), err) 25 | } 26 | 27 | result := dom.Find("table[class=tagCol]").Find("a") 28 | href := "" 29 | var wg sync.WaitGroup 30 | result.Each(func(i int, selection *goquery.Selection) { 31 | href = url + selection.Text() 32 | for i := 0; i <= 1000; i = i + 20 { 33 | wg.Add(1) 34 | go func(i int) { 35 | defer wg.Done() 36 | url := href + "?start=" + strconv.Itoa(i) + "&type=T" 37 | global.Logger.Infof(context.Background(), "url", url) 38 | 39 | //将解析到的图书详细信息URL放到消息队列 40 | err = mq.Publish(queueName, []byte(href)) 41 | if err != nil { 42 | global.Logger.Error(context.Background(), err) 43 | } 44 | }(i) 45 | time.Sleep(time.Millisecond * 100) 46 | } 47 | }) 48 | wg.Wait() 49 | 50 | 
err = mq.Publish(queueName, []byte(crawerConfig.StopTAG)) 51 | if err != nil { 52 | global.Logger.Error(context.Background(), err) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /internal/crawler/douban/storage/bookDetail.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "context" 5 | "go-crawler-distributed/global" 6 | "go-crawler-distributed/internal/dao" 7 | "go-crawler-distributed/internal/model" 8 | ) 9 | 10 | /** 11 | * @Author: super 12 | * @Date: 2020-08-16 08:56 13 | * @Description: 14 | **/ 15 | 16 | func ParseAndStorage(contents []byte, _ string, _ string) { 17 | book := model.Book{} 18 | err := book.UnmarshalJSON(contents) 19 | if err != nil { 20 | global.Logger.Error(context.Background(), err) 21 | return 22 | } 23 | 24 | bookManager := dao.NewBookManager("books", global.DBEngine) 25 | 26 | _, err = bookManager.SaveBook(book) 27 | if err != nil { 28 | global.Logger.Error(context.Background(), err) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /internal/crawler/fetcher/fetcher.go: -------------------------------------------------------------------------------- 1 | package fetcher 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "fmt" 7 | "go-crawler-distributed/global" 8 | "golang.org/x/net/html/charset" 9 | "golang.org/x/text/encoding" 10 | "golang.org/x/text/encoding/unicode" 11 | "golang.org/x/text/transform" 12 | "io" 13 | "io/ioutil" 14 | "net/http" 15 | ) 16 | 17 | /** 18 | * @Author: super 19 | * @Date: 2020-08-14 13:47 20 | * @Description: 21 | **/ 22 | 23 | func Fetch(url string) ([]byte, error) { 24 | client := &http.Client{} 25 | request, err := http.NewRequest("GET", url, nil) 26 | if err != nil { 27 | global.Logger.Error(context.Background(), err) 28 | return nil, err 29 | } 30 | 31 | request.Header.Add("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36") 32 | 33 | resp, err := client.Do(request) 34 | if err != nil { 35 | return nil, err 36 | } 37 | defer resp.Body.Close() 38 | 39 | if resp.StatusCode != http.StatusOK { 40 | return nil, 41 | fmt.Errorf("wrong status code: %d", 42 | resp.StatusCode) 43 | } 44 | 45 | e := determineEncoding(resp.Body) 46 | 47 | utf8Reader := transform.NewReader(resp.Body, e.NewDecoder()) 48 | 49 | return ioutil.ReadAll(utf8Reader) 50 | } 51 | 52 | //自动判断编码 53 | func determineEncoding(r io.Reader) encoding.Encoding { 54 | bytes, err := bufio.NewReader(r).Peek(1024) 55 | if err != nil { 56 | global.Logger.Error(context.Background(), err) 57 | //默认UTF8编码 58 | return unicode.UTF8 59 | } 60 | e, _, _ := charset.DetermineEncoding(bytes, "") 61 | return e 62 | } 63 | -------------------------------------------------------------------------------- /internal/crawler/meituan/conf/mapping.go: -------------------------------------------------------------------------------- 1 | package conf 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-09-01 17:01 6 | * @Description: 用于存储与elastic的映射 7 | **/ 8 | 9 | const Mapping = ` 10 | { 11 | "mappings": { 12 | "properties": { 13 | "title": { 14 | "type": "text" 15 | }, 16 | "url": { 17 | "type": "text" 18 | }, 19 | "genres": { 20 | "type": "keyword" 21 | }, 22 | "content": { 23 | "type": "text" 24 | } 25 | } 26 | } 27 | }` 28 | -------------------------------------------------------------------------------- /internal/crawler/meituan/parser/articleDetail.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "context" 5 | "github.com/PuerkitoBio/goquery" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/internal/model" 8 | "go-crawler-distributed/pkg/mq" 9 | "go-crawler-distributed/pkg/util" 10 | "strings" 11 | ) 12 | 13 | /** 14 | * @Author: super 15 | * @Date: 2020-09-01 19:09 16 | * @Description: 
17 | **/ 18 | 19 | func ParseArticleDetail(contents []byte, queueName string, url string) { 20 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents))) 21 | if err != nil { 22 | global.Logger.Error(context.Background(), err) 23 | } 24 | 25 | article := &model.Article{} 26 | 27 | result := dom.Find("a[rel=bookmark]") 28 | article.Url = url 29 | 30 | title := result.Text() 31 | article.Title = title 32 | 33 | s, err := util.ZipString(contents) 34 | if err != nil { 35 | global.Logger.Error(context.Background(), err) 36 | } 37 | article.Content = s 38 | 39 | result = dom.Find("a[rel=tag]") 40 | result.Each(func(i int, selection *goquery.Selection) { 41 | tag := selection.Text() 42 | article.Genres = append(article.Genres, tag) 43 | }) 44 | 45 | //Article结构体转json 46 | bytes, err := article.MarshalJSON() 47 | if err != nil { 48 | global.Logger.Error(context.Background(), err) 49 | } else { 50 | //将解析到的图书详细信息URL放到消息队列 51 | err = mq.Publish(queueName, bytes) 52 | if err != nil { 53 | global.Logger.Error(context.Background(), err) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /internal/crawler/meituan/parser/articleList.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "context" 5 | "go-crawler-distributed/global" 6 | "go-crawler-distributed/internal/crawler/crawerConfig" 7 | "go-crawler-distributed/pkg/mq" 8 | "strconv" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2020-09-01 16:00 14 | * @Description: 15 | **/ 16 | 17 | func ParseArticleList(contents []byte, queueName string, url string) { 18 | err := mq.Publish(queueName, []byte(url)) 19 | if err != nil { 20 | global.Logger.Error(context.Background(), err) 21 | } 22 | global.Logger.Infof(context.Background(), "url: %s", url) 23 | 24 | for i := 2; i < 22; i++ { 25 | url := "https://tech.meituan.com//page/" + strconv.Itoa(i) + ".html" 26 | 
global.Logger.Infof(context.Background(), "url: %s", url) 27 | err = mq.Publish(queueName, []byte(url)) 28 | if err != nil { 29 | global.Logger.Error(context.Background(), err) 30 | } 31 | } 32 | err = mq.Publish(queueName, []byte(crawerConfig.StopTAG)) 33 | if err != nil { 34 | global.Logger.Error(context.Background(), err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /internal/crawler/meituan/parser/articleUrlList.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "context" 5 | "github.com/PuerkitoBio/goquery" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/pkg/mq" 8 | "strings" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2020-09-01 18:59 14 | * @Description: 15 | **/ 16 | 17 | func ParseArticleUrlList(contents []byte, queueName string, _ string) { 18 | dom, err := goquery.NewDocumentFromReader(strings.NewReader(string(contents))) 19 | if err != nil { 20 | global.Logger.Error(context.Background(), err) 21 | } 22 | 23 | result := dom.Find("a[rel=bookmark]") 24 | result.Each(func(i int, selection *goquery.Selection) { 25 | href, exist := selection.Attr("href") 26 | if exist { 27 | global.Logger.Infof(context.Background(), "url: %s", href) 28 | //将解析到的图书详细信息URL放到消息队列 29 | err = mq.Publish(queueName, []byte(href)) 30 | if err != nil { 31 | global.Logger.Error(context.Background(), err) 32 | } 33 | } 34 | }) 35 | } 36 | -------------------------------------------------------------------------------- /internal/crawler/meituan/storage/articleDetail.go: -------------------------------------------------------------------------------- 1 | package storage 2 | 3 | import ( 4 | "context" 5 | 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/internal/model" 8 | "go-crawler-distributed/pkg/util" 9 | "go-crawler-distributed/service/elastic/client" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 
type (
	// ParserFunc handles one fetched page or one queued record: contents is
	// the payload, queueName the queue results are published to, and url the
	// address the payload came from.
	ParserFunc func(contents []byte, queueName string, url string)

	// Parser is anything that can process a payload together with its URL.
	Parser interface {
		Parse(contents []byte, url string)
	}

	// Request pairs a URL with the Parser that handles what is found there.
	Request struct {
		Url    string
		Parser Parser
	}

	// FuncParser adapts a ParserFunc to the Parser interface by remembering
	// the target queue. Name identifies the worker node using it.
	FuncParser struct {
		parser    ParserFunc
		QueueName string
		Name      string
	}
)

// Parse calls the wrapped ParserFunc with contents, the configured queue
// name and url.
func (f *FuncParser) Parse(contents []byte, url string) {
	handle, queue := f.parser, f.QueueName
	handle(contents, queue, url)
}

// NewFuncParser returns a FuncParser that runs p, publishes to mqName and is
// labelled name.
func NewFuncParser(p ParserFunc, mqName string, name string) *FuncParser {
	fp := new(FuncParser)
	fp.parser = p
	fp.QueueName = mqName
	fp.Name = name
	return fp
}
-------------------------------------------------------------------------------- /internal/crawler/worker/worker.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import "go-crawler-distributed/internal/crawler/fetcher" 4 | 5 | /** 6 | * @Author: super 7 | * @Date: 2020-08-16 07:55 8 | * @Description: 9 | **/ 10 | func Worker(r Request) { 11 | contents, _ := fetcher.Fetch(r.Url) 12 | r.Parser.Parse(contents, r.Url) 13 | } 14 | -------------------------------------------------------------------------------- /internal/crontab/common/constants.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import "errors" 4 | 5 | /** 6 | * @Author: super 7 | * @Date: 2021-02-06 19:56 8 | * @Description: 9 | **/ 10 | 11 | var ERR_LOCK_ALREDAY_REQUIRED = errors.New("锁被占用") 12 | var ERR_NO_LOCAL_IP_FOUND = errors.New("没有找到网卡IP") 13 | 14 | const ( 15 | // 任务保存目录 16 | JOB_SAVE_DIR = "/cron/jobs/" 17 | 18 | // 任务强杀目录 19 | JOB_KILLER_DIR = "/cron/killer/" 20 | 21 | // 任务锁目录 22 | JOB_LOCK_DIR = "/cron/lock/" 23 | 24 | // 服务注册目录 25 | JOB_WORKER_DIR = "/cron/workers/" 26 | 27 | // 保存任务事件 28 | JOB_EVENT_SAVE = 1 29 | 30 | // 删除任务事件 31 | JOB_EVENT_DELETE = 2 32 | 33 | // 强杀任务事件 34 | JOB_EVENT_KILL = 3 35 | ) 36 | -------------------------------------------------------------------------------- /internal/crontab/common/job.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "context" 5 | "github.com/robfig/cron/v3" 6 | "strings" 7 | "time" 8 | ) 9 | 10 | /** 11 | * @Author: super 12 | * @Date: 2021-02-06 18:44 13 | * @Description: 14 | **/ 15 | 16 | type Job struct { 17 | Name string `json:"name"` 18 | Command string `json:"command"` 19 | CronExpr string `json:"cron_expr"` 20 | } 21 | 22 | func (job *Job) Run() { 23 | } 24 | 25 | // 任务调度计划 26 | type JobSchedulePlan struct { 27 | Job *Job // 要调度的任务信息 28 
| Expr string // cron_expr表达式 29 | Schedule cron.Schedule // cron_expr表达式 30 | NextTime time.Time // 下次调度时间 31 | } 32 | 33 | type JobExecuteInfo struct { 34 | Job *Job 35 | PlanTime time.Time // 理论上的调度时间 36 | RealTime time.Time // 实际的调度时间 37 | CancelCtx context.Context // 任务command的context 38 | CancelFunc context.CancelFunc // 用于取消command执行的cancel函数 39 | } 40 | 41 | // 变化事件 42 | type JobEvent struct { 43 | EventType int // SAVE, DELETE 44 | Job *Job 45 | } 46 | 47 | // 任务执行结果 48 | type JobExecuteResult struct { 49 | ExecuteInfo *JobExecuteInfo // 执行状态 50 | Output []byte // 脚本输出 51 | Err error // 脚本错误原因 52 | StartTime time.Time // 启动时间 53 | EndTime time.Time // 结束时间 54 | } 55 | 56 | // 从etcd的key中提取任务名 57 | // /cron/jobs/job10抹掉/cron/jobs/ 58 | func ExtractJobName(jobKey string) string { 59 | return strings.TrimPrefix(jobKey, JOB_SAVE_DIR) 60 | } 61 | 62 | // 从 /cron/killer/job10提取job10 63 | func ExtractKillerName(killerKey string) string { 64 | return strings.TrimPrefix(killerKey, JOB_KILLER_DIR) 65 | } 66 | 67 | // 任务变化事件有2种:1)更新任务 2)删除任务 68 | func BuildJobEvent(eventType int, job *Job) (jobEvent *JobEvent) { 69 | return &JobEvent{ 70 | EventType: eventType, 71 | Job: job, 72 | } 73 | } 74 | 75 | // 构造任务执行计划 76 | func BuildJobSchedulePlan(job *Job) (jobSchedulePlan *JobSchedulePlan, err error) { 77 | var ( 78 | schedule cron.Schedule 79 | ) 80 | 81 | // 解析JOB的cron表达式 82 | if schedule, err = cron.ParseStandard(job.CronExpr); err != nil { 83 | return 84 | } 85 | 86 | // 生成任务调度计划对象 87 | jobSchedulePlan = &JobSchedulePlan{ 88 | Job: job, 89 | Expr: job.CronExpr, 90 | Schedule: schedule, 91 | NextTime: schedule.Next(time.Now()), 92 | } 93 | return 94 | } 95 | 96 | // 构造执行状态信息 97 | func BuildJobExecuteInfo(jobSchedulePlan *JobSchedulePlan) (jobExecuteInfo *JobExecuteInfo) { 98 | jobExecuteInfo = &JobExecuteInfo{ 99 | Job: jobSchedulePlan.Job, 100 | PlanTime: jobSchedulePlan.NextTime, // 计算调度时间 101 | RealTime: time.Now(), // 真实调度时间 102 | } 103 | jobExecuteInfo.CancelCtx, 
jobExecuteInfo.CancelFunc = context.WithCancel(context.TODO()) 104 | return 105 | } 106 | 107 | // 提取worker的IP 108 | func ExtractWorkerIP(regKey string) string { 109 | return strings.TrimPrefix(regKey, JOB_WORKER_DIR) 110 | } 111 | -------------------------------------------------------------------------------- /internal/crontab/common/job_easyjson.go: -------------------------------------------------------------------------------- 1 | // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT. 2 | 3 | package common 4 | 5 | import ( 6 | json "encoding/json" 7 | easyjson "github.com/mailru/easyjson" 8 | jlexer "github.com/mailru/easyjson/jlexer" 9 | jwriter "github.com/mailru/easyjson/jwriter" 10 | ) 11 | 12 | // suppress unused package warning 13 | var ( 14 | _ *json.RawMessage 15 | _ *jlexer.Lexer 16 | _ *jwriter.Writer 17 | _ easyjson.Marshaler 18 | ) 19 | 20 | func easyjson8a33d6c7DecodeGoCrawlerDistributedInternalCrontabCommon(in *jlexer.Lexer, out *Job) { 21 | isTopLevel := in.IsStart() 22 | if in.IsNull() { 23 | if isTopLevel { 24 | in.Consumed() 25 | } 26 | in.Skip() 27 | return 28 | } 29 | in.Delim('{') 30 | for !in.IsDelim('}') { 31 | key := in.UnsafeFieldName(false) 32 | in.WantColon() 33 | if in.IsNull() { 34 | in.Skip() 35 | in.WantComma() 36 | continue 37 | } 38 | switch key { 39 | case "name": 40 | out.Name = string(in.String()) 41 | case "command": 42 | out.Command = string(in.String()) 43 | case "cron_expr": 44 | out.CronExpr = string(in.String()) 45 | default: 46 | in.SkipRecursive() 47 | } 48 | in.WantComma() 49 | } 50 | in.Delim('}') 51 | if isTopLevel { 52 | in.Consumed() 53 | } 54 | } 55 | func easyjson8a33d6c7EncodeGoCrawlerDistributedInternalCrontabCommon(out *jwriter.Writer, in Job) { 56 | out.RawByte('{') 57 | first := true 58 | _ = first 59 | { 60 | const prefix string = ",\"name\":" 61 | out.RawString(prefix[1:]) 62 | out.String(string(in.Name)) 63 | } 64 | { 65 | const prefix string = ",\"command\":" 66 | 
out.RawString(prefix) 67 | out.String(string(in.Command)) 68 | } 69 | { 70 | const prefix string = ",\"cron_expr\":" 71 | out.RawString(prefix) 72 | out.String(string(in.CronExpr)) 73 | } 74 | out.RawByte('}') 75 | } 76 | 77 | // MarshalJSON supports json.Marshaler interface 78 | func (v Job) MarshalJSON() ([]byte, error) { 79 | w := jwriter.Writer{} 80 | easyjson8a33d6c7EncodeGoCrawlerDistributedInternalCrontabCommon(&w, v) 81 | return w.Buffer.BuildBytes(), w.Error 82 | } 83 | 84 | // MarshalEasyJSON supports easyjson.Marshaler interface 85 | func (v Job) MarshalEasyJSON(w *jwriter.Writer) { 86 | easyjson8a33d6c7EncodeGoCrawlerDistributedInternalCrontabCommon(w, v) 87 | } 88 | 89 | // UnmarshalJSON supports json.Unmarshaler interface 90 | func (v *Job) UnmarshalJSON(data []byte) error { 91 | r := jlexer.Lexer{Data: data} 92 | easyjson8a33d6c7DecodeGoCrawlerDistributedInternalCrontabCommon(&r, v) 93 | return r.Error() 94 | } 95 | 96 | // UnmarshalEasyJSON supports easyjson.Unmarshaler interface 97 | func (v *Job) UnmarshalEasyJSON(l *jlexer.Lexer) { 98 | easyjson8a33d6c7DecodeGoCrawlerDistributedInternalCrontabCommon(l, v) 99 | } 100 | -------------------------------------------------------------------------------- /internal/crontab/common/log.go: -------------------------------------------------------------------------------- 1 | package common 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2021-02-07 18:39 6 | * @Description: 7 | **/ 8 | 9 | // 任务执行日志 10 | type JobLog struct { 11 | JobName string `json:"jobName" bson:"jobName"` // 任务名字 12 | Command string `json:"command" bson:"command"` // 脚本命令 13 | Err string `json:"err" bson:"err"` // 错误原因 14 | Output string `json:"output" bson:"output"` // 脚本输出 15 | PlanTime int64 `json:"planTime" bson:"planTime"` // 计划开始时间 16 | ScheduleTime int64 `json:"scheduleTime" bson:"scheduleTime"` // 实际调度时间 17 | StartTime int64 `json:"startTime" bson:"startTime"` // 任务执行开始时间 18 | EndTime int64 `json:"endTime" bson:"endTime"` // 
任务执行结束时间 19 | } 20 | 21 | // 日志批次,防止每条日志都单次插入数据库中 22 | type LogBatch struct { 23 | Logs []interface{} // 多条日志 24 | } 25 | 26 | // 任务日志过滤条件 27 | type JobLogFilter struct { 28 | JobName string `bson:"jobName"` 29 | } 30 | 31 | // 任务日志排序规则 32 | type SortLogByStartTime struct { 33 | SortOrder int `bson:"startTime"` // {startTime: -1} 34 | } 35 | -------------------------------------------------------------------------------- /internal/crontab/master/etcd.go: -------------------------------------------------------------------------------- 1 | package master 2 | 3 | import ( 4 | "context" 5 | "github.com/coreos/etcd/clientv3" 6 | "github.com/coreos/etcd/mvcc/mvccpb" 7 | "go-crawler-distributed/global" 8 | "go-crawler-distributed/internal/crontab/common" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2021-02-06 19:25 14 | * @Description: 15 | **/ 16 | 17 | func EtcdSaveJob(ctx context.Context, job *common.Job) (oldJob *common.Job, err error) { 18 | jobKey := common.JOB_SAVE_DIR + job.Name 19 | jobValue, err := job.MarshalJSON() 20 | if err != nil { 21 | return 22 | } 23 | putResp, err := global.EtcdKV.Put(ctx, jobKey, string(jobValue), clientv3.WithPrevKV()) 24 | if err != nil { 25 | return 26 | } 27 | if putResp.PrevKv != nil { 28 | oldJobObj := &common.Job{} 29 | _ = oldJobObj.UnmarshalJSON(putResp.PrevKv.Value) 30 | oldJob = oldJobObj 31 | } 32 | return 33 | } 34 | 35 | func EtcdDeleteJob(ctx context.Context, name string) (oldJob *common.Job, err error) { 36 | jobKey := common.JOB_SAVE_DIR + name 37 | 38 | delResp, err := global.EtcdKV.Delete(ctx, jobKey, clientv3.WithPrevKV()) 39 | if err != nil { 40 | return 41 | } 42 | if len(delResp.PrevKvs) != 0 { 43 | oldJobObj := &common.Job{} 44 | _ = oldJobObj.UnmarshalJSON(delResp.PrevKvs[0].Value) 45 | oldJob = oldJobObj 46 | } 47 | return 48 | } 49 | 50 | func EtcdListJobs(ctx context.Context) (jobList []*common.Job, err error) { 51 | dirKey := common.JOB_SAVE_DIR 52 | 53 | getResp, err := global.EtcdKV.Get(ctx, 
dirKey, clientv3.WithPrefix()) 54 | if err != nil { 55 | return 56 | } 57 | jobList = make([]*common.Job, len(getResp.Kvs)) 58 | for i := 0; i < len(getResp.Kvs); i++ { 59 | job := &common.Job{} 60 | _ = job.UnmarshalJSON(getResp.Kvs[i].Value) 61 | jobList[i] = job 62 | } 63 | return 64 | } 65 | 66 | func EtcdKillJob(ctx context.Context, name string) (err error) { 67 | killerKey := common.JOB_KILLER_DIR + name 68 | 69 | leaseResp, err := global.EtcdLease.Grant(ctx, 1) 70 | if err != nil { 71 | return 72 | } 73 | leaseId := leaseResp.ID 74 | _, err = global.EtcdKV.Put(ctx, killerKey, "", clientv3.WithLease(leaseId)) 75 | return 76 | } 77 | 78 | func ListWorkers()(workerArr []string, err error){ 79 | var ( 80 | getResp *clientv3.GetResponse 81 | kv *mvccpb.KeyValue 82 | workerIP string 83 | ) 84 | 85 | // 初始化数组 86 | workerArr = make([]string, 0) 87 | 88 | // 获取目录下所有Kv 89 | if getResp, err = global.EtcdKV.Get(context.TODO(), common.JOB_WORKER_DIR, clientv3.WithPrefix()); err != nil { 90 | return 91 | } 92 | 93 | // 解析每个节点的IP 94 | for _, kv = range getResp.Kvs { 95 | // kv.Key : /cron/workers/192.168.2.1 96 | workerIP = common.ExtractWorkerIP(string(kv.Key)) 97 | workerArr = append(workerArr, workerIP) 98 | } 99 | return 100 | } 101 | -------------------------------------------------------------------------------- /internal/crontab/worker/etcd.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "context" 5 | "github.com/coreos/etcd/clientv3" 6 | "github.com/coreos/etcd/mvcc/mvccpb" 7 | "github.com/go-acme/lego/v3/log" 8 | "go-crawler-distributed/global" 9 | "go-crawler-distributed/internal/crontab/common" 10 | "net" 11 | "time" 12 | ) 13 | 14 | /** 15 | * @Author: super 16 | * @Date: 2021-02-07 17:14 17 | * @Description: 18 | **/ 19 | 20 | func WatchJobs(ctx context.Context) (err error) { 21 | getResp, err := global.EtcdKV.Get(ctx, common.JOB_SAVE_DIR, clientv3.WithPrefix()) 22 | if err != nil { 
23 | return err 24 | } 25 | for i := 0; i < len(getResp.Kvs); i++ { 26 | job := &common.Job{} 27 | err := job.UnmarshalJSON(getResp.Kvs[i].Value) 28 | if err == nil { 29 | jobEvent := common.BuildJobEvent(common.JOB_EVENT_SAVE, job) 30 | //添加到任务调度器 31 | GlobalScheduler.PushJobEvent(jobEvent) 32 | } 33 | } 34 | 35 | revision := getResp.Header.Revision 36 | 37 | go func(watchStartRevision int64) { 38 | watchChan := global.EtcdWatcher.Watch(ctx, common.JOB_SAVE_DIR, 39 | clientv3.WithRev(watchStartRevision), 40 | clientv3.WithPrefix()) 41 | for watchResp := range watchChan { 42 | for _, watchEvent := range watchResp.Events { 43 | var jobEvent *common.JobEvent 44 | switch watchEvent.Type { 45 | case mvccpb.PUT: 46 | job := &common.Job{} 47 | err := job.UnmarshalJSON(watchEvent.Kv.Value) 48 | if err != nil { 49 | continue 50 | } 51 | jobEvent = common.BuildJobEvent(common.JOB_EVENT_SAVE, job) 52 | case mvccpb.DELETE: 53 | jobName := common.ExtractJobName(string(watchEvent.Kv.Key)) 54 | job := &common.Job{ 55 | Name: jobName, 56 | } 57 | jobEvent = common.BuildJobEvent(common.JOB_EVENT_DELETE, job) 58 | } 59 | //将变化情况推送给调度器 60 | GlobalScheduler.PushJobEvent(jobEvent) 61 | } 62 | } 63 | }(revision + 1) 64 | return 65 | } 66 | 67 | func WatchKiller(ctx context.Context) { 68 | go func() { 69 | // 监听/cron/killer/目录的变化 70 | watchChan := global.EtcdWatcher.Watch(ctx, common.JOB_KILLER_DIR, clientv3.WithPrefix()) 71 | // 处理监听事件 72 | for watchResp := range watchChan { 73 | for _, watchEvent := range watchResp.Events { 74 | switch watchEvent.Type { 75 | case mvccpb.PUT: // 杀死任务事件 76 | jobName := common.ExtractKillerName(string(watchEvent.Kv.Key)) 77 | job := &common.Job{Name: jobName} 78 | jobEvent := common.BuildJobEvent(common.JOB_EVENT_KILL, job) 79 | // 事件推给scheduler 80 | GlobalScheduler.PushJobEvent(jobEvent) 81 | case mvccpb.DELETE: // killer标记过期, 被自动删除 82 | } 83 | } 84 | } 85 | }() 86 | } 87 | 88 | // 获取本机网卡IP 89 | func getLocalIP() (ipv4 string, err error) { 90 | var ( 91 
| addrs []net.Addr 92 | addr net.Addr 93 | ipNet *net.IPNet // IP地址 94 | isIpNet bool 95 | ) 96 | // 获取所有网卡 97 | if addrs, err = net.InterfaceAddrs(); err != nil { 98 | return 99 | } 100 | // 取第一个非lo的网卡IP 101 | for _, addr = range addrs { 102 | // 这个网络地址是IP地址: ipv4, ipv6 103 | if ipNet, isIpNet = addr.(*net.IPNet); isIpNet && !ipNet.IP.IsLoopback() { 104 | // 跳过IPV6 105 | if ipNet.IP.To4() != nil { 106 | ipv4 = ipNet.IP.String() // 192.168.1.1 107 | return 108 | } 109 | } 110 | } 111 | err = common.ERR_NO_LOCAL_IP_FOUND 112 | return 113 | } 114 | 115 | func KeepOnline(){ 116 | var ( 117 | ip string 118 | regKey string 119 | leaseGrantResp *clientv3.LeaseGrantResponse 120 | err error 121 | keepAliveChan <- chan *clientv3.LeaseKeepAliveResponse 122 | keepAliveResp *clientv3.LeaseKeepAliveResponse 123 | cancelCtx context.Context 124 | cancelFunc context.CancelFunc 125 | ) 126 | ip, err = getLocalIP() 127 | if err != nil{ 128 | log.Println("ip获取失败", err) 129 | return 130 | } 131 | for { 132 | // 注册路径 133 | regKey = common.JOB_WORKER_DIR + ip 134 | 135 | cancelFunc = nil 136 | 137 | // 创建租约 138 | if leaseGrantResp, err = global.EtcdLease.Grant(context.TODO(), 10); err != nil { 139 | goto RETRY 140 | } 141 | 142 | // 自动续租 143 | if keepAliveChan, err = global.EtcdLease.KeepAlive(context.TODO(), leaseGrantResp.ID); err != nil { 144 | goto RETRY 145 | } 146 | 147 | cancelCtx, cancelFunc = context.WithCancel(context.TODO()) 148 | 149 | // 注册到etcd 150 | if _, err = global.EtcdKV.Put(cancelCtx, regKey, "", clientv3.WithLease(leaseGrantResp.ID)); err != nil { 151 | goto RETRY 152 | } 153 | 154 | // 处理续租应答 155 | for { 156 | select { 157 | case keepAliveResp = <- keepAliveChan: 158 | if keepAliveResp == nil { // 续租失败 159 | goto RETRY 160 | } 161 | } 162 | } 163 | 164 | RETRY: 165 | time.Sleep(1 * time.Second) 166 | if cancelFunc != nil { 167 | cancelFunc() 168 | } 169 | } 170 | } 171 | 172 | func CreateJobLocker(jobName string) (jobLocker *JobLocker) { 173 | jobLocker = 
NewJobLocker(jobName) 174 | return 175 | } 176 | -------------------------------------------------------------------------------- /internal/crontab/worker/executor.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crontab/common" 5 | "math/rand" 6 | "os/exec" 7 | "time" 8 | ) 9 | 10 | /** 11 | * @Author: super 12 | * @Date: 2021-02-09 10:45 13 | * @Description: 14 | **/ 15 | 16 | type Executor struct { 17 | } 18 | 19 | var ( 20 | GlobalExecutor *Executor 21 | ) 22 | 23 | func (e *Executor) ExecuteJob(info *common.JobExecuteInfo) { 24 | go func() { 25 | var ( 26 | cmd *exec.Cmd 27 | err error 28 | output []byte 29 | result *common.JobExecuteResult 30 | jobLocker *JobLocker 31 | ) 32 | // 任务结果 33 | result = &common.JobExecuteResult{ 34 | ExecuteInfo: info, 35 | Output: make([]byte, 0), 36 | } 37 | 38 | //初始化分布式锁 39 | jobLocker = CreateJobLocker(info.Job.Name) 40 | 41 | result.StartTime = time.Now() 42 | 43 | // 随机睡眠(0~1s),防止单个节点总是抢占任务 44 | time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond) 45 | err = jobLocker.TryLock() 46 | defer jobLocker.Unlock() 47 | 48 | if err != nil { 49 | result.Err = err 50 | result.EndTime = time.Now() 51 | } else { 52 | result.StartTime = time.Now() 53 | // 执行shell命令 54 | cmd = exec.CommandContext(info.CancelCtx, "/bin/bash", "-c", info.Job.Command) 55 | 56 | // 执行并捕获输出 57 | output, err = cmd.CombinedOutput() 58 | 59 | // 记录任务结束时间 60 | result.EndTime = time.Now() 61 | result.Output = output 62 | result.Err = err 63 | } 64 | 65 | GlobalScheduler.PushJobResult(result) 66 | }() 67 | } 68 | 69 | func NewExecutor() (err error) { 70 | GlobalExecutor = &Executor{} 71 | return 72 | } 73 | -------------------------------------------------------------------------------- /internal/crontab/worker/jobLock.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | 
"context" 5 | "github.com/coreos/etcd/clientv3" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/internal/crontab/common" 8 | ) 9 | 10 | /** 11 | * @Author: super 12 | * @Date: 2021-02-07 18:53 13 | * @Description: 14 | **/ 15 | 16 | type JobLocker struct { 17 | JobName string 18 | CancelFunc context.CancelFunc 19 | LeaseId clientv3.LeaseID 20 | IsLocked bool 21 | } 22 | 23 | func (jobLocker *JobLocker) TryLock() (err error) { 24 | var ( 25 | leaseGrantResp *clientv3.LeaseGrantResponse 26 | cancelCtx context.Context 27 | cancelFunc context.CancelFunc 28 | leaseId clientv3.LeaseID 29 | keepRespChan <-chan *clientv3.LeaseKeepAliveResponse 30 | txn clientv3.Txn 31 | lockKey string 32 | txnResp *clientv3.TxnResponse 33 | ) 34 | if leaseGrantResp, err = global.EtcdLease.Grant(context.TODO(), 5); err != nil { 35 | return 36 | } 37 | cancelCtx, cancelFunc = context.WithCancel(context.TODO()) 38 | leaseId = leaseGrantResp.ID 39 | 40 | if keepRespChan, err = global.EtcdLease.KeepAlive(cancelCtx, leaseId); err != nil { 41 | cancelFunc() 42 | global.EtcdLease.Revoke(context.TODO(), leaseId) 43 | return 44 | } 45 | 46 | go func() { 47 | var ( 48 | keepResp *clientv3.LeaseKeepAliveResponse 49 | ) 50 | for { 51 | select { 52 | case keepResp = <-keepRespChan: // 自动续租的应答 53 | if keepResp == nil { 54 | return 55 | } 56 | } 57 | } 58 | }() 59 | 60 | txn = global.EtcdKV.Txn(context.TODO()) 61 | lockKey = common.JOB_LOCK_DIR + jobLocker.JobName 62 | 63 | txn.If(clientv3.Compare(clientv3.CreateRevision(lockKey), "=", 0)). 64 | Then(clientv3.OpPut(lockKey, "", clientv3.WithLease(leaseId))). 
65 | Else(clientv3.OpGet(lockKey)) 66 | 67 | if txnResp, err = txn.Commit(); err != nil { 68 | cancelFunc() 69 | global.EtcdLease.Revoke(context.TODO(), leaseId) 70 | return 71 | } 72 | 73 | if !txnResp.Succeeded { 74 | err = common.ERR_LOCK_ALREDAY_REQUIRED 75 | cancelFunc() 76 | global.EtcdLease.Revoke(context.TODO(), leaseId) 77 | return 78 | } 79 | // 抢锁成功 80 | jobLocker.LeaseId = leaseId 81 | jobLocker.CancelFunc = cancelFunc 82 | jobLocker.IsLocked = true 83 | return 84 | } 85 | 86 | func (jobLocker *JobLocker) Unlock() { 87 | if jobLocker.IsLocked { 88 | jobLocker.CancelFunc() // 取消我们程序自动续租的协程 89 | global.EtcdLease.Revoke(context.TODO(), jobLocker.LeaseId) // 释放租约 90 | } 91 | } 92 | 93 | func NewJobLocker(jobName string) *JobLocker { 94 | return &JobLocker{ 95 | JobName: jobName, 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /internal/crontab/worker/logSink.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "context" 5 | "go-crawler-distributed/global" 6 | "go-crawler-distributed/internal/crontab/common" 7 | "go.mongodb.org/mongo-driver/mongo" 8 | "log" 9 | "time" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 2021-02-09 14:06 15 | * @Description: 16 | **/ 17 | 18 | type LogSink struct { 19 | LogCollection *mongo.Collection 20 | LogChan chan *common.JobLog 21 | AutoCommitChan chan *common.LogBatch 22 | } 23 | 24 | var ( 25 | GlobalLogSink *LogSink 26 | ) 27 | 28 | func (l *LogSink) SaveLogs(batch *common.LogBatch) { 29 | _, err := l.LogCollection.InsertMany(context.TODO(), batch.Logs) 30 | if err != nil { 31 | log.Println("saveLogs", err) 32 | } 33 | log.Println("saveLogs") 34 | } 35 | 36 | func (l *LogSink) writeLoop() { 37 | var ( 38 | jobLog *common.JobLog 39 | logBatch *common.LogBatch // 当前的批次 40 | commitTimer *time.Timer 41 | timeoutBatch *common.LogBatch // 超时批次 42 | ) 43 | for { 44 | select { 45 | case jobLog = 
<-l.LogChan: 46 | if logBatch == nil { 47 | logBatch = &common.LogBatch{} 48 | // 让这个批次超时自动提交(给1秒的时间) 49 | commitTimer = time.AfterFunc( 50 | time.Duration(1000)*time.Millisecond, 51 | func(batch *common.LogBatch) func() { 52 | return func() { 53 | l.AutoCommitChan <- batch 54 | } 55 | }(logBatch), 56 | ) 57 | } 58 | 59 | // 把新日志追加到批次中 60 | logBatch.Logs = append(logBatch.Logs, jobLog) 61 | 62 | // 如果批次满了, 就立即发送 63 | if len(logBatch.Logs) >= 100 { 64 | // 发送日志 65 | l.SaveLogs(logBatch) 66 | // 清空logBatch 67 | logBatch = nil 68 | // 取消定时器 69 | commitTimer.Stop() 70 | } 71 | case timeoutBatch = <-l.AutoCommitChan: // 过期的批次 72 | // 判断过期批次是否仍旧是当前的批次 73 | if timeoutBatch != logBatch { 74 | continue // 跳过已经被提交的批次 75 | } 76 | // 把批次写入到mongo中 77 | l.SaveLogs(timeoutBatch) 78 | // 清空logBatch 79 | logBatch = nil 80 | } 81 | } 82 | } 83 | 84 | // 发送日志 85 | func (l *LogSink) Append(jobLog *common.JobLog) { 86 | select { 87 | case l.LogChan <- jobLog: 88 | default: 89 | // 队列满了就丢弃 90 | } 91 | } 92 | 93 | func NewLogSink() (err error) { 94 | GlobalLogSink = &LogSink{ 95 | LogCollection: global.MongoDBEngine.Database("cron").Collection("log"), 96 | LogChan: make(chan *common.JobLog, 1000), 97 | AutoCommitChan: make(chan *common.LogBatch, 1000), 98 | } 99 | go GlobalLogSink.writeLoop() 100 | return 101 | } 102 | -------------------------------------------------------------------------------- /internal/crontab/worker/main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "go-crawler-distributed/initConf" 6 | "go-crawler-distributed/internal/crontab/worker" 7 | "log" 8 | "time" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2021-02-08 23:04 14 | * @Description: 15 | **/ 16 | 17 | func main() { 18 | initConf.Init("/Users/super/develop/go-crawler-distributed/configs/") 19 | if err := worker.NewScheduler(); err != nil { 20 | log.Printf("init NewScheduler err: %v\n", err) 21 | return 22 
| } 23 | if err := worker.NewExecutor(); err != nil { 24 | log.Printf("init NewExecutor err: %v\n", err) 25 | return 26 | } 27 | if err := worker.NewLogSink(); err != nil { 28 | log.Printf("init NewLogSink err: %v\n", err) 29 | return 30 | } 31 | if err := worker.WatchJobs(context.Background()); err != nil { 32 | log.Printf("init WatchJobs err: %v\n", err) 33 | return 34 | } 35 | worker.WatchKiller(context.Background()) 36 | go worker.KeepOnline() 37 | 38 | // 正常退出 39 | for { 40 | time.Sleep(1 * time.Second) 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /internal/crontab/worker/scheduler.go: -------------------------------------------------------------------------------- 1 | package worker 2 | 3 | import ( 4 | "fmt" 5 | "go-crawler-distributed/internal/crontab/common" 6 | "time" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2021-02-08 19:47 12 | * @Description: 13 | **/ 14 | 15 | type Scheduler struct { 16 | JobEventChan chan *common.JobEvent 17 | JobPlanTable map[string]*common.JobSchedulePlan //任务调度计划表 18 | JobExecutingTable map[string]*common.JobExecuteInfo 19 | JobResultChan chan *common.JobExecuteResult // 任务结果队列 20 | } 21 | 22 | var ( 23 | GlobalScheduler *Scheduler 24 | ) 25 | 26 | //处理任务事件 27 | func (s *Scheduler) handleJobEvent(jobEvent *common.JobEvent) { 28 | var ( 29 | jobSchedulePlan *common.JobSchedulePlan 30 | jobExcuteInfo *common.JobExecuteInfo 31 | jobExcuting bool 32 | jobExisted bool 33 | err error 34 | ) 35 | switch jobEvent.EventType { 36 | //保存任务事件 37 | case common.JOB_EVENT_SAVE: 38 | if jobSchedulePlan, err = common.BuildJobSchedulePlan(jobEvent.Job); err != nil { 39 | return 40 | } 41 | s.JobPlanTable[jobEvent.Job.Name] = jobSchedulePlan 42 | //删除任务事件 43 | case common.JOB_EVENT_DELETE: 44 | if jobSchedulePlan, jobExisted = s.JobPlanTable[jobEvent.Job.Name]; jobExisted { 45 | delete(s.JobPlanTable, jobEvent.Job.Name) 46 | } 47 | case common.JOB_EVENT_KILL: 48 | 
//通过context取消任务 49 | if jobExcuteInfo, jobExcuting = s.JobExecutingTable[jobEvent.Job.Name]; jobExcuting { 50 | jobExcuteInfo.CancelFunc() 51 | } 52 | } 53 | } 54 | 55 | // 任务虽然被调度了,但是可能因为一些原因执行很久,加入1s执行一次的任务,单次任务执行了1分钟 56 | // 当前任务就会被调度60次却只执行1次 57 | func (s *Scheduler) TryStartJob(jobPlan *common.JobSchedulePlan) { 58 | var ( 59 | jobExcuteInfo *common.JobExecuteInfo 60 | jobExcuting bool 61 | ) 62 | if jobExcuteInfo, jobExcuting = s.JobExecutingTable[jobPlan.Job.Name]; jobExcuting { 63 | return 64 | } 65 | jobExcuteInfo = common.BuildJobExecuteInfo(jobPlan) 66 | s.JobExecutingTable[jobPlan.Job.Name] = jobExcuteInfo 67 | fmt.Println("执行任务", jobExcuteInfo.Job.Name, jobExcuteInfo.PlanTime, jobExcuteInfo.RealTime) 68 | GlobalExecutor.ExecuteJob(jobExcuteInfo) 69 | } 70 | 71 | func (s *Scheduler) TrySchedule() (scheduleAfter time.Duration) { 72 | var ( 73 | jobPlan *common.JobSchedulePlan 74 | now time.Time 75 | nearTime *time.Time 76 | ) 77 | 78 | if len(s.JobPlanTable) == 0 { 79 | scheduleAfter = 1 * time.Second 80 | return 81 | } 82 | 83 | now = time.Now() 84 | for _, jobPlan = range s.JobPlanTable { 85 | if jobPlan.NextTime.Before(now) || jobPlan.NextTime.Equal(now) { 86 | s.TryStartJob(jobPlan) 87 | jobPlan.NextTime = jobPlan.Schedule.Next(now) 88 | } 89 | 90 | if nearTime == nil || jobPlan.NextTime.Before(*nearTime) { 91 | nearTime = &jobPlan.NextTime 92 | } 93 | } 94 | scheduleAfter = (*nearTime).Sub(now) 95 | return 96 | } 97 | 98 | func (s *Scheduler) handleJobResult(result *common.JobExecuteResult) { 99 | delete(s.JobExecutingTable, result.ExecuteInfo.Job.Name) 100 | 101 | //生成执行日志 102 | if result.Err != common.ERR_LOCK_ALREDAY_REQUIRED { 103 | jobLog := &common.JobLog{ 104 | JobName: result.ExecuteInfo.Job.Name, 105 | Command: result.ExecuteInfo.Job.Command, 106 | Output: string(result.Output), 107 | PlanTime: result.ExecuteInfo.PlanTime.UnixNano() / 1000 / 1000, 108 | ScheduleTime: result.ExecuteInfo.RealTime.UnixNano() / 1000 / 1000, 109 | StartTime: 
result.StartTime.UnixNano() / 1000 / 1000, 110 | EndTime: result.EndTime.UnixNano() / 1000 / 1000, 111 | } 112 | if result.Err != nil { 113 | jobLog.Err = result.Err.Error() 114 | } else { 115 | jobLog.Err = "" 116 | } 117 | GlobalLogSink.Append(jobLog) 118 | } 119 | } 120 | 121 | func (s *Scheduler) schedulerLoop() { 122 | var ( 123 | jobEvent *common.JobEvent 124 | scheduleAfter time.Duration 125 | scheduleTimer *time.Timer 126 | jobResult *common.JobExecuteResult 127 | ) 128 | 129 | scheduleAfter = s.TrySchedule() 130 | scheduleTimer = time.NewTimer(scheduleAfter) 131 | 132 | for { 133 | select { 134 | //监听任务变化 135 | case jobEvent = <-s.JobEventChan: 136 | //对内存中的任务进行增删改查 137 | s.handleJobEvent(jobEvent) 138 | case <-scheduleTimer.C: 139 | case jobResult = <-s.JobResultChan: //监听任务执行结果 140 | s.handleJobResult(jobResult) 141 | } 142 | scheduleAfter = s.TrySchedule() 143 | scheduleTimer.Reset(scheduleAfter) 144 | } 145 | } 146 | 147 | func (s *Scheduler) PushJobEvent(jobEvent *common.JobEvent) { 148 | s.JobEventChan <- jobEvent 149 | } 150 | 151 | func (s *Scheduler) PushJobResult(jobResult *common.JobExecuteResult) { 152 | s.JobResultChan <- jobResult 153 | } 154 | 155 | func NewScheduler() (err error) { 156 | GlobalScheduler = &Scheduler{ 157 | JobEventChan: make(chan *common.JobEvent, 10000), 158 | JobPlanTable: make(map[string]*common.JobSchedulePlan), 159 | JobExecutingTable: make(map[string]*common.JobExecuteInfo), 160 | JobResultChan: make(chan *common.JobExecuteResult, 1000), 161 | } 162 | go GlobalScheduler.schedulerLoop() 163 | return 164 | } 165 | -------------------------------------------------------------------------------- /internal/dao/article.go: -------------------------------------------------------------------------------- 1 | package dao 2 | 3 | import ( 4 | "github.com/jinzhu/gorm" 5 | "go-crawler-distributed/internal/model" 6 | ) 7 | 8 | /** 9 | * @Author: super 10 | * @Date: 2021-01-05 15:55 11 | * @Description: 12 | **/ 13 | 14 | type 
IArticle interface { 15 | SaveArticle(article model.Article) (string, error) 16 | } 17 | 18 | type ArticleManager struct { 19 | table string 20 | conn *gorm.DB 21 | } 22 | 23 | func NewArticleManager(table string, conn *gorm.DB) IArticle { 24 | return &ArticleManager{table: table, conn: conn} 25 | } 26 | 27 | func (m *ArticleManager) SaveArticle(article model.Article) (string, error) { 28 | return "", nil 29 | } 30 | -------------------------------------------------------------------------------- /internal/dao/book.go: -------------------------------------------------------------------------------- 1 | package dao 2 | 3 | import ( 4 | "github.com/jinzhu/gorm" 5 | "go-crawler-distributed/internal/model" 6 | ) 7 | 8 | /** 9 | * @Author: super 10 | * @Date: 2021-01-05 18:55 11 | * @Description: 12 | **/ 13 | 14 | type IBook interface { 15 | SaveBook(book model.Book) (string, error) 16 | } 17 | 18 | type BookManager struct { 19 | table string 20 | conn *gorm.DB 21 | } 22 | 23 | func NewBookManager(table string, conn *gorm.DB) IBook { 24 | return &BookManager{table: table, conn: conn} 25 | } 26 | 27 | func (m *BookManager) SaveBook(book model.Book) (string, error) { 28 | return "", nil 29 | } 30 | -------------------------------------------------------------------------------- /internal/dao/dao.go: -------------------------------------------------------------------------------- 1 | package dao 2 | 3 | import "github.com/jinzhu/gorm" 4 | 5 | /** 6 | * @Author: super 7 | * @Date: 2020-09-22 09:35 8 | * @Description: 用于统一配置DB引擎 9 | **/ 10 | 11 | type Dao struct { 12 | engine *gorm.DB 13 | } 14 | 15 | func New(engine *gorm.DB) *Dao { 16 | return &Dao{engine: engine} 17 | } 18 | -------------------------------------------------------------------------------- /internal/dao/forbes.go: -------------------------------------------------------------------------------- 1 | package dao 2 | 3 | import ( 4 | "errors" 5 | "github.com/jinzhu/gorm" 6 | "go-crawler-distributed/pkg/app" 7 
| 8 | "go-crawler-distributed/internal/model" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2020-12-30 11:21 14 | * @Description: 15 | **/ 16 | 17 | type Forbes struct { 18 | NameEn string `json:"name_en"` 19 | Wealth int `json:"wealth"` 20 | SourceOfWealth string `json:"source_of_wealth"` 21 | Region string `json:"region"` 22 | ModifiedOn string `json:"modified_on"` 23 | ID string `json:"id"` 24 | Rank int `json:"rank"` 25 | Name string `json:"name"` 26 | } 27 | 28 | type IForbes interface { 29 | SelectAll() ([]*Forbes, error) 30 | SelectList(page, pageSize int) ([]*Forbes, error) 31 | } 32 | 33 | type ForbesManager struct { 34 | table string 35 | conn *gorm.DB 36 | } 37 | 38 | func NewForbesManager(table string, conn *gorm.DB) IForbes { 39 | return &ForbesManager{table: table, conn: conn} 40 | } 41 | 42 | func (m *ForbesManager) SelectAll() ([]*Forbes, error) { 43 | var f []*model.Forbes 44 | if err := m.conn.Find(&f).Error; err != nil { 45 | return nil, errors.New("select all forbes error") 46 | } 47 | forbess := make([]*Forbes, 0) 48 | for _, forbes := range f { 49 | temp := &Forbes{ 50 | ID: forbes.ID, 51 | Rank: forbes.Rank, 52 | Name: forbes.Name, 53 | NameEn: forbes.NameEn, 54 | Wealth: forbes.Wealth, 55 | SourceOfWealth: forbes.SourceOfWealth, 56 | Region: forbes.Region, 57 | ModifiedOn: forbes.ModifiedOn, 58 | } 59 | forbess = append(forbess, temp) 60 | } 61 | return forbess, nil 62 | } 63 | 64 | func (m *ForbesManager) SelectList(page, pageSize int) ([]*Forbes, error) { 65 | pageOffset := app.GetPageOffset(page, pageSize) 66 | if pageOffset < 0 && pageSize < 0 { 67 | pageOffset = 0 68 | pageSize = 5 69 | } 70 | fields := []string{"id", "rank", "name", "name_en", "wealth", "source_of_wealth", "region", "modified_on"} 71 | rows, err := m.conn.Offset(pageOffset).Limit(pageSize).Select(fields).Table(m.table).Rows() 72 | if err != nil { 73 | return nil, err 74 | } 75 | defer rows.Close() 76 | 77 | var forbess []*Forbes 78 | for rows.Next() { 79 | 
forbes := &Forbes{} 80 | if err := rows.Scan(&forbes.ID, 81 | &forbes.Rank, 82 | &forbes.Name, 83 | &forbes.NameEn, 84 | &forbes.Wealth, 85 | &forbes.SourceOfWealth, 86 | &forbes.Region, 87 | &forbes.ModifiedOn); err != nil { 88 | return nil, err 89 | } 90 | forbess = append(forbess, forbes) 91 | } 92 | return forbess, nil 93 | } 94 | -------------------------------------------------------------------------------- /internal/middleware/access_log.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import ( 4 | "bytes" 5 | "go-crawler-distributed/global" 6 | "go-crawler-distributed/pkg/logger" 7 | "time" 8 | 9 | "github.com/gin-gonic/gin" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 2020-09-23 20:33 15 | * @Description: 处理访问日志中间件,记录请求参数,响应与响应时间 16 | **/ 17 | 18 | type AccessLogWriter struct { 19 | gin.ResponseWriter 20 | body *bytes.Buffer 21 | } 22 | 23 | func (w AccessLogWriter) Write(p []byte) (int, error) { 24 | if n, err := w.body.Write(p); err != nil { 25 | return n, err 26 | } 27 | return w.ResponseWriter.Write(p) 28 | } 29 | 30 | func AccessLog() gin.HandlerFunc { 31 | return func(c *gin.Context) { 32 | bodyWriter := &AccessLogWriter{body: bytes.NewBufferString(""), ResponseWriter: c.Writer} 33 | c.Writer = bodyWriter 34 | 35 | beginTime := time.Now().Unix() 36 | c.Next() 37 | endTime := time.Now().Unix() 38 | 39 | fields := logger.Fields{ 40 | "request": c.Request.PostForm.Encode(), 41 | "response": bodyWriter.body.String(), 42 | } 43 | s := "access log: method: %s, status_code: %d, " + 44 | "begin_time: %d, end_time: %d" 45 | global.Logger.WithFields(fields).Infof(c, s, 46 | c.Request.Method, 47 | bodyWriter.Status(), 48 | beginTime, 49 | endTime, 50 | ) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /internal/middleware/context_timeout.go: -------------------------------------------------------------------------------- 1 | 
package middleware

import (
	"context"
	"time"

	"github.com/gin-gonic/gin"
)

/**
* @Author: super
* @Date: 2020-09-23 21:01
* @Description: Handles response timeouts by putting a deadline on the request context
**/

// ContextTimeout returns a middleware that replaces the request context with
// one that is cancelled after t. The context is also cancelled as soon as the
// downstream handlers return. Handlers only stop early if they observe the
// request context themselves.
func ContextTimeout(t time.Duration) func(c *gin.Context) {
	return func(c *gin.Context) {
		ctx, cancel := context.WithTimeout(c.Request.Context(), t)
		defer cancel()

		c.Request = c.Request.WithContext(ctx)
		c.Next()
	}
}

-------------------------------------------------------------------------------- /internal/middleware/recovery.go: --------------------------------------------------------------------------------
package middleware

import (
	"fmt"
	"go-crawler-distributed/global"
	"go-crawler-distributed/pkg/app"
	"go-crawler-distributed/pkg/email"
	"go-crawler-distributed/pkg/errcode"
	"time"

	"github.com/gin-gonic/gin"
)

/**
* @Author: super
* @Date: 2020-09-23 20:45
* @Description: Custom recovery middleware, mainly used to record when a panic happened and its error message
**/

// Recovery returns a middleware that recovers from panics raised by later
// handlers. On a panic it logs the value with caller frames, sends a
// notification mail to global.EmailSetting.To, answers with
// errcode.ServerError and aborts the handler chain.
//
// The mailer is built once, from global.EmailSetting as it is when Recovery
// is called.
func Recovery() gin.HandlerFunc {
	mailer := email.NewEmail(&email.SMTPInfo{
		Host:     global.EmailSetting.Host,
		Port:     global.EmailSetting.Port,
		IsSSL:    global.EmailSetting.IsSSL,
		UserName: global.EmailSetting.UserName,
		Password: global.EmailSetting.Password,
		From:     global.EmailSetting.From,
	})
	return func(c *gin.Context) {
		defer func() {
			recovered := recover()
			if recovered == nil {
				return
			}
			global.Logger.WithCallersFrames().Errorf(c, "panic recover err: %v", recovered)

			mailErr := mailer.SendMail(
				global.EmailSetting.To,
				fmt.Sprintf("异常抛出,发生时间: %d", time.Now().Unix()),
				fmt.Sprintf("错误信息: %v", recovered),
			)
			if mailErr != nil {
				// A failed notification must not panic again inside the
				// recovery handler: that would skip the error response below.
				global.Logger.Errorf(c, "mail.SendMail err: %v", mailErr)
			}

			app.NewResponse(c).ToErrorResponse(errcode.ServerError)
			c.Abort()
		}()
		c.Next()
	}
}
-------------------------------------------------------------------------------- /internal/middleware/tracer.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import ( 4 | "context" 5 | "go-crawler-distributed/global" 6 | 7 | "github.com/gin-gonic/gin" 8 | "github.com/opentracing/opentracing-go" 9 | "github.com/opentracing/opentracing-go/ext" 10 | "github.com/uber/jaeger-client-go" 11 | ) 12 | 13 | /** 14 | * @Author: super 15 | * @Date: 2020-09-24 08:14 16 | * @Description: 调用链追踪中间件,调用结果可在jaeger ui看到 17 | **/ 18 | 19 | func Tracing() func(c *gin.Context) { 20 | return func(c *gin.Context) { 21 | var newCtx context.Context 22 | var span opentracing.Span 23 | spanCtx, err := opentracing.GlobalTracer().Extract(opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(c.Request.Header)) 24 | if err != nil { 25 | span, newCtx = opentracing.StartSpanFromContextWithTracer(c.Request.Context(), global.Tracer, c.Request.URL.Path) 26 | } else { 27 | span, newCtx = opentracing.StartSpanFromContextWithTracer( 28 | c.Request.Context(), 29 | global.Tracer, 30 | c.Request.URL.Path, 31 | opentracing.ChildOf(spanCtx), 32 | opentracing.Tag{Key: string(ext.Component), Value: "HTTP"}, 33 | ) 34 | } 35 | defer span.Finish() 36 | 37 | var traceID string 38 | var spanID string 39 | var spanContext = span.Context() 40 | switch spanContext.(type) { 41 | case jaeger.SpanContext: 42 | jaegerContext := spanContext.(jaeger.SpanContext) 43 | traceID = jaegerContext.TraceID().String() 44 | spanID = jaegerContext.SpanID().String() 45 | } 46 | c.Set("X-Trace-ID", traceID) 47 | c.Set("X-Span-ID", spanID) 48 | c.Request = c.Request.WithContext(newCtx) 49 | c.Next() 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /internal/middleware/translations.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import ( 4 | 
"github.com/gin-gonic/gin" 5 | "github.com/gin-gonic/gin/binding" 6 | "github.com/go-playground/locales/en" 7 | "github.com/go-playground/locales/zh" 8 | "github.com/go-playground/locales/zh_Hant_TW" 9 | "github.com/go-playground/universal-translator" 10 | validator "github.com/go-playground/validator/v10" 11 | en_translations "github.com/go-playground/validator/v10/translations/en" 12 | zh_translations "github.com/go-playground/validator/v10/translations/zh" 13 | ) 14 | 15 | /** 16 | * @Author: super 17 | * @Date: 2020-09-18 15:06 18 | * @Description: 翻译中间件 19 | **/ 20 | 21 | func Translations() gin.HandlerFunc { 22 | return func(c *gin.Context) { 23 | uni := ut.New(en.New(), zh.New(), zh_Hant_TW.New()) 24 | locale := c.GetHeader("locale") 25 | trans, _ := uni.GetTranslator(locale) 26 | v, ok := binding.Validator.Engine().(*validator.Validate) 27 | if ok { 28 | switch locale { 29 | case "zh": 30 | _ = zh_translations.RegisterDefaultTranslations(v, trans) 31 | break 32 | case "en": 33 | _ = en_translations.RegisterDefaultTranslations(v, trans) 34 | break 35 | default: 36 | _ = zh_translations.RegisterDefaultTranslations(v, trans) 37 | break 38 | } 39 | c.Set("trans", trans) 40 | } 41 | 42 | c.Next() 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /internal/model/article.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import "fmt" 4 | 5 | /** 6 | * @Author: super 7 | * @Date: 2020-09-01 17:23 8 | * @Description: 9 | **/ 10 | 11 | type Article struct { 12 | Title string `json:"title"` 13 | Url string `json:"url"` 14 | Genres []string `json:"genres"` 15 | Content string `json:"content"` 16 | } 17 | 18 | // TableName sets the insert table name for this struct type 19 | func (a *Article) TableName() string { 20 | return "articles" 21 | } 22 | 23 | func (article Article) String() string { 24 | return fmt.Sprintf("title: %s\n"+ 25 | "url: %s\n"+ 26 | "geners: 
%v\n"+ 27 | "content: %s\n", 28 | article.Title, 29 | article.Url, 30 | article.Genres, 31 | article.Content) 32 | } 33 | -------------------------------------------------------------------------------- /internal/model/article_easyjson.go: -------------------------------------------------------------------------------- 1 | // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT. 2 | 3 | package model 4 | 5 | import ( 6 | json "encoding/json" 7 | easyjson "github.com/mailru/easyjson" 8 | jlexer "github.com/mailru/easyjson/jlexer" 9 | jwriter "github.com/mailru/easyjson/jwriter" 10 | ) 11 | 12 | // suppress unused package warning 13 | var ( 14 | _ *json.RawMessage 15 | _ *jlexer.Lexer 16 | _ *jwriter.Writer 17 | _ easyjson.Marshaler 18 | ) 19 | 20 | func easyjson6de889b8DecodeGoCrawlerDistributedModel(in *jlexer.Lexer, out *Article) { 21 | isTopLevel := in.IsStart() 22 | if in.IsNull() { 23 | if isTopLevel { 24 | in.Consumed() 25 | } 26 | in.Skip() 27 | return 28 | } 29 | in.Delim('{') 30 | for !in.IsDelim('}') { 31 | key := in.UnsafeFieldName(false) 32 | in.WantColon() 33 | if in.IsNull() { 34 | in.Skip() 35 | in.WantComma() 36 | continue 37 | } 38 | switch key { 39 | case "title": 40 | out.Title = string(in.String()) 41 | case "url": 42 | out.Url = string(in.String()) 43 | case "genres": 44 | if in.IsNull() { 45 | in.Skip() 46 | out.Genres = nil 47 | } else { 48 | in.Delim('[') 49 | if out.Genres == nil { 50 | if !in.IsDelim(']') { 51 | out.Genres = make([]string, 0, 4) 52 | } else { 53 | out.Genres = []string{} 54 | } 55 | } else { 56 | out.Genres = (out.Genres)[:0] 57 | } 58 | for !in.IsDelim(']') { 59 | var v1 string 60 | v1 = string(in.String()) 61 | out.Genres = append(out.Genres, v1) 62 | in.WantComma() 63 | } 64 | in.Delim(']') 65 | } 66 | case "content": 67 | out.Content = string(in.String()) 68 | default: 69 | in.SkipRecursive() 70 | } 71 | in.WantComma() 72 | } 73 | in.Delim('}') 74 | if isTopLevel { 75 | in.Consumed() 76 | } 77 | } 78 | 
func easyjson6de889b8EncodeGoCrawlerDistributedModel(out *jwriter.Writer, in Article) { 79 | out.RawByte('{') 80 | first := true 81 | _ = first 82 | { 83 | const prefix string = ",\"title\":" 84 | out.RawString(prefix[1:]) 85 | out.String(string(in.Title)) 86 | } 87 | { 88 | const prefix string = ",\"url\":" 89 | out.RawString(prefix) 90 | out.String(string(in.Url)) 91 | } 92 | { 93 | const prefix string = ",\"genres\":" 94 | out.RawString(prefix) 95 | if in.Genres == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 { 96 | out.RawString("null") 97 | } else { 98 | out.RawByte('[') 99 | for v2, v3 := range in.Genres { 100 | if v2 > 0 { 101 | out.RawByte(',') 102 | } 103 | out.String(string(v3)) 104 | } 105 | out.RawByte(']') 106 | } 107 | } 108 | { 109 | const prefix string = ",\"content\":" 110 | out.RawString(prefix) 111 | out.String(string(in.Content)) 112 | } 113 | out.RawByte('}') 114 | } 115 | 116 | // MarshalJSON supports json.Marshaler interface 117 | func (v Article) MarshalJSON() ([]byte, error) { 118 | w := jwriter.Writer{} 119 | easyjson6de889b8EncodeGoCrawlerDistributedModel(&w, v) 120 | return w.Buffer.BuildBytes(), w.Error 121 | } 122 | 123 | // MarshalEasyJSON supports easyjson.Marshaler interface 124 | func (v Article) MarshalEasyJSON(w *jwriter.Writer) { 125 | easyjson6de889b8EncodeGoCrawlerDistributedModel(w, v) 126 | } 127 | 128 | // UnmarshalJSON supports json.Unmarshaler interface 129 | func (v *Article) UnmarshalJSON(data []byte) error { 130 | r := jlexer.Lexer{Data: data} 131 | easyjson6de889b8DecodeGoCrawlerDistributedModel(&r, v) 132 | return r.Error() 133 | } 134 | 135 | // UnmarshalEasyJSON supports easyjson.Unmarshaler interface 136 | func (v *Article) UnmarshalEasyJSON(l *jlexer.Lexer) { 137 | easyjson6de889b8DecodeGoCrawlerDistributedModel(l, v) 138 | } 139 | -------------------------------------------------------------------------------- /internal/model/book.go: 
-------------------------------------------------------------------------------- 1 | package model 2 | 3 | import "fmt" 4 | 5 | /** 6 | * @Author: super 7 | * @Date: 2020-08-14 15:29 8 | * @Description: 9 | **/ 10 | type Book struct { 11 | BookID int `gorm:"column:book_id" gorm:"PRIMARY_KEY" json:"book_id"` 12 | Title string `gorm:"column:title" json:"title"` 13 | SubTitle string `gorm:"column:sub_title" json:"sub_title"` 14 | Img string `gorm:"column:img" json:"img"` 15 | Author string `gorm:"column:author" json:"author"` 16 | Publish string `gorm:"column:publish" json:"publish"` 17 | Producer string `gorm:"column:producer" json:"producer"` 18 | PublishYear string `gorm:"column:publish_year" gorm:"type:date" json:"publish_year"` 19 | Pages int `gorm:"column:pages" json:"pages"` 20 | Price float64 `gorm:"column:price" json:"price"` 21 | Layout string `gorm:"column:layout" json:"layout"` 22 | Series string `gorm:"column:series" json:"series"` 23 | ISBN string `gorm:"column:isbn" json:"isbn"` 24 | Score float64 `gorm:"column:score" json:"score"` 25 | OriginalName string `gorm:"column:original_name" json:"original_name"` 26 | Comments int `gorm:"column:comments" json:"comments"` 27 | CommentUrl string `gorm:"column:comment_url" json:"comment_url"` 28 | Url string `gorm:"column:url" json:"url"` 29 | } 30 | 31 | // TableName sets the insert table name for this struct type 32 | func (book *Book) TableName() string { 33 | return "books" 34 | } 35 | 36 | func (book Book) String() string { 37 | return fmt.Sprintf("book_id: %d\n"+ 38 | "title: %s\n"+ 39 | "sub_title: %s\n"+ 40 | "img: %s\n"+ 41 | "author: %s\n"+ 42 | "publish: %s\n"+ 43 | "producer: %s\n"+ 44 | "publish_year: %s\n"+ 45 | "pages: %d\n"+ 46 | "price: %f\n"+ 47 | "layout: %s\n"+ 48 | "series: %s\n"+ 49 | "isbn: %s\n"+ 50 | "score: %f\n"+ 51 | "original_name: %s\n"+ 52 | "comments: %d\n"+ 53 | "comment_url: %s\n"+ 54 | "url: %s", 55 | book.BookID, book.Title, book.SubTitle, book.Img, 56 | book.Author, 
book.Publish, book.Producer, book.PublishYear, 57 | book.Pages, book.Price, book.Layout, book.Series, book.ISBN, 58 | book.Score, book.OriginalName, book.Comments, book.CommentUrl, book.Url) 59 | } 60 | -------------------------------------------------------------------------------- /internal/model/book_easyjson.go: -------------------------------------------------------------------------------- 1 | // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT. 2 | 3 | package model 4 | 5 | import ( 6 | json "encoding/json" 7 | easyjson "github.com/mailru/easyjson" 8 | jlexer "github.com/mailru/easyjson/jlexer" 9 | jwriter "github.com/mailru/easyjson/jwriter" 10 | ) 11 | 12 | // suppress unused package warning 13 | var ( 14 | _ *json.RawMessage 15 | _ *jlexer.Lexer 16 | _ *jwriter.Writer 17 | _ easyjson.Marshaler 18 | ) 19 | 20 | func easyjson67646b7bDecodeGoCrawlerDistributedModel(in *jlexer.Lexer, out *Book) { 21 | isTopLevel := in.IsStart() 22 | if in.IsNull() { 23 | if isTopLevel { 24 | in.Consumed() 25 | } 26 | in.Skip() 27 | return 28 | } 29 | in.Delim('{') 30 | for !in.IsDelim('}') { 31 | key := in.UnsafeFieldName(false) 32 | in.WantColon() 33 | if in.IsNull() { 34 | in.Skip() 35 | in.WantComma() 36 | continue 37 | } 38 | switch key { 39 | case "book_id": 40 | out.BookID = int(in.Int()) 41 | case "title": 42 | out.Title = string(in.String()) 43 | case "sub_title": 44 | out.SubTitle = string(in.String()) 45 | case "img": 46 | out.Img = string(in.String()) 47 | case "author": 48 | out.Author = string(in.String()) 49 | case "publish": 50 | out.Publish = string(in.String()) 51 | case "producer": 52 | out.Producer = string(in.String()) 53 | case "publish_year": 54 | out.PublishYear = string(in.String()) 55 | case "pages": 56 | out.Pages = int(in.Int()) 57 | case "price": 58 | out.Price = float64(in.Float64()) 59 | case "layout": 60 | out.Layout = string(in.String()) 61 | case "series": 62 | out.Series = string(in.String()) 63 | case "isbn": 64 | out.ISBN 
= string(in.String()) 65 | case "score": 66 | out.Score = float64(in.Float64()) 67 | case "original_name": 68 | out.OriginalName = string(in.String()) 69 | case "comments": 70 | out.Comments = int(in.Int()) 71 | case "comment_url": 72 | out.CommentUrl = string(in.String()) 73 | case "url": 74 | out.Url = string(in.String()) 75 | default: 76 | in.SkipRecursive() 77 | } 78 | in.WantComma() 79 | } 80 | in.Delim('}') 81 | if isTopLevel { 82 | in.Consumed() 83 | } 84 | } 85 | func easyjson67646b7bEncodeGoCrawlerDistributedModel(out *jwriter.Writer, in Book) { 86 | out.RawByte('{') 87 | first := true 88 | _ = first 89 | { 90 | const prefix string = ",\"book_id\":" 91 | out.RawString(prefix[1:]) 92 | out.Int(int(in.BookID)) 93 | } 94 | { 95 | const prefix string = ",\"title\":" 96 | out.RawString(prefix) 97 | out.String(string(in.Title)) 98 | } 99 | { 100 | const prefix string = ",\"sub_title\":" 101 | out.RawString(prefix) 102 | out.String(string(in.SubTitle)) 103 | } 104 | { 105 | const prefix string = ",\"img\":" 106 | out.RawString(prefix) 107 | out.String(string(in.Img)) 108 | } 109 | { 110 | const prefix string = ",\"author\":" 111 | out.RawString(prefix) 112 | out.String(string(in.Author)) 113 | } 114 | { 115 | const prefix string = ",\"publish\":" 116 | out.RawString(prefix) 117 | out.String(string(in.Publish)) 118 | } 119 | { 120 | const prefix string = ",\"producer\":" 121 | out.RawString(prefix) 122 | out.String(string(in.Producer)) 123 | } 124 | { 125 | const prefix string = ",\"publish_year\":" 126 | out.RawString(prefix) 127 | out.String(string(in.PublishYear)) 128 | } 129 | { 130 | const prefix string = ",\"pages\":" 131 | out.RawString(prefix) 132 | out.Int(int(in.Pages)) 133 | } 134 | { 135 | const prefix string = ",\"price\":" 136 | out.RawString(prefix) 137 | out.Float64(float64(in.Price)) 138 | } 139 | { 140 | const prefix string = ",\"layout\":" 141 | out.RawString(prefix) 142 | out.String(string(in.Layout)) 143 | } 144 | { 145 | const prefix string = 
",\"series\":" 146 | out.RawString(prefix) 147 | out.String(string(in.Series)) 148 | } 149 | { 150 | const prefix string = ",\"isbn\":" 151 | out.RawString(prefix) 152 | out.String(string(in.ISBN)) 153 | } 154 | { 155 | const prefix string = ",\"score\":" 156 | out.RawString(prefix) 157 | out.Float64(float64(in.Score)) 158 | } 159 | { 160 | const prefix string = ",\"original_name\":" 161 | out.RawString(prefix) 162 | out.String(string(in.OriginalName)) 163 | } 164 | { 165 | const prefix string = ",\"comments\":" 166 | out.RawString(prefix) 167 | out.Int(int(in.Comments)) 168 | } 169 | { 170 | const prefix string = ",\"comment_url\":" 171 | out.RawString(prefix) 172 | out.String(string(in.CommentUrl)) 173 | } 174 | { 175 | const prefix string = ",\"url\":" 176 | out.RawString(prefix) 177 | out.String(string(in.Url)) 178 | } 179 | out.RawByte('}') 180 | } 181 | 182 | // MarshalJSON supports json.Marshaler interface 183 | func (v Book) MarshalJSON() ([]byte, error) { 184 | w := jwriter.Writer{} 185 | easyjson67646b7bEncodeGoCrawlerDistributedModel(&w, v) 186 | return w.Buffer.BuildBytes(), w.Error 187 | } 188 | 189 | // MarshalEasyJSON supports easyjson.Marshaler interface 190 | func (v Book) MarshalEasyJSON(w *jwriter.Writer) { 191 | easyjson67646b7bEncodeGoCrawlerDistributedModel(w, v) 192 | } 193 | 194 | // UnmarshalJSON supports json.Unmarshaler interface 195 | func (v *Book) UnmarshalJSON(data []byte) error { 196 | r := jlexer.Lexer{Data: data} 197 | easyjson67646b7bDecodeGoCrawlerDistributedModel(&r, v) 198 | return r.Error() 199 | } 200 | 201 | // UnmarshalEasyJSON supports easyjson.Unmarshaler interface 202 | func (v *Book) UnmarshalEasyJSON(l *jlexer.Lexer) { 203 | easyjson67646b7bDecodeGoCrawlerDistributedModel(l, v) 204 | } 205 | -------------------------------------------------------------------------------- /internal/model/db.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | /** 4 | * @Author: 
super 5 | * @Date: 2020-11-18 15:07 6 | * @Description: 7 | **/ 8 | 9 | type Model struct { 10 | CreatedOn string `gorm:"column:created_on" json:"created_on"` 11 | CreatedBy string `gorm:"column:created_by" json:"created_by"` 12 | DeletedOn string `gorm:"column:deleted_on" json:"deleted_on"` 13 | ModifiedBy string `gorm:"column:modified_by" json:"modified_by"` 14 | ModifiedOn string `gorm:"column:modified_on" json:"modified_on"` 15 | ID string `gorm:"column:id;primary_key" json:"id"` 16 | IsDel int `gorm:"column:is_del" json:"is_del"` 17 | } 18 | -------------------------------------------------------------------------------- /internal/model/forbes.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-12-30 11:18 6 | * @Description: 福布斯排行榜 7 | **/ 8 | 9 | type Forbes struct { 10 | NameEn string `gorm:"column:name_en" json:"name_en"` 11 | Wealth int `gorm:"column:wealth" json:"wealth"` 12 | SourceOfWealth string `gorm:"column:source_of_wealth" json:"source_of_wealth"` 13 | Region string `gorm:"column:region" json:"region"` 14 | ModifiedOn string `gorm:"column:modified_on" json:"modified_on"` 15 | ID string `gorm:"column:id;primary_key" json:"id"` 16 | Rank int `gorm:"column:rank" json:"rank"` 17 | Name string `gorm:"column:name" json:"name"` 18 | } 19 | 20 | // TableName sets the insert table name for this struct type 21 | func (f *Forbes) TableName() string { 22 | return "forbes_list" 23 | } 24 | -------------------------------------------------------------------------------- /internal/routers/job/job.go: -------------------------------------------------------------------------------- 1 | package job 2 | 3 | import ( 4 | "fmt" 5 | "github.com/gin-gonic/gin" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/internal/crontab/common" 8 | "go-crawler-distributed/internal/crontab/master" 9 | "go-crawler-distributed/internal/service" 10 | 
"go-crawler-distributed/pkg/app" 11 | "go-crawler-distributed/pkg/errcode" 12 | "net/http" 13 | ) 14 | 15 | /** 16 | * @Author: super 17 | * @Date: 2021-02-06 16:44 18 | * @Description: 19 | **/ 20 | 21 | // 将任务保存到etcd中 22 | func SaveJob(c *gin.Context) { 23 | name, _ := c.GetPostForm("name") 24 | fmt.Println("1", c.Param("name")) 25 | fmt.Println("2", name) 26 | fmt.Println("3", c.PostForm("name")) 27 | fmt.Println() 28 | param := service.SaveJobRequest{} 29 | response := app.NewResponse(c) 30 | valid, errs := app.BindAndValid(c, ¶m) 31 | if !valid { 32 | global.Logger.Errorf(c, "app.BindAndValid errs: %v", errs) 33 | response.ToErrorResponse(errcode.InvalidParams.WithDetails(errs.Errors()...)) 34 | return 35 | } 36 | job := &common.Job{ 37 | Name: param.Name, 38 | Command: param.Command, 39 | CronExpr: param.CronExpr, 40 | } 41 | 42 | oldJob, err := master.EtcdSaveJob(c, job) 43 | if err != nil { 44 | global.Logger.Errorf(c, "app.EtcdSaveJob err: %v", err) 45 | response.ToErrorResponse(errcode.ErrorSaveFail) 46 | return 47 | } 48 | response.ToResponse(oldJob, "存储任务成功", http.StatusOK) 49 | } 50 | 51 | func DeleteJob(c *gin.Context) { 52 | param := service.DeleteJobRequest{} 53 | response := app.NewResponse(c) 54 | valid, errs := app.BindAndValid(c, ¶m) 55 | if !valid { 56 | global.Logger.Errorf(c, "app.BindAndValid errs: %v", errs) 57 | response.ToErrorResponse(errcode.InvalidParams.WithDetails(errs.Errors()...)) 58 | return 59 | } 60 | 61 | oldJob, err := master.EtcdDeleteJob(c, param.Name) 62 | if err != nil { 63 | global.Logger.Errorf(c, "app.EtcdDeleteJob err: %v", err) 64 | response.ToErrorResponse(errcode.ErrorDeleteFail) 65 | return 66 | } 67 | response.ToResponse(oldJob, "删除任务成功", http.StatusOK) 68 | } 69 | 70 | func ListJobs(c *gin.Context) { 71 | response := app.NewResponse(c) 72 | jobs, err := master.EtcdListJobs(c) 73 | if err != nil { 74 | global.Logger.Errorf(c, "app.EtcdListJobs err: %v", err) 75 | response.ToErrorResponse(errcode.ErrorListFail) 76 
| return 77 | } 78 | response.ToResponse(jobs, "获取任务列表成功", http.StatusOK) 79 | } 80 | 81 | func KillJob(c *gin.Context) { 82 | param := service.KillJobRequest{} 83 | response := app.NewResponse(c) 84 | valid, errs := app.BindAndValid(c, ¶m) 85 | if !valid { 86 | global.Logger.Errorf(c, "app.BindAndValid errs: %v", errs) 87 | response.ToErrorResponse(errcode.InvalidParams.WithDetails(errs.Errors()...)) 88 | return 89 | } 90 | 91 | err := master.EtcdKillJob(c, param.Name) 92 | if err != nil { 93 | global.Logger.Errorf(c, "app.EtcdKillJob err: %v", err) 94 | response.ToErrorResponse(errcode.ErrorDeleteFail) 95 | return 96 | } 97 | response.ToResponse(gin.H{}, "杀死任务成功", http.StatusOK) 98 | } 99 | 100 | func JobLog(c *gin.Context) { 101 | param := service.JobLogRequest{} 102 | pager := app.Pager{Page: app.GetPage(c), PageSize: app.GetPageSize(c)} 103 | response := app.NewResponse(c) 104 | valid, errs := app.BindAndValid(c, ¶m) 105 | if !valid { 106 | global.Logger.Errorf(c, "app.BindAndValid errs: %v", errs) 107 | response.ToErrorResponse(errcode.InvalidParams.WithDetails(errs.Errors()...)) 108 | return 109 | } 110 | result, err := service.GetLogList(¶m, &pager) 111 | if err != nil { 112 | global.Logger.Errorf(c, "service.GetLogList err: %v", err) 113 | response.ToErrorResponse(errcode.ErrorLogListFail) 114 | return 115 | } 116 | response.ToResponse(result, "获取日志列表成功", http.StatusOK) 117 | } 118 | 119 | func WorkerList(c *gin.Context) { 120 | response := app.NewResponse(c) 121 | workers, err := master.ListWorkers() 122 | if err != nil { 123 | global.Logger.Errorf(c, "appWorkerList err: %v", err) 124 | response.ToErrorResponse(errcode.ErrorWorkerListFail) 125 | return 126 | } 127 | response.ToResponse(workers, "获取worker列表成功", http.StatusOK) 128 | } -------------------------------------------------------------------------------- /internal/routers/router.go: -------------------------------------------------------------------------------- 1 | package routers 2 | 3 | import 
( 4 | "github.com/gin-contrib/cors" 5 | "github.com/gin-gonic/gin" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/internal/middleware" 8 | "go-crawler-distributed/internal/routers/job" 9 | "go-crawler-distributed/internal/routers/sd" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 2021-02-06 16:34 15 | * @Description: 16 | **/ 17 | 18 | func NewRouter() *gin.Engine { 19 | r := gin.New() 20 | r.Use(cors.Default()) 21 | if global.ServerSetting.RunMode == "debug" { 22 | r.Use(gin.Logger()) 23 | r.Use(gin.Recovery()) 24 | } else { 25 | r.Use(middleware.AccessLog()) 26 | r.Use(middleware.Recovery()) 27 | } 28 | r.Use(middleware.Tracing()) 29 | r.Use(middleware.ContextTimeout(global.AppSetting.DefaultContextTimeout)) 30 | r.Use(middleware.Translations()) 31 | 32 | svcd := r.Group("/sd") 33 | { 34 | svcd.GET("/health", sd.HealthCheck) 35 | svcd.GET("/disk", sd.DiskCheck) 36 | svcd.GET("/cpu", sd.CPUCheck) 37 | svcd.GET("/ram", sd.RAMCheck) 38 | } 39 | 40 | jobGroup := r.Group("/job") 41 | { 42 | jobGroup.POST("/save", job.SaveJob) 43 | jobGroup.POST("/delete", job.DeleteJob) 44 | jobGroup.GET("/list", job.ListJobs) 45 | jobGroup.POST("/kill", job.KillJob) 46 | jobGroup.GET("/log", job.JobLog) 47 | } 48 | r.GET("/worker/list", job.WorkerList) 49 | 50 | return r 51 | } 52 | -------------------------------------------------------------------------------- /internal/routers/sd/check.go: -------------------------------------------------------------------------------- 1 | package sd 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | 7 | "github.com/gin-gonic/gin" 8 | "github.com/shirou/gopsutil/cpu" 9 | "github.com/shirou/gopsutil/disk" 10 | "github.com/shirou/gopsutil/load" 11 | "github.com/shirou/gopsutil/mem" 12 | ) 13 | 14 | /** 15 | * @Author: super 16 | * @Date: 2020-08-26 15:14 17 | * @Description: 用于服务的健康检查 18 | **/ 19 | 20 | const ( 21 | B = 1 22 | KB = 1024 * B 23 | MB = 1024 * KB 24 | GB = 1024 * MB 25 | ) 26 | 27 | // HealthCheck shows `OK` as 
the ping-pong result. 28 | func HealthCheck(c *gin.Context) { 29 | message := "OK" 30 | c.String(http.StatusOK, "\n"+message) 31 | } 32 | 33 | // DiskCheck checks the disk usage. 34 | func DiskCheck(c *gin.Context) { 35 | u, _ := disk.Usage("/") 36 | 37 | usedMB := int(u.Used) / MB 38 | usedGB := int(u.Used) / GB 39 | totalMB := int(u.Total) / MB 40 | totalGB := int(u.Total) / GB 41 | usedPercent := int(u.UsedPercent) 42 | 43 | status := http.StatusOK 44 | text := "OK" 45 | 46 | if usedPercent >= 95 { 47 | status = http.StatusOK 48 | text = "CRITICAL" 49 | } else if usedPercent >= 90 { 50 | status = http.StatusTooManyRequests 51 | text = "WARNING" 52 | } 53 | 54 | message := fmt.Sprintf("%s - Free space: %dMB (%dGB) / %dMB (%dGB) | Used: %d%%", text, usedMB, usedGB, totalMB, totalGB, usedPercent) 55 | c.String(status, "\n"+message) 56 | } 57 | 58 | // CPUCheck checks the cpu usage. 59 | func CPUCheck(c *gin.Context) { 60 | cores, _ := cpu.Counts(false) 61 | 62 | a, _ := load.Avg() 63 | l1 := a.Load1 64 | l5 := a.Load5 65 | l15 := a.Load15 66 | 67 | status := http.StatusOK 68 | text := "OK" 69 | 70 | if l5 >= float64(cores-1) { 71 | status = http.StatusInternalServerError 72 | text = "CRITICAL" 73 | } else if l5 >= float64(cores-2) { 74 | status = http.StatusTooManyRequests 75 | text = "WARNING" 76 | } 77 | 78 | message := fmt.Sprintf("%s - Load average: %.2f, %.2f, %.2f | Cores: %d", text, l1, l5, l15, cores) 79 | c.String(status, "\n"+message) 80 | } 81 | 82 | // RAMCheck checks the disk usage. 
83 | func RAMCheck(c *gin.Context) { 84 | u, _ := mem.VirtualMemory() 85 | 86 | usedMB := int(u.Used) / MB 87 | usedGB := int(u.Used) / GB 88 | totalMB := int(u.Total) / MB 89 | totalGB := int(u.Total) / GB 90 | usedPercent := int(u.UsedPercent) 91 | 92 | status := http.StatusOK 93 | text := "OK" 94 | 95 | if usedPercent >= 95 { 96 | status = http.StatusInternalServerError 97 | text = "CRITICAL" 98 | } else if usedPercent >= 90 { 99 | status = http.StatusTooManyRequests 100 | text = "WARNING" 101 | } 102 | 103 | message := fmt.Sprintf("%s - Free space: %dMB (%dGB) / %dMB (%dGB) | Used: %d%%", text, usedMB, usedGB, totalMB, totalGB, usedPercent) 104 | c.String(status, "\n"+message) 105 | } 106 | -------------------------------------------------------------------------------- /internal/service/job.go: -------------------------------------------------------------------------------- 1 | package service 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/internal/crontab/common" 8 | "go-crawler-distributed/pkg/app" 9 | "go.mongodb.org/mongo-driver/mongo/options" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 2021-02-06 19:18 15 | * @Description: 16 | **/ 17 | 18 | type SaveJobRequest struct { 19 | Name string `json:"name" form:"name" binding:"required,min=2,max=4294967295"` 20 | Command string `json:"command" form:"command" binding:"required,min=2,max=4294967295"` 21 | CronExpr string `json:"cronExpr" form:"cronExpr" binding:"required,min=2,max=4294967295"` 22 | } 23 | 24 | type DeleteJobRequest struct { 25 | Name string `json:"name" form:"name" binding:"required,min=2,max=4294967295"` 26 | } 27 | 28 | type KillJobRequest struct { 29 | Name string `json:"name" form:"name" binding:"required,min=2,max=4294967295"` 30 | } 31 | 32 | type JobLogRequest struct { 33 | Name string `json:"name" form:"name" binding:"required,min=2,max=4294967295"` 34 | } 35 | 36 | type IJobLogService interface { 37 | GetLogList(param 
*JobLogRequest, pager *app.Pager) (*common.JobLog, error) 38 | } 39 | 40 | func GetLogList(param *JobLogRequest, pager *app.Pager) ([]*common.JobLog, error) { 41 | filter := &common.JobLogFilter{ 42 | JobName: param.Name, 43 | } 44 | fmt.Println(param.Name) 45 | logSort := &common.SortLogByStartTime{ 46 | SortOrder: -1, 47 | } 48 | if pager.PageSize == 0 { 49 | pager.PageSize = 20 50 | } 51 | collection := global.MongoDBEngine.Database("cron").Collection("log") 52 | 53 | skip := int64(pager.Page) 54 | limit := int64(pager.PageSize) 55 | op := &options.FindOptions{ 56 | Sort: logSort, 57 | Skip: &skip, 58 | Limit: &limit, 59 | } 60 | cursor, err := collection.Find(context.TODO(), filter, op) 61 | if err != nil { 62 | return nil, err 63 | } 64 | defer cursor.Close(context.TODO()) 65 | 66 | result := make([]*common.JobLog, 0) 67 | for cursor.Next(context.TODO()) { 68 | jobLog := &common.JobLog{} 69 | if err := cursor.Decode(jobLog); err != nil { 70 | continue 71 | } 72 | result = append(result, jobLog) 73 | } 74 | return result, nil 75 | } 76 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "flag" 7 | "go-crawler-distributed/global" 8 | "go-crawler-distributed/initConf" 9 | "go-crawler-distributed/internal/routers" 10 | "log" 11 | "net/http" 12 | "time" 13 | ) 14 | 15 | /** 16 | * @Author: super 17 | * @Date: 2020-08-21 20:37 18 | * @Description: 19 | **/ 20 | var ( 21 | port string 22 | runMode string 23 | config string 24 | isVersion bool 25 | ) 26 | 27 | func init() { 28 | err := setupFlag() 29 | if err != nil { 30 | log.Printf("init setupSetting err: %v\n", err) 31 | } 32 | initConf.Init(config) 33 | } 34 | 35 | func main() { 36 | router := routers.NewRouter() 37 | s := &http.Server{ 38 | Addr: ":" + global.ServerSetting.HttpPort, 39 | Handler: router, 40 | ReadTimeout: 
global.ServerSetting.ReadTimeout * time.Second, 41 | WriteTimeout: global.ServerSetting.WriteTimeout * time.Second, 42 | MaxHeaderBytes: 1 << 20, 43 | } 44 | 45 | go func() { 46 | if err := pingServer(); err != nil { 47 | global.Logger.Errorf(context.Background(), "The server has no response, or it might took too long to start up.") 48 | } 49 | global.Logger.Info(context.Background(), "The server has been deployed successfully.") 50 | }() 51 | 52 | global.Logger.Infof(context.Background(), "Start to listening the incoming requests on http address :%s", global.ServerSetting.HttpPort) 53 | err := s.ListenAndServe() 54 | if err != nil { 55 | global.Logger.Fatalf(context.Background(), "start listen server err: %v", err) 56 | } 57 | } 58 | 59 | func setupFlag() error { 60 | flag.StringVar(&port, "port", "", "启动端口") 61 | flag.StringVar(&runMode, "mode", "", "启动模式") 62 | flag.StringVar(&config, "config", "configs/", "指定要使用的配置文件路径") 63 | flag.BoolVar(&isVersion, "version", false, "编译信息") 64 | flag.Parse() 65 | 66 | return nil 67 | } 68 | 69 | // pingServer pings the http server to make sure the router is working. 70 | func pingServer() error { 71 | for i := 0; i < 3; i++ { 72 | time.Sleep(time.Second) 73 | // Ping the server by sending a GET request to `/health`. 74 | resp, err := http.Get(":" + global.ServerSetting.HttpPort + "/sd/health") 75 | if err == nil && resp.StatusCode == 200 { 76 | return nil 77 | } 78 | // Sleep for a second to continue the next ping. 
79 | global.Logger.Info(context.Background(), "Waiting for the server, retry in 1 second.") 80 | } 81 | return errors.New("cannot connect to the server") 82 | } 83 | -------------------------------------------------------------------------------- /pkg/app/app.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "go-crawler-distributed/pkg/errcode" 6 | 7 | "net/http" 8 | ) 9 | 10 | /** 11 | * @Author: super 12 | * @Date: 2020-09-18 10:11 13 | * @Description: 设置统一响应与分页 14 | **/ 15 | 16 | type Response struct { 17 | Ctx *gin.Context 18 | } 19 | 20 | type Meta struct { 21 | Msg string `json:"msg"` 22 | Status int `json:"status"` 23 | } 24 | 25 | type Pager struct { 26 | // 页码 27 | Page int `json:"page"` 28 | // 每页数量 29 | PageSize int `json:"page_size"` 30 | // 总行数 31 | TotalRows int `json:"total_rows"` 32 | } 33 | 34 | func NewResponse(ctx *gin.Context) *Response { 35 | return &Response{ 36 | Ctx: ctx, 37 | } 38 | } 39 | 40 | func (r *Response) ToResponse(data interface{}, msg string, status int) { 41 | if data == nil { 42 | data = gin.H{} 43 | } else { 44 | data = gin.H{ 45 | "data": data, 46 | "meta": Meta{ 47 | Msg: msg, 48 | Status: status, 49 | }, 50 | } 51 | } 52 | r.Ctx.JSON(http.StatusOK, data) 53 | } 54 | 55 | func (r *Response) ToErrorResponse(err *errcode.Error) { 56 | response := gin.H{ 57 | "data": gin.H{}, 58 | "meta": Meta{ 59 | Msg: err.Msg(), 60 | Status: err.Code(), 61 | }, 62 | } 63 | details := err.Details() 64 | if len(details) > 0 { 65 | response["details"] = details 66 | } 67 | 68 | r.Ctx.JSON(err.StatusCode(), response) 69 | } 70 | -------------------------------------------------------------------------------- /pkg/app/form.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/gin-gonic/gin" 7 | ut "github.com/go-playground/universal-translator" 8 | 
val "github.com/go-playground/validator/v10" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2020-09-18 15:26 14 | * @Description: 统一参数校验 15 | **/ 16 | 17 | type ValidError struct { 18 | Key string 19 | Message string 20 | } 21 | 22 | type ValidErrors []*ValidError 23 | 24 | func (v *ValidError) Error() string { 25 | return v.Message 26 | } 27 | 28 | func (v ValidErrors) Error() string { 29 | return strings.Join(v.Errors(), ",") 30 | } 31 | 32 | func (v ValidErrors) Errors() []string { 33 | var errs []string 34 | for _, err := range v { 35 | errs = append(errs, err.Error()) 36 | } 37 | 38 | return errs 39 | } 40 | 41 | func BindAndValid(c *gin.Context, v interface{}) (bool, ValidErrors) { 42 | var errs ValidErrors 43 | err := c.ShouldBind(v) 44 | if err != nil { 45 | v := c.Value("trans") 46 | trans, _ := v.(ut.Translator) 47 | verrs, ok := err.(val.ValidationErrors) 48 | if !ok { 49 | return false, errs 50 | } 51 | 52 | for key, value := range verrs.Translate(trans) { 53 | errs = append(errs, &ValidError{ 54 | Key: key, 55 | Message: value, 56 | }) 57 | } 58 | 59 | return false, errs 60 | } 61 | 62 | return true, nil 63 | } 64 | -------------------------------------------------------------------------------- /pkg/app/pagination.go: -------------------------------------------------------------------------------- 1 | package app 2 | 3 | import ( 4 | "github.com/gin-gonic/gin" 5 | "go-crawler-distributed/global" 6 | "go-crawler-distributed/pkg/convert" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-09-18 10:11 12 | * @Description: 获取与分页相关的内容 13 | **/ 14 | 15 | func GetPage(c *gin.Context) int { 16 | page := convert.StrTo(c.Query("page")).MustInt() 17 | if page <= 0 { 18 | return 1 19 | } 20 | 21 | return page 22 | } 23 | 24 | func GetPageSize(c *gin.Context) int { 25 | pageSize := convert.StrTo(c.Query("page_size")).MustInt() 26 | if pageSize <= 0 { 27 | return global.AppSetting.DefaultPageSize 28 | } 29 | if pageSize > 
// GetPageOffset converts a 1-based page number into a row offset.
// Pages that are zero or negative map to offset 0.
func GetPageOffset(page, pageSize int) int {
	if page <= 0 {
		return 0
	}

	return pageSize * (page - 1)
}
{ 20 | return "", err 21 | } 22 | return str, err 23 | } 24 | 25 | func GetString(key string) (string, error) { 26 | c := global.RedisEngine.Get() 27 | defer c.Close() 28 | str, err := redis.String(c.Do("get", key)) 29 | if err != nil { 30 | return "", err 31 | } 32 | return str, err 33 | } 34 | 35 | func AddElementToSet(key string, value string) (int, error) { 36 | c := global.RedisEngine.Get() 37 | defer c.Close() 38 | result, err := redis.Int(c.Do("sadd", key, value)) 39 | if err != nil { 40 | return -1, err 41 | } 42 | return result, err 43 | } 44 | 45 | func ElementIsInSet(key string, value string) (bool, error) { 46 | c := global.RedisEngine.Get() 47 | defer c.Close() 48 | result, err := redis.Int(c.Do("sismember", key, value)) 49 | if err != nil { 50 | return false, err 51 | } 52 | if result == 1 { 53 | return true, err 54 | } 55 | return false, err 56 | } 57 | 58 | func GetAllElementFromSet(key string) ([]string, error) { 59 | c := global.RedisEngine.Get() 60 | defer c.Close() 61 | return redis.Strings(c.Do("smembers", key)) 62 | } 63 | 64 | func DelAllElementFromSet(key string) (int, error) { 65 | c := global.RedisEngine.Get() 66 | defer c.Close() 67 | return redis.Int(c.Do("DEL", key)) 68 | } 69 | -------------------------------------------------------------------------------- /pkg/cache/cache_test.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "fmt" 5 | "go-crawler-distributed/global" 6 | "go-crawler-distributed/pkg/setting" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2020-12-29 11:31 14 | * @Description: 15 | **/ 16 | 17 | func TestAddElementToSet(t *testing.T) { 18 | newSetting, err := setting.NewSetting(strings.Split("/Users/super/develop/superTools-frontground-backend/configs", ",")...) 
19 | if err != nil { 20 | t.Error(err) 21 | } 22 | err = newSetting.ReadSection("Cache", &global.CacheSetting) 23 | if err != nil { 24 | t.Error(err) 25 | } 26 | global.RedisEngine, err = NewRedisEngine(global.CacheSetting) 27 | if err != nil { 28 | t.Error(err) 29 | } 30 | result, err := AddElementToSet("hello", "1") 31 | if err != nil { 32 | t.Error(err) 33 | } 34 | fmt.Println(result) 35 | } 36 | 37 | func BenchmarkAddElementToSet(b *testing.B) { 38 | newSetting, err := setting.NewSetting(strings.Split("/Users/super/develop/superTools-frontground-backend/configs", ",")...) 39 | if err != nil { 40 | b.Error(err) 41 | } 42 | err = newSetting.ReadSection("Cache", &global.CacheSetting) 43 | if err != nil { 44 | b.Error(err) 45 | } 46 | global.RedisEngine, err = NewRedisEngine(global.CacheSetting) 47 | if err != nil { 48 | b.Error(err) 49 | } 50 | for i := 0; i < b.N; i++ { 51 | _, err := AddElementToSet("hello", "1") 52 | if err != nil { 53 | b.Error(err) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /pkg/consistentHash/consistent.go: -------------------------------------------------------------------------------- 1 | package consistentHash 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-12-16 16:56 6 | * @Description: 一致性哈希算法实现 7 | **/ 8 | import ( 9 | "errors" 10 | "hash/crc32" 11 | "sort" 12 | "strconv" 13 | "sync" 14 | ) 15 | 16 | //实现一致性哈希算法 17 | //声明新切片类型 18 | type units []uint32 19 | 20 | //返回切片长度 21 | func (x units) Len() int { 22 | return len(x) 23 | } 24 | 25 | //比对两个数大小 26 | func (x units) Less(i, j int) bool { 27 | return x[i] < x[j] 28 | } 29 | 30 | //切片中两个值的交换 31 | func (x units) Swap(i, j int) { 32 | x[i], x[j] = x[j], x[i] 33 | } 34 | 35 | //当hash环上没有数据时,提示错误 36 | var errEmpty = errors.New("hash环没有数据") 37 | 38 | //创建结构体保存一致性hash信息 39 | type Consistent struct { 40 | //hash环,key为哈希值,值存放节点的信息 41 | circle map[uint32]string 42 | //已经排序的节点hash切片 43 | sortedHashes units 44 | 
//虚拟节点个数,用来增加hash的平衡性 45 | VirtualNode int 46 | //map 读写锁 47 | sync.RWMutex 48 | } 49 | 50 | //创建一致性hash算法结构体,设置默认节点数量 51 | func NewConsistent() *Consistent { 52 | return &Consistent{ 53 | //初始化变量 54 | circle: make(map[uint32]string), 55 | //设置虚拟节点个数 56 | VirtualNode: 20, 57 | } 58 | } 59 | 60 | //自动生成key值 61 | func (c *Consistent) generateKey(element string, index int) string { 62 | //副本key生成逻辑 63 | return element + strconv.Itoa(index) 64 | } 65 | 66 | //获取hash位置 67 | func (c *Consistent) hashkey(key string) uint32 { 68 | if len(key) < 64 { 69 | //声明一个数组长度为64 70 | var srcatch [64]byte 71 | //拷贝数据到数组中 72 | copy(srcatch[:], key) 73 | //使用IEEE 多项式返回数据的CRC-32校验和 74 | return crc32.ChecksumIEEE(srcatch[:len(key)]) 75 | } 76 | return crc32.ChecksumIEEE([]byte(key)) 77 | } 78 | 79 | //更新排序,方便查找 80 | func (c *Consistent) updateSortedHashes() { 81 | hashes := c.sortedHashes[:0] 82 | //判断切片容量,是否过大,如果过大则重置 83 | if cap(c.sortedHashes)/(c.VirtualNode*4) > len(c.circle) { 84 | hashes = nil 85 | } 86 | 87 | //添加hashes 88 | for k := range c.circle { 89 | hashes = append(hashes, k) 90 | } 91 | 92 | //对所有节点hash值进行排序, 93 | //方便之后进行二分查找 94 | sort.Sort(hashes) 95 | //重新赋值 96 | c.sortedHashes = hashes 97 | 98 | } 99 | 100 | //向hash环中添加节点 101 | func (c *Consistent) Add(element string) { 102 | //加锁 103 | c.Lock() 104 | //解锁 105 | defer c.Unlock() 106 | c.add(element) 107 | } 108 | 109 | //添加节点 110 | func (c *Consistent) add(element string) { 111 | //循环虚拟节点,设置副本 112 | for i := 0; i < c.VirtualNode; i++ { 113 | //根据生成的节点添加到hash环中 114 | c.circle[c.hashkey(c.generateKey(element, i))] = element 115 | } 116 | //更新排序 117 | c.updateSortedHashes() 118 | } 119 | 120 | //删除节点 121 | func (c *Consistent) remove(element string) { 122 | for i := 0; i < c.VirtualNode; i++ { 123 | delete(c.circle, c.hashkey(c.generateKey(element, i))) 124 | } 125 | c.updateSortedHashes() 126 | } 127 | 128 | //删除一个节点 129 | func (c *Consistent) Remove(element string) { 130 | c.Lock() 131 | defer c.Unlock() 132 | 
// StrTo is a string with helpers for converting it to numeric types.
type StrTo string

// String returns the underlying string.
func (s StrTo) String() string {
	return string(s)
}

// Int parses s as a base-10 int.
func (s StrTo) Int() (int, error) {
	return strconv.Atoi(s.String())
}

// MustInt parses s as a base-10 int and returns 0 when s is not a number.
func (s StrTo) MustInt() int {
	v, _ := s.Int()
	return v
}

// UInt32 parses s as a base-10 uint32.
//
// Values outside [0, 4294967295] are rejected with a *strconv.NumError wrapping
// strconv.ErrRange instead of being silently wrapped around; on any error the
// returned value is 0.
func (s StrTo) UInt32() (uint32, error) {
	const maxUint32 = 1<<32 - 1

	// Parse as a signed 64-bit value so that a leading "+" keeps working and
	// the full uint32 range is available even on 32-bit platforms.
	v, err := strconv.ParseInt(s.String(), 10, 64)
	if err != nil {
		return 0, err
	}
	if v < 0 || v > maxUint32 {
		return 0, &strconv.NumError{Func: "UInt32", Num: s.String(), Err: strconv.ErrRange}
	}
	return uint32(v), nil
}

// MustUInt32 parses s as a uint32 and returns 0 when s is not a valid uint32.
func (s StrTo) MustUInt32() uint32 {
	v, _ := s.UInt32()
	return v
}

// MustInt64 parses s as a base-10 int64 and returns 0 when s is not a number.
func (s StrTo) MustInt64() int64 {
	// Parse at 64 bits directly rather than going through int, which is only
	// 32 bits wide on some platforms.
	v, _ := strconv.ParseInt(s.String(), 10, 64)
	return v
}
"go-crawler-distributed/pkg/setting" 8 | "time" 9 | 10 | "github.com/jinzhu/gorm" 11 | _ "github.com/jinzhu/gorm/dialects/mysql" 12 | ) 13 | 14 | /** 15 | * @Author: super 16 | * @Date: 2020-09-16 07:42 17 | * @Description: 统一定义数据库公共字段 18 | **/ 19 | 20 | const ( 21 | STATE_OPEN = 1 22 | STATE_CLOSE = 0 23 | ) 24 | 25 | //根据配置获取对应的db连接 26 | func NewDBEngine(databaseSetting *setting.DatabaseSettingS) (*gorm.DB, error) { 27 | db, err := gorm.Open(databaseSetting.DBType, fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=%s&parseTime=%t&loc=Local", 28 | databaseSetting.UserName, 29 | databaseSetting.Password, 30 | databaseSetting.Host, 31 | databaseSetting.DBName, 32 | databaseSetting.Charset, 33 | databaseSetting.ParseTime, 34 | )) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | if global.ServerSetting.RunMode == "debug" { 40 | db.LogMode(true) 41 | } 42 | db.SingularTable(true) 43 | db.Callback().Create().Replace("gorm:update_time_stamp", updateTimeStampForCreateCallback) 44 | db.Callback().Update().Replace("gorm:update_time_stamp", updateTimeStampForUpdateCallback) 45 | db.Callback().Delete().Replace("gorm:delete", deleteCallback) 46 | db.DB().SetMaxIdleConns(databaseSetting.MaxIdleConns) 47 | db.DB().SetMaxOpenConns(databaseSetting.MaxOpenConns) 48 | otgorm.AddGormCallbacks(db) 49 | return db, nil 50 | } 51 | 52 | //以下内容用于定义回调,统一填充公共字段 53 | func updateTimeStampForCreateCallback(scope *gorm.Scope) { 54 | if !scope.HasError() { 55 | nowTime := time.Now().Format("2006-01-02 15:04:05") 56 | 57 | if createTimeField, ok := scope.FieldByName("CreatedOn"); ok { 58 | if createTimeField.IsBlank { 59 | _ = createTimeField.Set(nowTime) 60 | } 61 | } 62 | 63 | if modifyTimeField, ok := scope.FieldByName("ModifiedOn"); ok { 64 | if modifyTimeField.IsBlank { 65 | _ = modifyTimeField.Set(nowTime) 66 | } 67 | } 68 | } 69 | } 70 | 71 | func updateTimeStampForUpdateCallback(scope *gorm.Scope) { 72 | if _, ok := scope.Get("gorm:update_column"); !ok { 73 | nowTime := 
time.Now().Format("2006-01-02 15:04:05") 74 | _ = scope.SetColumn("ModifiedOn", nowTime) 75 | } 76 | } 77 | 78 | func deleteCallback(scope *gorm.Scope) { 79 | if !scope.HasError() { 80 | var extraOption string 81 | if str, ok := scope.Get("gorm:delete_option"); ok { 82 | extraOption = fmt.Sprint(str) 83 | } 84 | 85 | deletedOnField, hasDeletedOnField := scope.FieldByName("DeletedOn") 86 | isDelField, hasIsDelField := scope.FieldByName("IsDel") 87 | if !scope.Search.Unscoped && hasDeletedOnField && hasIsDelField { 88 | nowTime := time.Now().Format("2006-01-02 15:04:05") 89 | scope.Raw(fmt.Sprintf( 90 | "UPDATE %v SET %v=%v,%v=%v%v%v", 91 | scope.QuotedTableName(), 92 | scope.Quote(deletedOnField.DBName), 93 | scope.AddToVars(nowTime), 94 | scope.Quote(isDelField.DBName), 95 | scope.AddToVars(1), 96 | addExtraSpaceIfExist(scope.CombinedConditionSql()), 97 | addExtraSpaceIfExist(extraOption), 98 | )).Exec() 99 | } else { 100 | scope.Raw(fmt.Sprintf( 101 | "DELETE FROM %v%v%v", 102 | scope.QuotedTableName(), 103 | addExtraSpaceIfExist(scope.CombinedConditionSql()), 104 | addExtraSpaceIfExist(extraOption), 105 | )).Exec() 106 | } 107 | } 108 | } 109 | 110 | func addExtraSpaceIfExist(str string) string { 111 | if str != "" { 112 | return " " + str 113 | } 114 | return "" 115 | } 116 | -------------------------------------------------------------------------------- /pkg/elastic/elastic.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | import ( 4 | "github.com/olivere/elastic/v7" 5 | 6 | "go-crawler-distributed/pkg/setting" 7 | 8 | "time" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2020-12-29 11:51 14 | * @Description: 15 | **/ 16 | 17 | func NewElasticEngine(elasticSetting *setting.ElasticSettingS) (*elastic.Client, error) { 18 | var client *elastic.Client 19 | for i := 0; i < 10; i++ { 20 | // Ping the server by sending a GET request to `/health`. 
21 | var err error 22 | client, err = elastic.NewClient( 23 | elastic.SetURL(elasticSetting.Url), 24 | elastic.SetSniff(false)) 25 | if err == nil { 26 | return nil, err 27 | } 28 | time.Sleep(time.Second) 29 | } 30 | return client, nil 31 | } 32 | -------------------------------------------------------------------------------- /pkg/elastic/elasticOpeartion.go: -------------------------------------------------------------------------------- 1 | package elastic 2 | 3 | import ( 4 | "context" 5 | "github.com/olivere/elastic/v7" 6 | "go-crawler-distributed/internal/model" 7 | "reflect" 8 | 9 | "go-crawler-distributed/global" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 2020-12-29 11:58 15 | * @Description: 16 | **/ 17 | const Mapping = ` 18 | { 19 | "mappings": { 20 | "properties": { 21 | "title": { 22 | "type": "text" 23 | }, 24 | "url": { 25 | "type": "text" 26 | }, 27 | "genres": { 28 | "type": "keyword" 29 | }, 30 | "content": { 31 | "type": "text" 32 | } 33 | } 34 | } 35 | }` 36 | 37 | //判断index是否存在 38 | func IndexExist(index string) (bool, error) { 39 | client := global.ElasticEngine 40 | 41 | exist, err := client.IndexExists(index).Do(context.Background()) 42 | if err != nil { 43 | return exist, err 44 | } 45 | if !exist { 46 | _, err := client.CreateIndex(index).BodyString(Mapping).Do(context.Background()) 47 | if err != nil { 48 | return false, err 49 | } 50 | } 51 | return true, err 52 | } 53 | 54 | //保存信息 55 | func SaveInfo(table string, data interface{}) (string, error) { 56 | client := global.ElasticEngine 57 | // https://www.letianbiji.com/elasticsearch/es7-quick-start.html 58 | // 在v7中Type被注释 59 | // ES 实例:对应 MySQL 实例中的一个 Database。 60 | // Index 对应 MySQL 中的 Table 61 | // Document 对应 MySQL 中表的记录。 62 | response, err := client.Index().Index(table).BodyJson(data).Do(context.Background()) 63 | if err != nil { 64 | return "", err 65 | } 66 | return response.Id, nil 67 | } 68 | 69 | //获取信息 70 | func GetInfo(table string, id string) (*model.Article, 
error) { 71 | client := global.ElasticEngine 72 | result, err := client.Get().Index(table).Id(id).Do(context.Background()) 73 | if err != nil { 74 | return nil, err 75 | } 76 | article := &model.Article{} 77 | err = article.UnmarshalJSON(result.Source) 78 | if err != nil { 79 | return nil, err 80 | } 81 | return article, nil 82 | } 83 | 84 | //搜索信息 85 | func SearchInfo(table string, fieldName string, fieldValue string) ([]*model.Article, error) { 86 | query := elastic.NewTermQuery(fieldName, fieldValue) 87 | client := global.ElasticEngine 88 | result, err := client.Search().Index(table).Query(query).Do(context.Background()) 89 | if err != nil { 90 | return nil, err 91 | } 92 | articles := make([]*model.Article, 0) 93 | article := model.Article{} 94 | total := result.TotalHits() 95 | if total > 0 { 96 | for _, item := range result.Each(reflect.TypeOf(article)) { 97 | if t, ok := item.(model.Article); ok { 98 | articles = append(articles, &t) 99 | } 100 | } 101 | } 102 | return articles, nil 103 | } 104 | -------------------------------------------------------------------------------- /pkg/email/email.go: -------------------------------------------------------------------------------- 1 | package email 2 | 3 | import ( 4 | "crypto/tls" 5 | "gopkg.in/gomail.v2" 6 | ) 7 | 8 | /** 9 | * @Author: super 10 | * @Date: 2020-09-23 20:37 11 | * @Description: 使用gomail库发送邮件 12 | **/ 13 | 14 | type Email struct { 15 | *SMTPInfo 16 | } 17 | 18 | type SMTPInfo struct { 19 | Host string 20 | Port int 21 | IsSSL bool 22 | UserName string 23 | Password string 24 | From string 25 | } 26 | 27 | func NewEmail(info *SMTPInfo) *Email { 28 | return &Email{SMTPInfo: info} 29 | } 30 | 31 | func (e *Email) SendMail(to []string, subject, body string) error { 32 | m := gomail.NewMessage() 33 | m.SetHeader("From", e.From) 34 | m.SetHeader("To", to...) 
// Error is a business error made of a numeric code, a message and optional
// detail strings.
//
// The fields are unexported and are read through the Code, Msg and Details
// accessors. They carry no json tags: encoding/json ignores unexported fields,
// so tags on them have no effect and are reported by go vet.
type Error struct {
	code    int
	msg     string
	details []string
}
panic(fmt.Sprintf("错误码%d已经存在,请更换一个", code)) 25 | } 26 | codes[code] = msg 27 | return &Error{code: code, msg: msg} 28 | } 29 | 30 | func (e *Error) Error() string { 31 | return fmt.Sprintf("错误码:%d, 错误信息::%s", e.Code(), e.Msg()) 32 | } 33 | 34 | func (e *Error) Code() int { 35 | return e.code 36 | } 37 | 38 | func (e *Error) Msg() string { 39 | return e.msg 40 | } 41 | 42 | func (e *Error) Msgf(args []interface{}) string { 43 | return fmt.Sprintf(e.msg, args...) 44 | } 45 | 46 | func (e *Error) Details() []string { 47 | return e.details 48 | } 49 | 50 | func (e *Error) WithDetails(details ...string) *Error { 51 | newError := *e 52 | newError.details = []string{} 53 | for _, d := range details { 54 | newError.details = append(newError.details, d) 55 | } 56 | 57 | return &newError 58 | } 59 | 60 | func (e *Error) StatusCode() int { 61 | switch e.Code() { 62 | case Success.Code(): 63 | return http.StatusOK 64 | case ServerError.Code(): 65 | return http.StatusInternalServerError 66 | case InvalidParams.Code(): 67 | return http.StatusBadRequest 68 | } 69 | 70 | return http.StatusInternalServerError 71 | } 72 | -------------------------------------------------------------------------------- /pkg/errcode/user.go: -------------------------------------------------------------------------------- 1 | package errcode 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-11-24 18:45 6 | * @Description: 7 | **/ 8 | 9 | var ( 10 | ErrorUserSignInFail = NewError(20060001, "用户登录失败") 11 | ErrorUserUpdateFail = NewError(20060002, "用户信息更新失败") 12 | ErrorUserRegisterFail = NewError(20060003, "用户注册失败") 13 | ErrorUserCookieFail = NewError(20060004, "用户cookie验证失败") 14 | ) 15 | -------------------------------------------------------------------------------- /pkg/etcd/etcd.go: -------------------------------------------------------------------------------- 1 | package etcd 2 | 3 | import ( 4 | "github.com/coreos/etcd/clientv3" 5 | "go-crawler-distributed/pkg/setting" 6 | "time" 7 | ) 8 | 9 | /** 
// MkDir creates the directory src, along with any missing parents, using
// os.ModePerm as the permission bits. It returns the error from os.MkdirAll,
// if any.
func MkDir(src string) error {
	return os.MkdirAll(src, os.ModePerm)
}
61 | } 62 | 63 | return f, nil 64 | } 65 | -------------------------------------------------------------------------------- /pkg/idGenerator/idGenerator.go: -------------------------------------------------------------------------------- 1 | package idGenerator 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-11-24 13:45 6 | * @Description: 7 | **/ 8 | 9 | import ( 10 | "github.com/bwmarrin/snowflake" 11 | 12 | "strconv" 13 | ) 14 | 15 | /** 16 | * @Author: super 17 | * @Date: 2020-09-09 22:04 18 | * @Description: 雪花算法介绍:https://juejin.im/post/6844903562007314440 19 | **/ 20 | 21 | var node *snowflake.Node 22 | 23 | // InitSnowflake initiate Snowflake node singleton. 24 | func InitSnowflake() error { 25 | // Get node number from env TIX_NODE_NO 26 | //key, ok := os.LookupEnv("TIX_NODE_NO") 27 | //if !ok { 28 | // return fmt.Errorf("TIX_NODE_NO is not set in system environment") 29 | //} 30 | // Parse node number 31 | key := "1" 32 | nodeNo, err := strconv.ParseInt(key, 10, 64) 33 | if err != nil { 34 | return err 35 | } 36 | // Create snowflake node 37 | n, err := snowflake.NewNode(nodeNo) 38 | if err != nil { 39 | return err 40 | } 41 | // Set node 42 | node = n 43 | return nil 44 | } 45 | 46 | // GenerateSnowflake generate Twitter Snowflake ID 47 | func GenerateID() string { 48 | return node.Generate().String() 49 | } 50 | -------------------------------------------------------------------------------- /pkg/idGenerator/idGenerator_test.go: -------------------------------------------------------------------------------- 1 | package idGenerator 2 | 3 | import "testing" 4 | 5 | /** 6 | * @Author: super 7 | * @Date: 2020-11-24 13:57 8 | * @Description: 9 | **/ 10 | 11 | func TestGenerateSnowflake(t *testing.T) { 12 | err := InitSnowflake() 13 | if err != nil { 14 | t.Error(err) 15 | } 16 | id := GenerateID() 17 | t.Log(id) 18 | } 19 | -------------------------------------------------------------------------------- /pkg/ipParser/ipParser.go: 
-------------------------------------------------------------------------------- 1 | package ipParser 2 | 3 | import ( 4 | "errors" 5 | "github.com/kayon/iploc" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/pkg/setting" 8 | ) 9 | /** 10 | * @Author: super 11 | * @Date: 2021-02-15 16:57 12 | * @Description: 13 | **/ 14 | 15 | func NewIpParser(ipParserSetting *setting.IpParserSettingS)(locator *iploc.Locator, err error){ 16 | locator, err = iploc.Open(ipParserSetting.FilePath) 17 | if err != nil { 18 | return 19 | } 20 | return 21 | } 22 | 23 | // 查询IP所属位置 24 | func GetIpLocationString(ip string) (string, error ){ 25 | detail := global.IpParser.Find(ip) 26 | if detail != nil { 27 | return detail.String(), nil 28 | }else{ 29 | return "", errors.New("can't find ip location") 30 | } 31 | } -------------------------------------------------------------------------------- /pkg/ipParser/qqwry.utf8.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang-collection/go-crawler-distributed/e35c6480f8bbe5aac856f6c769b251c769372f7d/pkg/ipParser/qqwry.utf8.dat -------------------------------------------------------------------------------- /pkg/logger/logger.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "io" 8 | "log" 9 | "runtime" 10 | "time" 11 | 12 | "github.com/gin-gonic/gin" 13 | ) 14 | 15 | /** 16 | * @Author: super 17 | * @Date: 2020-08-21 09:12 18 | * @Description: 方便后期进行统一配置 19 | **/ 20 | 21 | type Level int8 22 | 23 | type Fields map[string]interface{} 24 | 25 | const ( 26 | LevelDebug Level = iota 27 | LevelInfo 28 | LevelWarn 29 | LevelError 30 | LevelFatal 31 | LevelPanic 32 | ) 33 | 34 | func (l Level) String() string { 35 | switch l { 36 | case LevelDebug: 37 | return "debug" 38 | case LevelInfo: 39 | return "info" 40 | case LevelWarn: 41 | return "warn" 42 
| case LevelError: 43 | return "error" 44 | case LevelFatal: 45 | return "fatal" 46 | case LevelPanic: 47 | return "panic" 48 | } 49 | return "" 50 | } 51 | 52 | type Logger struct { 53 | newLogger *log.Logger 54 | ctx context.Context 55 | fields Fields 56 | callers []string 57 | } 58 | 59 | func NewLogger(w io.Writer, prefix string, flag int) *Logger { 60 | l := log.New(w, prefix, flag) 61 | return &Logger{newLogger: l} 62 | } 63 | 64 | func (l *Logger) clone() *Logger { 65 | nl := *l 66 | return &nl 67 | } 68 | 69 | func (l *Logger) WithFields(f Fields) *Logger { 70 | ll := l.clone() 71 | if ll.fields == nil { 72 | ll.fields = make(Fields) 73 | } 74 | for k, v := range f { 75 | ll.fields[k] = v 76 | } 77 | return ll 78 | } 79 | 80 | func (l *Logger) WithContext(ctx context.Context) *Logger { 81 | ll := l.clone() 82 | ll.ctx = ctx 83 | return ll 84 | } 85 | 86 | func (l *Logger) WithCaller(skip int) *Logger { 87 | ll := l.clone() 88 | pc, file, line, ok := runtime.Caller(skip) 89 | if ok { 90 | f := runtime.FuncForPC(pc) 91 | ll.callers = []string{fmt.Sprintf("%s: %d %s", file, line, f.Name())} 92 | } 93 | 94 | return ll 95 | } 96 | 97 | func (l *Logger) WithCallersFrames() *Logger { 98 | maxCallerDepth := 25 99 | minCallerDepth := 1 100 | callers := []string{} 101 | pcs := make([]uintptr, maxCallerDepth) 102 | depth := runtime.Callers(minCallerDepth, pcs) 103 | frames := runtime.CallersFrames(pcs[:depth]) 104 | for frame, more := frames.Next(); more; frame, more = frames.Next() { 105 | s := fmt.Sprintf("%s: %d %s", frame.File, frame.Line, frame.Function) 106 | callers = append(callers, s) 107 | if !more { 108 | break 109 | } 110 | } 111 | ll := l.clone() 112 | ll.callers = callers 113 | return ll 114 | } 115 | 116 | func (l *Logger) WithTrace() *Logger { 117 | ginCtx, ok := l.ctx.(*gin.Context) 118 | if ok { 119 | return l.WithFields(Fields{ 120 | "trace_id": ginCtx.MustGet("X-Trace-ID"), 121 | "span_id": ginCtx.MustGet("X-Span-ID"), 122 | }) 123 | } 124 | return 
l 125 | } 126 | 127 | func (l *Logger) JSONFormat(level Level, message string) map[string]interface{} { 128 | data := make(Fields, len(l.fields)+4) 129 | data["level"] = level.String() 130 | data["time"] = time.Now().Local().UnixNano() 131 | data["message"] = message 132 | data["callers"] = l.callers 133 | if len(l.fields) > 0 { 134 | for k, v := range l.fields { 135 | if _, ok := data[k]; !ok { 136 | data[k] = v 137 | } 138 | } 139 | } 140 | 141 | return data 142 | } 143 | 144 | func (l *Logger) Output(level Level, message string) { 145 | body, _ := json.Marshal(l.JSONFormat(level, message)) 146 | content := string(body) 147 | switch level { 148 | case LevelDebug: 149 | l.newLogger.Print(content) 150 | case LevelInfo: 151 | l.newLogger.Print(content) 152 | case LevelWarn: 153 | l.newLogger.Print(content) 154 | case LevelError: 155 | l.newLogger.Print(content) 156 | case LevelFatal: 157 | l.newLogger.Fatal(content) 158 | case LevelPanic: 159 | l.newLogger.Panic(content) 160 | } 161 | } 162 | 163 | func (l *Logger) Debug(ctx context.Context, v ...interface{}) { 164 | l.WithContext(ctx).WithTrace().Output(LevelDebug, fmt.Sprint(v...)) 165 | } 166 | 167 | func (l *Logger) Debugf(ctx context.Context, format string, v ...interface{}) { 168 | l.WithContext(ctx).WithTrace().Output(LevelDebug, fmt.Sprintf(format, v...)) 169 | } 170 | 171 | func (l *Logger) Info(ctx context.Context, v ...interface{}) { 172 | l.WithContext(ctx).WithTrace().Output(LevelInfo, fmt.Sprint(v...)) 173 | } 174 | 175 | func (l *Logger) Infof(ctx context.Context, format string, v ...interface{}) { 176 | l.WithContext(ctx).WithTrace().Output(LevelInfo, fmt.Sprintf(format, v...)) 177 | } 178 | 179 | func (l *Logger) Warn(ctx context.Context, v ...interface{}) { 180 | l.WithContext(ctx).WithTrace().Output(LevelWarn, fmt.Sprint(v...)) 181 | } 182 | 183 | func (l *Logger) Warnf(ctx context.Context, format string, v ...interface{}) { 184 | l.WithContext(ctx).WithTrace().Output(LevelWarn, 
fmt.Sprintf(format, v...)) 185 | } 186 | 187 | func (l *Logger) Error(ctx context.Context, v ...interface{}) { 188 | l.WithContext(ctx).WithTrace().Output(LevelError, fmt.Sprint(v...)) 189 | } 190 | 191 | func (l *Logger) Errorf(ctx context.Context, format string, v ...interface{}) { 192 | l.WithContext(ctx).WithTrace().Output(LevelError, fmt.Sprintf(format, v...)) 193 | } 194 | 195 | func (l *Logger) Fatal(ctx context.Context, v ...interface{}) { 196 | l.WithContext(ctx).WithTrace().Output(LevelFatal, fmt.Sprint(v...)) 197 | } 198 | 199 | func (l *Logger) Fatalf(ctx context.Context, format string, v ...interface{}) { 200 | l.WithContext(ctx).WithTrace().Output(LevelFatal, fmt.Sprintf(format, v...)) 201 | } 202 | 203 | func (l *Logger) Panic(ctx context.Context, v ...interface{}) { 204 | l.WithContext(ctx).WithTrace().Output(LevelPanic, fmt.Sprint(v...)) 205 | } 206 | 207 | func (l *Logger) Panicf(ctx context.Context, format string, v ...interface{}) { 208 | l.WithContext(ctx).WithTrace().Output(LevelPanic, fmt.Sprintf(format, v...)) 209 | } 210 | -------------------------------------------------------------------------------- /pkg/mongoDB/mongo.go: -------------------------------------------------------------------------------- 1 | package mongoDB 2 | 3 | import ( 4 | "context" 5 | "go-crawler-distributed/pkg/setting" 6 | "go.mongodb.org/mongo-driver/mongo" 7 | "go.mongodb.org/mongo-driver/mongo/options" 8 | "go.mongodb.org/mongo-driver/mongo/readpref" 9 | "time" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 2021-02-02 11:46 15 | * @Description: 16 | **/ 17 | 18 | func NewMongoDBEngine(mongoDbSetting *setting.MongoDBSettingS) (*mongo.Client, error) { 19 | var client *mongo.Client 20 | var err error 21 | ctx, _ := context.WithTimeout(context.Background(), time.Duration(mongoDbSetting.Timeout)*time.Second) 22 | opt := options.Client().ApplyURI(mongoDbSetting.Url) 23 | opt.SetMaxPoolSize(mongoDbSetting.MaxPoolSize) 24 | if client, err = mongo.Connect(ctx, 
opt); err != nil { 25 | return nil, err 26 | } else { 27 | ctx2, _ := context.WithTimeout(context.Background(), time.Duration(mongoDbSetting.Timeout)*time.Second) 28 | err := client.Ping(ctx2, readpref.Primary()) 29 | if err != nil { 30 | return nil, err 31 | } 32 | } 33 | return client, nil 34 | } 35 | -------------------------------------------------------------------------------- /pkg/mq/consumer.go: -------------------------------------------------------------------------------- 1 | package mq 2 | 3 | import ( 4 | "github.com/streadway/amqp" 5 | 6 | "go-crawler-distributed/global" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-12-29 13:47 12 | * @Description: 13 | **/ 14 | 15 | func Consume(queueName string) (<-chan amqp.Delivery, error) { 16 | //1. 申请队列,如果队列不存在则自动创建,如果存在则获取存在的队列 17 | //保证队列存在,使消息发送到队列中 18 | _, err := global.RabbitMQEngine.Channel.QueueDeclare(queueName, 19 | //是否持久化 20 | false, 21 | //是否自动删除 22 | false, 23 | //是否具有排他性,独占队列 24 | false, 25 | //是否阻塞 26 | false, 27 | //额外属性 28 | nil, 29 | ) 30 | if err != nil { 31 | return nil, err 32 | } 33 | 34 | //接受消息 35 | msgs, err := global.RabbitMQEngine.Channel.Consume( 36 | queueName, 37 | //用于区分多个不同的消费者 38 | "", 39 | //是否自动应答,也就是消费者消费一个队列后是否主动告知rabbitmq当前的消息我已经消费完 40 | //rabbitmq会根据这个判断是否可以删除该消息 41 | //为false的话要手动实现 42 | false, 43 | //是否具有排他性 44 | false, 45 | //如果为true不能在同一个connection中发送消息传递给当前conn的消费者 46 | false, 47 | false, 48 | nil, 49 | ) 50 | if err != nil { 51 | return nil, err 52 | } 53 | return msgs, nil 54 | } 55 | -------------------------------------------------------------------------------- /pkg/mq/producer.go: -------------------------------------------------------------------------------- 1 | package mq 2 | 3 | import ( 4 | "github.com/streadway/amqp" 5 | 6 | "go-crawler-distributed/global" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-12-29 13:43 12 | * @Description: 13 | **/ 14 | 15 | //发布消息 16 | func Publish(queueName string, msg []byte) error { 17 | 
//检查channel是否正常 18 | if err := initChannel(global.RabbitMQEngine, global.RabbitMQSetting); err != nil { 19 | return err 20 | } 21 | 22 | //1. 申请队列,如果队列不存在则自动创建,如果存在则获取存在的队列 23 | //保证队列存在,使消息发送到队列中 24 | _, err := global.RabbitMQEngine.Channel.QueueDeclare(queueName, 25 | //是否持久化 26 | false, 27 | //是否自动删除 28 | false, 29 | //是否具有排他性,独占队列 30 | false, 31 | //是否阻塞 32 | false, 33 | //额外属性 34 | nil, 35 | ) 36 | if err != nil { 37 | return err 38 | } 39 | 40 | //2. 发送消息到队列中 41 | err = global.RabbitMQEngine.Channel.Publish( 42 | "", 43 | queueName, 44 | // 如果为true, 则根据exchange类型和routkey规则,如果无法找到符合条件的队列 45 | // 那么会把发送的消息回退给publish 46 | false, 47 | //如果为true,当exchange发送消息到队列后发现没有consume,则会把发送的消息返回给发送者 48 | false, 49 | amqp.Publishing{ 50 | ContentType: "text/plain", 51 | Body: msg, 52 | }, 53 | ) 54 | if err != nil { 55 | return err 56 | } 57 | return nil 58 | } 59 | -------------------------------------------------------------------------------- /pkg/mq/rabbitmq.go: -------------------------------------------------------------------------------- 1 | package mq 2 | 3 | import ( 4 | "github.com/streadway/amqp" 5 | 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/pkg/setting" 8 | ) 9 | 10 | /** 11 | * @Author: super 12 | * @Date: 2020-11-18 11:50 13 | * @Description: rabbitMQ连接池 14 | **/ 15 | 16 | // 如果异常关闭,会接收通知 17 | var notifyClose chan *amqp.Error 18 | 19 | // Init : 初始化MQ连接信息 20 | func NewRabbitMQEngine(rabbitMQSetting *setting.RabbitMQSettingS) (*global.RabbitMQ, error) { 21 | rabbit := &global.RabbitMQ{} 22 | if err := initChannel(rabbit, rabbitMQSetting); err != nil { 23 | if rabbit.Channel != nil { 24 | rabbit.Channel.NotifyClose(notifyClose) 25 | } 26 | return nil, err 27 | } 28 | // 断线自动重连 29 | go func(rabbitMQ *global.RabbitMQ, rabbitMQSetting *setting.RabbitMQSettingS) { 30 | for { 31 | select { 32 | case _ = <-notifyClose: 33 | rabbit.Conn = nil 34 | rabbit.Channel = nil 35 | _ = initChannel(rabbitMQ, rabbitMQSetting) 36 | } 37 | } 38 | }(rabbit, 
rabbitMQSetting) 39 | return rabbit, nil 40 | } 41 | 42 | //初始化channel 43 | func initChannel(rabbitMQ *global.RabbitMQ, rabbitMQSetting *setting.RabbitMQSettingS) error { 44 | if rabbitMQ.Channel != nil { 45 | return nil 46 | } 47 | var err error 48 | rabbitHost := "amqp://" + rabbitMQSetting.UserName + ":" + rabbitMQSetting.Password + "@" + rabbitMQSetting.Host + "/" 49 | rabbitMQ.Conn, err = amqp.Dial(rabbitHost) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | rabbitMQ.Channel, err = rabbitMQ.Conn.Channel() 55 | if err != nil { 56 | return err 57 | } 58 | 59 | return nil 60 | } 61 | -------------------------------------------------------------------------------- /pkg/mq/rabbitmq_test.go: -------------------------------------------------------------------------------- 1 | package mq 2 | 3 | import ( 4 | "fmt" 5 | "go-crawler-distributed/global" 6 | "go-crawler-distributed/pkg/setting" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2020-12-29 13:58 14 | * @Description: 15 | **/ 16 | 17 | func TestPublish(t *testing.T) { 18 | newSetting, err := setting.NewSetting(strings.Split("/Users/super/develop/superTools-frontground-backend/configs", ",")...) 19 | if err != nil { 20 | t.Error(err) 21 | } 22 | err = newSetting.ReadSection("RabbitMQ", &global.RabbitMQSetting) 23 | if err != nil { 24 | t.Error(err) 25 | } 26 | global.RabbitMQEngine, err = NewRabbitMQEngine(global.RabbitMQSetting) 27 | if err != nil { 28 | t.Error(err) 29 | } 30 | err = Publish("test.oss", []byte("dddddddwedad")) 31 | if err != nil { 32 | t.Log(err) 33 | } 34 | } 35 | 36 | func TestConsume(t *testing.T) { 37 | newSetting, err := setting.NewSetting(strings.Split("/Users/super/develop/superTools-frontground-backend/configs", ",")...) 
38 | if err != nil { 39 | t.Error(err) 40 | } 41 | err = newSetting.ReadSection("RabbitMQ", &global.RabbitMQSetting) 42 | if err != nil { 43 | t.Error(err) 44 | } 45 | global.RabbitMQEngine, err = NewRabbitMQEngine(global.RabbitMQSetting) 46 | if err != nil { 47 | t.Error(err) 48 | } 49 | msgs, err := Consume("test.oss") 50 | forever := make(chan bool) 51 | go func() { 52 | for d := range msgs { 53 | fmt.Println(d.Body) 54 | //实现其他的逻辑函数 55 | } 56 | }() 57 | <-forever 58 | } 59 | -------------------------------------------------------------------------------- /pkg/otgorm/otgorm.go: -------------------------------------------------------------------------------- 1 | package otgorm 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strings" 7 | 8 | "github.com/jinzhu/gorm" 9 | "github.com/opentracing/opentracing-go" 10 | "github.com/opentracing/opentracing-go/ext" 11 | ) 12 | 13 | /** 14 | * @Author: super 15 | * @Date: 2020-09-24 08:35 16 | * @Description: gorm调用链追踪 17 | **/ 18 | 19 | const ( 20 | parentSpanGormKey = "opentracing:parent.span" 21 | spanGormKey = "opentracing:span" 22 | ) 23 | 24 | // SetSpanToGorm sets span to gorm settings, returns cloned DB 25 | func WithContext(ctx context.Context, db *gorm.DB) *gorm.DB { 26 | if ctx == nil { 27 | return db 28 | } 29 | parentSpan := opentracing.SpanFromContext(ctx) 30 | if parentSpan == nil { 31 | return db 32 | } 33 | return db.Set(parentSpanGormKey, parentSpan) 34 | } 35 | 36 | // AddGormCallbacks adds callbacks for tracing, you should call SetSpanToGorm to make them work 37 | func AddGormCallbacks(db *gorm.DB) { 38 | callbacks := newCallbacks() 39 | registerCallbacks(db, "create", callbacks) 40 | registerCallbacks(db, "query", callbacks) 41 | registerCallbacks(db, "update", callbacks) 42 | registerCallbacks(db, "delete", callbacks) 43 | registerCallbacks(db, "row_query", callbacks) 44 | } 45 | 46 | type callbacks struct{} 47 | 48 | func newCallbacks() *callbacks { 49 | return &callbacks{} 50 | } 51 | 52 | func (c 
*callbacks) beforeCreate(scope *gorm.Scope) { c.before(scope) } 53 | func (c *callbacks) afterCreate(scope *gorm.Scope) { c.after(scope, "INSERT") } 54 | func (c *callbacks) beforeQuery(scope *gorm.Scope) { c.before(scope) } 55 | func (c *callbacks) afterQuery(scope *gorm.Scope) { c.after(scope, "SELECT") } 56 | func (c *callbacks) beforeUpdate(scope *gorm.Scope) { c.before(scope) } 57 | func (c *callbacks) afterUpdate(scope *gorm.Scope) { c.after(scope, "UPDATE") } 58 | func (c *callbacks) beforeDelete(scope *gorm.Scope) { c.before(scope) } 59 | func (c *callbacks) afterDelete(scope *gorm.Scope) { c.after(scope, "DELETE") } 60 | func (c *callbacks) beforeRowQuery(scope *gorm.Scope) { c.before(scope) } 61 | func (c *callbacks) afterRowQuery(scope *gorm.Scope) { c.after(scope, "") } 62 | 63 | func (c *callbacks) before(scope *gorm.Scope) { 64 | val, ok := scope.Get(parentSpanGormKey) 65 | if !ok { 66 | return 67 | } 68 | parentSpan := val.(opentracing.Span) 69 | tr := parentSpan.Tracer() 70 | sp := tr.StartSpan("sql", opentracing.ChildOf(parentSpan.Context())) 71 | ext.DBType.Set(sp, "sql") 72 | scope.Set(spanGormKey, sp) 73 | } 74 | 75 | func (c *callbacks) after(scope *gorm.Scope, operation string) { 76 | val, ok := scope.Get(spanGormKey) 77 | if !ok { 78 | return 79 | } 80 | sp := val.(opentracing.Span) 81 | if operation == "" { 82 | operation = strings.ToUpper(strings.Split(scope.SQL, " ")[0]) 83 | } 84 | ext.Error.Set(sp, scope.HasError()) 85 | ext.DBStatement.Set(sp, scope.SQL) 86 | sp.SetTag("db.table", scope.TableName()) 87 | sp.SetTag("db.method", operation) 88 | sp.SetTag("db.err", scope.HasError()) 89 | sp.SetTag("db.count", scope.DB().RowsAffected) 90 | sp.Finish() 91 | } 92 | 93 | func registerCallbacks(db *gorm.DB, name string, c *callbacks) { 94 | beforeName := fmt.Sprintf("tracing:%v_before", name) 95 | afterName := fmt.Sprintf("tracing:%v_after", name) 96 | gormCallbackName := fmt.Sprintf("gorm:%v", name) 97 | // gorm does some magic, if you pass 
CallbackProcessor here - nothing works 98 | switch name { 99 | case "create": 100 | db.Callback().Create().Before(gormCallbackName).Register(beforeName, c.beforeCreate) 101 | db.Callback().Create().After(gormCallbackName).Register(afterName, c.afterCreate) 102 | case "query": 103 | db.Callback().Query().Before(gormCallbackName).Register(beforeName, c.beforeQuery) 104 | db.Callback().Query().After(gormCallbackName).Register(afterName, c.afterQuery) 105 | case "update": 106 | db.Callback().Update().Before(gormCallbackName).Register(beforeName, c.beforeUpdate) 107 | db.Callback().Update().After(gormCallbackName).Register(afterName, c.afterUpdate) 108 | case "delete": 109 | db.Callback().Delete().Before(gormCallbackName).Register(beforeName, c.beforeDelete) 110 | db.Callback().Delete().After(gormCallbackName).Register(afterName, c.afterDelete) 111 | case "row_query": 112 | db.Callback().RowQuery().Before(gormCallbackName).Register(beforeName, c.beforeRowQuery) 113 | db.Callback().RowQuery().After(gormCallbackName).Register(afterName, c.afterRowQuery) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /pkg/setting/section.go: -------------------------------------------------------------------------------- 1 | package setting 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-09-18 08:32 6 | * @Description: 系统设置 7 | **/ 8 | 9 | import ( 10 | "time" 11 | ) 12 | 13 | type ServerSettingS struct { 14 | RunMode string 15 | HttpPort string 16 | ReadTimeout time.Duration 17 | WriteTimeout time.Duration 18 | } 19 | 20 | type AppSettingS struct { 21 | DefaultPageSize int 22 | MaxPageSize int 23 | DefaultContextTimeout time.Duration 24 | LogSavePath string 25 | LogFileName string 26 | LogFileExt string 27 | UploadSavePath string 28 | UploadServerUrl string 29 | UploadImageMaxSize int 30 | UploadImageAllowExts []string 31 | } 32 | 33 | type EmailSettingS struct { 34 | Host string 35 | Port int 36 | UserName string 37 | Password 
string 38 | IsSSL bool 39 | From string 40 | To []string 41 | } 42 | 43 | type JWTSettingS struct { 44 | Secret string 45 | Issuer string 46 | Expire time.Duration 47 | } 48 | 49 | type DatabaseSettingS struct { 50 | DBType string 51 | UserName string 52 | Password string 53 | Host string 54 | DBName string 55 | TablePrefix string 56 | Charset string 57 | ParseTime bool 58 | MaxIdleConns int 59 | MaxOpenConns int 60 | } 61 | 62 | type CacheSettingS struct { 63 | UserName string 64 | Password string 65 | Host string 66 | MaxIdle int 67 | MaxActive int 68 | } 69 | 70 | type RabbitMQSettingS struct { 71 | UserName string 72 | Password string 73 | Host string 74 | } 75 | 76 | type ElasticSettingS struct { 77 | Url string 78 | Index string 79 | } 80 | 81 | type ConsulSettingS struct { 82 | Url string 83 | ConfigPath string 84 | } 85 | 86 | type TracerSettingS struct { 87 | ServiceName string 88 | Host string 89 | } 90 | 91 | type MongoDBSettingS struct { 92 | Url string 93 | MaxPoolSize uint64 94 | Timeout int 95 | } 96 | 97 | type EtcdSettingS struct { 98 | Endpoint string 99 | DialTimeout int 100 | } 101 | 102 | type IpParserSettingS struct { 103 | FilePath string 104 | } 105 | 106 | var sections = make(map[string]interface{}) 107 | 108 | func (s *Setting) ReadSection(k string, v interface{}) error { 109 | err := s.vp.UnmarshalKey(k, v) 110 | if err != nil { 111 | return err 112 | } 113 | 114 | if _, ok := sections[k]; !ok { 115 | sections[k] = v 116 | } 117 | return nil 118 | } 119 | 120 | func (s *Setting) ReloadAllSection() error { 121 | for k, v := range sections { 122 | err := s.ReadSection(k, v) 123 | if err != nil { 124 | return err 125 | } 126 | } 127 | 128 | return nil 129 | } 130 | -------------------------------------------------------------------------------- /pkg/setting/setting.go: -------------------------------------------------------------------------------- 1 | package setting 2 | 3 | import ( 4 | "github.com/fsnotify/fsnotify" 5 | 
"github.com/spf13/viper" 6 | ) 7 | 8 | /** 9 | * @Author: super 10 | * @Date: 2020-09-18 08:28 11 | * @Description: 监听系统配置与section.go结合实现热更新 12 | **/ 13 | 14 | type Setting struct { 15 | vp *viper.Viper 16 | } 17 | 18 | func NewSetting(configs ...string) (*Setting, error) { 19 | vp := viper.New() 20 | vp.SetConfigName("config") 21 | for _, config := range configs { 22 | if config != "" { 23 | vp.AddConfigPath(config) 24 | } 25 | } 26 | vp.SetConfigType("yaml") 27 | err := vp.ReadInConfig() 28 | if err != nil { 29 | return nil, err 30 | } 31 | 32 | s := &Setting{vp} 33 | s.WatchSettingChange() 34 | return s, nil 35 | } 36 | 37 | func (s *Setting) WatchSettingChange() { 38 | go func() { 39 | s.vp.WatchConfig() 40 | s.vp.OnConfigChange(func(in fsnotify.Event) { 41 | _ = s.ReloadAllSection() 42 | }) 43 | }() 44 | } 45 | -------------------------------------------------------------------------------- /pkg/tracer/tracer.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import ( 4 | "io" 5 | "time" 6 | 7 | opentracing "github.com/opentracing/opentracing-go" 8 | "github.com/uber/jaeger-client-go/config" 9 | ) 10 | 11 | /** 12 | * @Author: super 13 | * @Date: 2020-09-24 08:08 14 | * @Description: 调用链追踪 15 | **/ 16 | 17 | func NewJaegerTracer(serviceName, agentHostPort string) (opentracing.Tracer, io.Closer, error) { 18 | cfg := &config.Configuration{ 19 | ServiceName: serviceName, 20 | Sampler: &config.SamplerConfig{ 21 | Type: "const", 22 | Param: 1, 23 | }, 24 | Reporter: &config.ReporterConfig{ 25 | LogSpans: true, 26 | BufferFlushInterval: 1 * time.Second, 27 | LocalAgentHostPort: agentHostPort, 28 | }, 29 | } 30 | tracer, closer, err := cfg.NewTracer() 31 | if err != nil { 32 | return nil, nil, err 33 | } 34 | opentracing.SetGlobalTracer(tracer) 35 | return tracer, closer, nil 36 | } 37 | -------------------------------------------------------------------------------- /pkg/upload/file.go: 
-------------------------------------------------------------------------------- 1 | package upload 2 | 3 | import ( 4 | "go-crawler-distributed/global" 5 | "go-crawler-distributed/pkg/util" 6 | 7 | "io" 8 | "io/ioutil" 9 | "mime/multipart" 10 | "os" 11 | "path" 12 | "strings" 13 | ) 14 | 15 | /** 16 | * @Author: super 17 | * @Date: 2020-09-23 19:02 18 | * @Description: 用于处理文件上传 19 | **/ 20 | 21 | type FileType int 22 | 23 | const TypeImage FileType = iota + 1 24 | 25 | func GetFileName(name string) string { 26 | ext := GetFileExt(name) 27 | fileName := strings.TrimSuffix(name, ext) 28 | fileName = util.EncodeMD5(fileName) 29 | 30 | return fileName + ext 31 | } 32 | 33 | func GetFileExt(name string) string { 34 | return path.Ext(name) 35 | } 36 | 37 | func GetSavePath() string { 38 | return global.AppSetting.UploadSavePath 39 | } 40 | 41 | func GetServerUrl() string { 42 | return global.AppSetting.UploadServerUrl 43 | } 44 | 45 | func CheckSavePath(dst string) bool { 46 | _, err := os.Stat(dst) 47 | 48 | return os.IsNotExist(err) 49 | } 50 | 51 | func CheckContainExt(t FileType, name string) bool { 52 | ext := GetFileExt(name) 53 | ext = strings.ToUpper(ext) 54 | switch t { 55 | case TypeImage: 56 | for _, allowExt := range global.AppSetting.UploadImageAllowExts { 57 | if strings.ToUpper(allowExt) == ext { 58 | return true 59 | } 60 | } 61 | } 62 | return false 63 | } 64 | 65 | func CheckMaxSize(t FileType, f multipart.File) bool { 66 | content, _ := ioutil.ReadAll(f) 67 | size := len(content) 68 | switch t { 69 | case TypeImage: 70 | if size >= global.AppSetting.UploadImageMaxSize*1024*1024 { 71 | return true 72 | } 73 | } 74 | return false 75 | } 76 | 77 | func CheckPermission(dst string) bool { 78 | _, err := os.Stat(dst) 79 | 80 | return os.IsPermission(err) 81 | } 82 | 83 | func CreateSavePath(dst string, perm os.FileMode) error { 84 | err := os.MkdirAll(dst, perm) 85 | if err != nil { 86 | return err 87 | } 88 | 89 | return nil 90 | } 91 | 92 | func 
// SaveFile copies the uploaded file described by file to the path dst,
// creating dst or truncating it if it already exists.
//
// It returns the first error encountered while opening the upload, creating
// dst, copying the data, or closing dst.
func SaveFile(file *multipart.FileHeader, dst string) (err error) {
	src, err := file.Open()
	if err != nil {
		return err
	}
	// src is only read from; a close error cannot lose data.
	defer src.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// Buffered writes may only fail when the file is closed, so a Close
		// error means dst can be incomplete and has to reach the caller.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(out, src)
	return err
}
// PKCS7UnPadding is the inverse of PKCS7Padding: it strips the trailing
// padding bytes and returns the remaining plaintext.
//
// The last byte of origData states how many bytes of padding are present. An
// error is returned when origData is empty or when that count is zero or
// larger than the data itself. Such input was not produced by PKCS7Padding
// (typically corrupted data or a wrong key) and would otherwise cause an
// out-of-range slice panic.
func PKCS7UnPadding(origData []byte) ([]byte, error) {
	// Length of the padded data.
	length := len(origData)
	if length == 0 {
		return nil, errors.New("加密字符串错误!")
	}
	// Number of padding bytes, as recorded in the final byte.
	unpadding := int(origData[length-1])
	if unpadding == 0 || unpadding > length {
		return nil, errors.New("invalid PKCS7 padding")
	}
	// Cut the padding off and return the plaintext.
	return origData[:(length - unpadding)], nil
}
// DecodeBase64 reverses EncodeBase64: it base64-decodes value and gunzips the
// result, returning the original text.
//
// The signature has no error result, so invalid input (not base64, not a gzip
// stream, or a corrupt stream) yields the empty string. Without these checks
// a failed gzip.NewReader returns a nil reader and reading from it panics.
func DecodeBase64(value string) string {
	data, err := base64.StdEncoding.DecodeString(value)
	if err != nil {
		return ""
	}
	r, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return ""
	}
	defer r.Close()
	s, err := ioutil.ReadAll(r)
	if err != nil {
		return ""
	}
	return string(s)
}
// EncodeToJson marshals object with encoding/json and returns the document as
// a string. On failure it returns the empty string and the marshal error.
func EncodeToJson(object interface{}) (string, error) {
	raw, err := json.Marshal(object)
	if err == nil {
		return string(raw), nil
	}
	return "", err
}

// DecodeToStruct parses input as JSON into an untyped value, using
// encoding/json's default mapping for interface{} targets. On failure it
// returns nil and the unmarshal error.
func DecodeToStruct(input string) (interface{}, error) {
	var decoded interface{}
	if err := json.Unmarshal([]byte(input), &decoded); err != nil {
		return nil, err
	}
	return decoded, nil
}
result, err := DecodeToStruct(input) 49 | if err != nil { 50 | t.Error(err) 51 | } 52 | t.Log(result) 53 | } 54 | -------------------------------------------------------------------------------- /pkg/util/jwt.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-08-22 09:13 6 | * @Description: 7 | **/ 8 | 9 | import ( 10 | "github.com/dgrijalva/jwt-go" 11 | 12 | "time" 13 | ) 14 | 15 | var secret = "" 16 | 17 | var jwtSecret = []byte(secret) 18 | 19 | type Claims struct { 20 | Username string `json:"username"` 21 | Password string `json:"password"` 22 | jwt.StandardClaims 23 | } 24 | 25 | func GenerateToken(username, password string) (string, error) { 26 | nowTime := time.Now() 27 | expireTime := nowTime.Add(3 * time.Hour) 28 | 29 | claims := Claims{ 30 | username, 31 | password, 32 | jwt.StandardClaims{ 33 | ExpiresAt: expireTime.Unix(), 34 | Issuer: "gin-blog", 35 | }, 36 | } 37 | 38 | tokenClaims := jwt.NewWithClaims(jwt.SigningMethodHS256, claims) 39 | token, err := tokenClaims.SignedString(jwtSecret) 40 | 41 | return token, err 42 | } 43 | 44 | func ParseToken(token string) (*Claims, error) { 45 | tokenClaims, err := jwt.ParseWithClaims(token, &Claims{}, func(token *jwt.Token) (interface{}, error) { 46 | return jwtSecret, nil 47 | }) 48 | 49 | if tokenClaims != nil { 50 | if claims, ok := tokenClaims.Claims.(*Claims); ok && tokenClaims.Valid { 51 | return claims, nil 52 | } 53 | } 54 | 55 | return nil, err 56 | } 57 | -------------------------------------------------------------------------------- /pkg/util/md5.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "crypto/md5" 5 | "encoding/hex" 6 | ) 7 | 8 | /** 9 | * @Author: super 10 | * @Date: 2020-09-23 18:59 11 | * @Description: 12 | **/ 13 | 14 | //字符串md5 15 | func EncodeMD5(value string) string { 16 | m := md5.New() 17 | 
m.Write([]byte(value)) 18 | return hex.EncodeToString(m.Sum(nil)) 19 | } 20 | -------------------------------------------------------------------------------- /pkg/util/md5_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import "testing" 4 | 5 | /** 6 | * @Author: super 7 | * @Date: 2020-09-24 19:54 8 | * @Description: 9 | **/ 10 | 11 | func TestEncodeMD5(t *testing.T) { 12 | var tests = []struct { 13 | in string 14 | out string 15 | }{ 16 | {"", "d41d8cd98f00b204e9800998ecf8427e"}, 17 | {"hello", "5d41402abc4b2a76b9719d911017c592"}, 18 | {"小肥猫告别了小瘦猫,去大城市闯荡了。“再见了,小瘦猫,我一只猫也能过得很好。”小肥猫抹下最后一颗泪珠,赌着气给小瘦猫写下了这封诀别信。小瘦猫还在熟睡着,连小肥猫用力关门的“咣当”声都没能把它吵醒。小肥猫一步三回头,却始终没有猫来追它回去。小肥猫叹口气,只能自己一个人去讨生活。她向来是胆小的,面对未知有诸多恐惧。恰逢大雨,航班居然延误十二个小时。坐在旁边的旅客兔小姐对同伴说:“按照我往常的经验,这趟航班应该被取消了”。小肥猫本来坚定下来的心又开始犹犹豫豫打起了鼓,要不要回去呢,可是回去就意味着自己的妥协。就在小肥猫犹豫不决的时候,小瘦猫突然出现了,没有责怪,也没有问小肥猫想离开它之后去哪里,小瘦猫只是把小肥猫拥抱在怀里。小肥猫退了票,像个跟屁虫一样,被小瘦猫牵回了家。小肥猫余气未消,走着走着她甩开手:每次你都是这样,一句多余的话都不说,连我要去哪里都不问吗?小瘦猫当然知道它想赌气离开自己,虽然不知道自己哪里做错了,可能在对待女朋友这里还是觉悟不够高吧。小瘦猫回答说:我不会问的。因为我会难过,无论你去到哪里,那个地方都没有我。", "80e99dc093cafed983ef6428a27ed645"}, 19 | } 20 | 21 | for i, tt := range tests { 22 | s := EncodeMD5(tt.in) 23 | if s != tt.out { 24 | t.Errorf("%d. 
// lengthError is returned by GenerateMorse when the input is empty after
// surrounding whitespace has been trimmed.
var lengthError = errors.New("length must be at least 1")

// unsupportedError is returned by GenerateMorse when the input contains a
// byte that has no entry in morseMap.
var unsupportedError = errors.New("unsupported byte")

// morseMap maps the lowercase ASCII letters a-z and the digits 0-9 to their
// Morse code representation. Any other byte (uppercase letters, punctuation,
// inner spaces, bytes of multi-byte UTF-8 characters) is absent.
var morseMap = map[byte]string{
	'a': ".-",
	'b': "-...",
	'c': "-.-.",
	'd': "-..",
	'e': ".",
	'f': "..-.",
	'g': "--.",
	'h': "....",
	'i': "..",
	'j': ".---",
	'k': "-.-",
	'l': ".-..",
	'm': "--",
	'n': "-.",
	'o': "---",
	'p': ".--.",
	'q': "--.-",
	'r': ".-.",
	's': "...",
	't': "-",
	'u': "..-",
	'v': "...-",
	'w': ".--",
	'x': "-..-",
	'y': "-.--",
	'z': "--..",
	'1': ".----",
	'2': "..---",
	'3': "...--",
	'4': "....-",
	'5': ".....",
	'6': "-....",
	'7': "--...",
	'8': "---..",
	'9': "----.",
	'0': "-----",
}

// GenerateMorse converts str to Morse code.
//
// Leading and trailing whitespace is trimmed first. The codes of the
// individual bytes are concatenated with no separator between them.
//
// It returns lengthError when nothing is left after trimming, and
// unsupportedError as soon as a byte without an entry in morseMap is met; in
// both cases the returned string is empty.
func GenerateMorse(str string) (string, error) {
	str = strings.TrimSpace(str)
	if len(str) == 0 {
		return "", lengthError
	}

	var builder strings.Builder
	// Index the string directly: morseMap is keyed by byte, so no []byte copy
	// of the input is needed.
	for i := 0; i < len(str); i++ {
		code, ok := morseMap[str[i]]
		if !ok {
			return "", unsupportedError
		}
		builder.WriteString(code)
	}
	return builder.String(), nil
}
| type Morse struct { 14 | str string 15 | real string 16 | err error 17 | } 18 | 19 | func TestGenerateMorse(t *testing.T) { 20 | var morses = []struct { 21 | str string 22 | code string 23 | err error 24 | }{ 25 | {"aa11", ".-.-.----.----", nil}, 26 | {"11aa", ".----.----.-.-", nil}, 27 | {"", "", lengthError}, 28 | {"111,as", "", unsupportedError}, 29 | {"中文", "", unsupportedError}, 30 | {"1a12 ", ".----.-.----..---", nil}, 31 | {" ", "", lengthError}, 32 | {"asdj$%#, 441", "", unsupportedError}, 33 | {"!@#$", "", unsupportedError}, 34 | } 35 | 36 | for i, v := range morses { 37 | code, e := GenerateMorse(v.str) 38 | if code != v.code { 39 | t.Errorf("%d. %s morse code %s, wanted: %s, error= %v", i, v.str, code, v.code, e) 40 | } else if e != v.err { 41 | t.Errorf("%d. %s morse code %s, wanted: %s, error= %v", i, v.str, code, v.code, e) 42 | } 43 | } 44 | } 45 | 46 | func BenchmarkGenerateMorse(b *testing.B) { 47 | for i := 0; i < b.N; i++ { 48 | _, _ = GenerateMorse("asasd12454") 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /pkg/util/qrcode.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | /** 4 | * @Author: super 5 | * @Date: 2020-08-21 22:08 6 | * @Description: 7 | **/ 8 | 9 | import "github.com/skip2/go-qrcode" 10 | 11 | func GenerateQRCodeByte(str string) ([]byte, error) { 12 | return qrcode.Encode(str, qrcode.Highest, 256) 13 | } 14 | -------------------------------------------------------------------------------- /pkg/util/reb2hex_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | /** 9 | * @Author: super 10 | * @Date: 2020-11-30 20:21 11 | * @Description: 12 | **/ 13 | 14 | func TestRgbToHex(t *testing.T) { 15 | fmt.Println(RgbToHex(`{ 16 | "red": 12, 17 | "green": 255, 18 | "blue": 255 19 | }`)) 20 | } 21 | 22 | func 
// RGB is a color described by its red, green and blue channel values.
// It is decoded from JSON objects using the keys "red", "green" and "blue".
type RGB struct {
	Red   int `json:"red"`
	Green int `json:"green"`
	Blue  int `json:"blue"`
}

// ToString renders the color as "#rrggbb" using lowercase hexadecimal, each
// channel zero-padded to two digits.
//
// The channels are expected to be within 0-255; values outside that range
// are formatted as they are and do not yield a valid six-digit color.
func (rgb *RGB) ToString() string {
	return fmt.Sprintf("#%02x%02x%02x", rgb.Red, rgb.Green, rgb.Blue)
}

// RgbToHex parses a JSON object such as {"red": 12, "green": 255, "blue": 255}
// and returns the matching "#rrggbb" string.
//
// Channels missing from the JSON keep their zero value. An error is returned
// when the input is not valid JSON for an RGB value, or when any channel is
// outside 0-255, because such a value cannot be written as two hex digits.
func RgbToHex(rgb string) (string, error) {
	rgbStruct := &RGB{}
	if err := json.Unmarshal([]byte(rgb), rgbStruct); err != nil {
		return "", err
	}

	channels := []struct {
		name  string
		value int
	}{
		{"red", rgbStruct.Red},
		{"green", rgbStruct.Green},
		{"blue", rgbStruct.Blue},
	}
	for _, ch := range channels {
		if ch.value < 0 || ch.value > 255 {
			return "", fmt.Errorf("%s channel %d out of range [0, 255]", ch.name, ch.value)
		}
	}
	return rgbStruct.ToString(), nil
}
// ZipString gzip-compresses s and returns the compressed bytes encoded with
// standard base64.
//
// An error is returned when writing to, flushing or closing the gzip writer
// fails; the returned string is empty in that case.
func ZipString(s []byte) (string, error) {
	var b bytes.Buffer
	gz := gzip.NewWriter(&b)
	if _, err := gz.Write(s); err != nil {
		return "", err
	}
	// The explicit Flush is kept so the encoded output stays the same as what
	// this function produced before; Close finalizes the stream afterwards.
	if err := gz.Flush(); err != nil {
		return "", err
	}
	if err := gz.Close(); err != nil {
		return "", err
	}
	return base64.StdEncoding.EncodeToString(b.Bytes()), nil
}

// UnzipString reverses ZipString: it base64-decodes str, gunzips the result
// and returns the original text.
//
// The signature has no error result, so every failure (invalid base64, data
// that is not a gzip stream, a corrupt or truncated stream) is reported by
// returning the empty string. Checking each step matters: when the gzip
// header cannot be read, gzip.NewReader returns a nil reader, and reading
// from it would panic.
func UnzipString(str string) string {
	data, err := base64.StdEncoding.DecodeString(str)
	if err != nil {
		return ""
	}
	r, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return ""
	}
	defer r.Close()

	s, err := ioutil.ReadAll(r)
	if err != nil {
		return ""
	}
	return string(s)
}
"github.com/micro/go-micro/v2/registry" 7 | "github.com/micro/go-plugins/registry/consul/v2" 8 | "go-crawler-distributed/global" 9 | "go-crawler-distributed/service/cache/proto" 10 | ) 11 | 12 | /** 13 | * @Author: super 14 | * @Date: 2021-01-05 19:33 15 | * @Description: 16 | **/ 17 | 18 | var redisOP proto.RedisOperationService 19 | 20 | func init() { 21 | reg := consul.NewRegistry(func(options *registry.Options) { 22 | options.Addrs = []string{ 23 | global.ConsulSetting.Url, 24 | } 25 | }) 26 | 27 | service := micro.NewService( 28 | micro.Registry(reg), 29 | micro.Name("go.micro.service.redis.client"), 30 | ) 31 | service.Init() 32 | redisOP = proto.NewRedisOperationService("go.micro.service.redis", service.Client()) 33 | } 34 | 35 | func AddElementToSet(key string, value string) (int32, error) { 36 | res, err := redisOP.AddElementToSet(context.TODO(), &proto.Request{Key: key, Value: value}) 37 | if err != nil { 38 | global.Logger.Error(context.Background(), err) 39 | return -1, err 40 | } 41 | return res.Result, nil 42 | } 43 | 44 | func ElementIsInSet(key string, value string) (bool, error) { 45 | rsp, err := redisOP.ElementIsInSet(context.TODO(), &proto.Request{Key: key, Value: value}) 46 | if err != nil { 47 | global.Logger.Error(context.Background(), err) 48 | return false, err 49 | } 50 | return rsp.Result, err 51 | } 52 | -------------------------------------------------------------------------------- /service/cache/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/micro/go-micro/v2" 8 | "github.com/micro/go-micro/v2/registry" 9 | "github.com/micro/go-plugins/registry/consul/v2" 10 | 11 | "go-crawler-distributed/global" 12 | "go-crawler-distributed/service/cache/proto" 13 | "go-crawler-distributed/service/cache/server" 14 | ) 15 | 16 | /** 17 | * @Author: super 18 | * @Date: 2020-08-17 20:21 19 | * @Description: 20 | **/ 21 | 22 | func 
main() { 23 | reg := consul.NewRegistry(func(options *registry.Options) { 24 | options.Addrs = []string{ 25 | global.ConsulSetting.Url, 26 | } 27 | }) 28 | 29 | service := micro.NewService( 30 | micro.Registry(reg), 31 | micro.Name("go.micro.service.redis"), 32 | micro.RegisterTTL(time.Second*10), 33 | micro.RegisterInterval(time.Second*5), 34 | ) 35 | service.Init() 36 | 37 | // 注册处理器 38 | err := proto.RegisterRedisOperationHandler(service.Server(), new(server.CacheStruct)) 39 | if err != nil { 40 | global.Logger.Error(context.Background(), err) 41 | } 42 | 43 | // 运行服务 44 | if err := service.Run(); err != nil { 45 | global.Logger.Error(context.Background(), err) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /service/cache/proto/redis.pb.micro.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-micro. DO NOT EDIT. 2 | // source: redis.proto 3 | 4 | package proto 5 | 6 | import ( 7 | fmt "fmt" 8 | proto "github.com/golang/protobuf/proto" 9 | math "math" 10 | ) 11 | 12 | import ( 13 | context "context" 14 | api "github.com/micro/go-micro/v2/api" 15 | client "github.com/micro/go-micro/v2/client" 16 | server "github.com/micro/go-micro/v2/server" 17 | ) 18 | 19 | // Reference imports to suppress errors if they are not otherwise used. 20 | var _ = proto.Marshal 21 | var _ = fmt.Errorf 22 | var _ = math.Inf 23 | 24 | // This is a compile-time assertion to ensure that this generated file 25 | // is compatible with the proto package it is being compiled against. 26 | // A compilation error at this line likely means your copy of the 27 | // proto package needs to be updated. 28 | const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package 29 | 30 | // Reference imports to suppress errors if they are not otherwise used. 
31 | var _ api.Endpoint 32 | var _ context.Context 33 | var _ client.Option 34 | var _ server.Option 35 | 36 | // Api Endpoints for RedisOperation service 37 | 38 | func NewRedisOperationEndpoints() []*api.Endpoint { 39 | return []*api.Endpoint{} 40 | } 41 | 42 | // Client API for RedisOperation service 43 | 44 | type RedisOperationService interface { 45 | SetString(ctx context.Context, in *Request, opts ...client.CallOption) (*StringResponse, error) 46 | GetString(ctx context.Context, in *Request, opts ...client.CallOption) (*StringResponse, error) 47 | AddElementToSet(ctx context.Context, in *Request, opts ...client.CallOption) (*IntResponse, error) 48 | ElementIsInSet(ctx context.Context, in *Request, opts ...client.CallOption) (*BoolResponse, error) 49 | GetAllElementFromSet(ctx context.Context, in *Request, opts ...client.CallOption) (*StringsResponse, error) 50 | } 51 | 52 | type redisOperationService struct { 53 | c client.Client 54 | name string 55 | } 56 | 57 | func NewRedisOperationService(name string, c client.Client) RedisOperationService { 58 | return &redisOperationService{ 59 | c: c, 60 | name: name, 61 | } 62 | } 63 | 64 | func (c *redisOperationService) SetString(ctx context.Context, in *Request, opts ...client.CallOption) (*StringResponse, error) { 65 | req := c.c.NewRequest(c.name, "RedisOperation.SetString", in) 66 | out := new(StringResponse) 67 | err := c.c.Call(ctx, req, out, opts...) 68 | if err != nil { 69 | return nil, err 70 | } 71 | return out, nil 72 | } 73 | 74 | func (c *redisOperationService) GetString(ctx context.Context, in *Request, opts ...client.CallOption) (*StringResponse, error) { 75 | req := c.c.NewRequest(c.name, "RedisOperation.GetString", in) 76 | out := new(StringResponse) 77 | err := c.c.Call(ctx, req, out, opts...) 
78 | if err != nil { 79 | return nil, err 80 | } 81 | return out, nil 82 | } 83 | 84 | func (c *redisOperationService) AddElementToSet(ctx context.Context, in *Request, opts ...client.CallOption) (*IntResponse, error) { 85 | req := c.c.NewRequest(c.name, "RedisOperation.AddElementToSet", in) 86 | out := new(IntResponse) 87 | err := c.c.Call(ctx, req, out, opts...) 88 | if err != nil { 89 | return nil, err 90 | } 91 | return out, nil 92 | } 93 | 94 | func (c *redisOperationService) ElementIsInSet(ctx context.Context, in *Request, opts ...client.CallOption) (*BoolResponse, error) { 95 | req := c.c.NewRequest(c.name, "RedisOperation.ElementIsInSet", in) 96 | out := new(BoolResponse) 97 | err := c.c.Call(ctx, req, out, opts...) 98 | if err != nil { 99 | return nil, err 100 | } 101 | return out, nil 102 | } 103 | 104 | func (c *redisOperationService) GetAllElementFromSet(ctx context.Context, in *Request, opts ...client.CallOption) (*StringsResponse, error) { 105 | req := c.c.NewRequest(c.name, "RedisOperation.GetAllElementFromSet", in) 106 | out := new(StringsResponse) 107 | err := c.c.Call(ctx, req, out, opts...) 
108 | if err != nil { 109 | return nil, err 110 | } 111 | return out, nil 112 | } 113 | 114 | // Server API for RedisOperation service 115 | 116 | type RedisOperationHandler interface { 117 | SetString(context.Context, *Request, *StringResponse) error 118 | GetString(context.Context, *Request, *StringResponse) error 119 | AddElementToSet(context.Context, *Request, *IntResponse) error 120 | ElementIsInSet(context.Context, *Request, *BoolResponse) error 121 | GetAllElementFromSet(context.Context, *Request, *StringsResponse) error 122 | } 123 | 124 | func RegisterRedisOperationHandler(s server.Server, hdlr RedisOperationHandler, opts ...server.HandlerOption) error { 125 | type redisOperation interface { 126 | SetString(ctx context.Context, in *Request, out *StringResponse) error 127 | GetString(ctx context.Context, in *Request, out *StringResponse) error 128 | AddElementToSet(ctx context.Context, in *Request, out *IntResponse) error 129 | ElementIsInSet(ctx context.Context, in *Request, out *BoolResponse) error 130 | GetAllElementFromSet(ctx context.Context, in *Request, out *StringsResponse) error 131 | } 132 | type RedisOperation struct { 133 | redisOperation 134 | } 135 | h := &redisOperationHandler{hdlr} 136 | return s.Handle(s.NewHandler(&RedisOperation{h}, opts...)) 137 | } 138 | 139 | type redisOperationHandler struct { 140 | RedisOperationHandler 141 | } 142 | 143 | func (h *redisOperationHandler) SetString(ctx context.Context, in *Request, out *StringResponse) error { 144 | return h.RedisOperationHandler.SetString(ctx, in, out) 145 | } 146 | 147 | func (h *redisOperationHandler) GetString(ctx context.Context, in *Request, out *StringResponse) error { 148 | return h.RedisOperationHandler.GetString(ctx, in, out) 149 | } 150 | 151 | func (h *redisOperationHandler) AddElementToSet(ctx context.Context, in *Request, out *IntResponse) error { 152 | return h.RedisOperationHandler.AddElementToSet(ctx, in, out) 153 | } 154 | 155 | func (h *redisOperationHandler) 
ElementIsInSet(ctx context.Context, in *Request, out *BoolResponse) error { 156 | return h.RedisOperationHandler.ElementIsInSet(ctx, in, out) 157 | } 158 | 159 | func (h *redisOperationHandler) GetAllElementFromSet(ctx context.Context, in *Request, out *StringsResponse) error { 160 | return h.RedisOperationHandler.GetAllElementFromSet(ctx, in, out) 161 | } 162 | -------------------------------------------------------------------------------- /service/cache/proto/redis.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option go_package = ".;proto"; 4 | 5 | service RedisOperation { 6 | rpc SetString(Request) returns (StringResponse) {} 7 | rpc GetString(Request) returns (StringResponse) {} 8 | rpc AddElementToSet(Request) returns (IntResponse) {} 9 | rpc ElementIsInSet(Request) returns (BoolResponse) {} 10 | rpc GetAllElementFromSet(Request) returns (StringsResponse){} 11 | } 12 | 13 | message Request { 14 | string key = 1; 15 | string value = 2; 16 | } 17 | 18 | message StringResponse{ 19 | string result = 1; 20 | } 21 | 22 | message IntResponse{ 23 | int32 result = 1; 24 | } 25 | 26 | message BoolResponse{ 27 | bool result = 1; 28 | } 29 | 30 | message StringsResponse{ 31 | repeated string result = 1; 32 | } -------------------------------------------------------------------------------- /service/cache/server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "github.com/garyburd/redigo/redis" 6 | "go-crawler-distributed/global" 7 | "go-crawler-distributed/service/cache/proto" 8 | ) 9 | 10 | /** 11 | * @Author: super 12 | * @Date: 2020-08-17 20:08 13 | * @Description: redis微服务化 14 | **/ 15 | 16 | type CacheStruct struct { 17 | } 18 | 19 | func (cache *CacheStruct) SetString(ctx context.Context, req *proto.Request, res *proto.StringResponse) error { 20 | c := global.RedisEngine.Get() 21 | 22 | str, err := 
redis.String(c.Do("set", req.Key, req.Value)) 23 | if err != nil { 24 | return err 25 | } 26 | res.Result = str 27 | return nil 28 | } 29 | 30 | func (cache *CacheStruct) GetString(ctx context.Context, req *proto.Request, res *proto.StringResponse) error { 31 | c := global.RedisEngine.Get() 32 | 33 | str, err := redis.String(c.Do("get", req.Key)) 34 | if err != nil { 35 | return err 36 | } 37 | res.Result = str 38 | return nil 39 | } 40 | 41 | func (cache *CacheStruct) AddElementToSet(ctx context.Context, req *proto.Request, res *proto.IntResponse) error { 42 | c := global.RedisEngine.Get() 43 | defer c.Close() 44 | 45 | result, err := redis.Int(c.Do("sadd", req.Key, req.Value)) 46 | if err != nil { 47 | return err 48 | } 49 | res.Result = int32(result) 50 | return nil 51 | } 52 | 53 | func (cache *CacheStruct) ElementIsInSet(ctx context.Context, req *proto.Request, res *proto.BoolResponse) error { 54 | c := global.RedisEngine.Get() 55 | defer c.Close() 56 | 57 | result, err := redis.Int(c.Do("sismember", req.Key, req.Value)) 58 | if err != nil { 59 | return err 60 | } 61 | if result == 1 { 62 | res.Result = true 63 | } else { 64 | res.Result = false 65 | } 66 | return nil 67 | } 68 | 69 | func (cache *CacheStruct) GetAllElementFromSet(ctx context.Context, req *proto.Request, res *proto.StringsResponse) error { 70 | c := global.RedisEngine.Get() 71 | strs, err := redis.Strings(c.Do("smembers", req.Key)) 72 | if err != nil { 73 | return err 74 | } 75 | res.Result = strs 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /service/douban/crawl_detail/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crawler" 5 | "go-crawler-distributed/internal/crawler/crawerConfig" 6 | "go-crawler-distributed/internal/crawler/douban/parser" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-08-31 16:52 12 | * 
@Description: 13 | **/ 14 | 15 | func main() { 16 | crawler.Crawl(crawerConfig.BookDetailUrl, crawerConfig.BookDetail, "BookDetail", parser.ParseBookDetail) 17 | } 18 | -------------------------------------------------------------------------------- /service/douban/crawl_list/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crawler" 5 | "go-crawler-distributed/internal/crawler/crawerConfig" 6 | "go-crawler-distributed/internal/crawler/douban/parser" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-08-31 16:51 12 | * @Description: 13 | **/ 14 | 15 | func main() { 16 | crawler.Crawl(crawerConfig.TagUrl, crawerConfig.BookDetailUrl, "tagList", parser.ParseBookList) 17 | } 18 | -------------------------------------------------------------------------------- /service/douban/crawl_tags/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crawler" 5 | "go-crawler-distributed/internal/crawler/crawerConfig" 6 | "go-crawler-distributed/internal/crawler/douban/parser" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-08-31 16:50 12 | * @Description: 13 | **/ 14 | 15 | func main() { 16 | crawler.Crawl("", crawerConfig.TagUrl, "tags", parser.ParseTagList) 17 | } 18 | -------------------------------------------------------------------------------- /service/douban/storage_detail/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crawler" 5 | "go-crawler-distributed/internal/crawler/crawerConfig" 6 | "go-crawler-distributed/internal/crawler/douban/storage" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-08-31 16:52 12 | * @Description: 13 | **/ 14 | 15 | func main() { 16 | crawler.Crawl(crawerConfig.BookDetail, "", 
"storageBookDetail", storage.ParseAndStorage) 17 | } 18 | -------------------------------------------------------------------------------- /service/elastic/client/client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "context" 5 | "github.com/micro/go-micro/v2" 6 | "github.com/micro/go-micro/v2/registry" 7 | "github.com/micro/go-plugins/registry/consul/v2" 8 | "go-crawler-distributed/global" 9 | "go-crawler-distributed/internal/model" 10 | "go-crawler-distributed/pkg/util" 11 | "go-crawler-distributed/service/elastic/proto" 12 | ) 13 | 14 | /** 15 | * @Author: super 16 | * @Date: 2021-01-05 19:34 17 | * @Description: 18 | **/ 19 | 20 | var elasticOP proto.ElasticOperationService 21 | 22 | func init() { 23 | reg := consul.NewRegistry(func(options *registry.Options) { 24 | options.Addrs = []string{ 25 | global.ConsulSetting.Url, 26 | } 27 | }) 28 | 29 | service := micro.NewService( 30 | micro.Registry(reg), 31 | micro.Name("go.micro.service.elastic.client"), 32 | ) 33 | service.Init() 34 | elasticOP = proto.NewElasticOperationService("go.micro.service.elastic", service.Client()) 35 | } 36 | 37 | func IndexExist(index string) (bool, error) { 38 | res, err := elasticOP.IndexExist(context.TODO(), &proto.IndexExistRequest{Index: index}) 39 | if err != nil { 40 | global.Logger.Error(context.Background(), err) 41 | return false, err 42 | } 43 | return res.Exist, nil 44 | } 45 | 46 | func SaveInfo(table string, data *model.Article) (string, error) { 47 | article := util.ArticleToProto(data) 48 | res, err := elasticOP.SaveInfo(context.TODO(), &proto.SaveInfoRequest{Table: table, Article: article}) 49 | if err != nil { 50 | global.Logger.Error(context.Background(), err) 51 | return "", err 52 | } 53 | return res.Result, nil 54 | } 55 | 56 | func GetInfo(table string, id string) (*model.Article, error) { 57 | res, err := elasticOP.GetInfo(context.TODO(), &proto.GetInfoRequest{Table: table, Id: id}) 
58 | if err != nil { 59 | global.Logger.Error(context.Background(), err) 60 | return nil, err 61 | } 62 | article := util.ProtoToArticle(res.Article) 63 | return article, nil 64 | } 65 | 66 | func SearchInfo(table string, fieldName string, fieldValue string) ([]*model.Article, error) { 67 | res, err := elasticOP.SearchInfo(context.TODO(), &proto.SearchInfoRequest{Table: table, FieldName: fieldName, FieldValue: fieldValue}) 68 | if err != nil { 69 | global.Logger.Error(context.Background(), err) 70 | return nil, err 71 | } 72 | l := len(res.Article) 73 | result := make([]*model.Article, l) 74 | 75 | for i := 0; i < l; i++ { 76 | temp := util.ProtoToArticle(res.Article[i]) 77 | result = append(result, temp) 78 | } 79 | return result, nil 80 | } 81 | -------------------------------------------------------------------------------- /service/elastic/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "github.com/micro/go-micro/v2" 6 | "github.com/micro/go-micro/v2/registry" 7 | "github.com/micro/go-plugins/registry/consul/v2" 8 | "go-crawler-distributed/global" 9 | "go-crawler-distributed/service/elastic/proto" 10 | "go-crawler-distributed/service/elastic/server" 11 | "time" 12 | ) 13 | 14 | /** 15 | * @Author: super 16 | * @Date: 2020-09-01 20:44 17 | * @Description: 18 | **/ 19 | 20 | func main() { 21 | reg := consul.NewRegistry(func(options *registry.Options) { 22 | options.Addrs = []string{ 23 | global.ConsulSetting.Url, 24 | } 25 | }) 26 | 27 | service := micro.NewService( 28 | micro.Registry(reg), 29 | micro.Name("go.micro.service.elastic"), 30 | micro.RegisterTTL(time.Second*10), 31 | micro.RegisterInterval(time.Second*5), 32 | ) 33 | service.Init() 34 | 35 | // 注册处理器 36 | err := proto.RegisterElasticOperationHandler(service.Server(), new(server.Elastic)) 37 | if err != nil { 38 | global.Logger.Error(context.Background(), err) 39 | } 40 | 41 | // 运行服务 42 | if err := 
service.Run(); err != nil { 43 | global.Logger.Error(context.Background(), err) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /service/elastic/proto/elastic.pb.micro.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-micro. DO NOT EDIT. 2 | // source: elastic.proto 3 | 4 | package proto 5 | 6 | import ( 7 | fmt "fmt" 8 | proto "github.com/golang/protobuf/proto" 9 | math "math" 10 | ) 11 | 12 | import ( 13 | context "context" 14 | api "github.com/micro/go-micro/v2/api" 15 | client "github.com/micro/go-micro/v2/client" 16 | server "github.com/micro/go-micro/v2/server" 17 | ) 18 | 19 | // Reference imports to suppress errors if they are not otherwise used. 20 | var _ = proto.Marshal 21 | var _ = fmt.Errorf 22 | var _ = math.Inf 23 | 24 | // This is a compile-time assertion to ensure that this generated file 25 | // is compatible with the proto package it is being compiled against. 26 | // A compilation error at this line likely means your copy of the 27 | // proto package needs to be updated. 28 | const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package 29 | 30 | // Reference imports to suppress errors if they are not otherwise used. 
31 | var _ api.Endpoint 32 | var _ context.Context 33 | var _ client.Option 34 | var _ server.Option 35 | 36 | // Api Endpoints for ElasticOperation service 37 | 38 | func NewElasticOperationEndpoints() []*api.Endpoint { 39 | return []*api.Endpoint{} 40 | } 41 | 42 | // Client API for ElasticOperation service 43 | 44 | type ElasticOperationService interface { 45 | IndexExist(ctx context.Context, in *IndexExistRequest, opts ...client.CallOption) (*IndexExistResponse, error) 46 | SaveInfo(ctx context.Context, in *SaveInfoRequest, opts ...client.CallOption) (*SaveInfoResponse, error) 47 | GetInfo(ctx context.Context, in *GetInfoRequest, opts ...client.CallOption) (*GetInfoResponse, error) 48 | SearchInfo(ctx context.Context, in *SearchInfoRequest, opts ...client.CallOption) (*SearchInfoResponse, error) 49 | } 50 | 51 | type elasticOperationService struct { 52 | c client.Client 53 | name string 54 | } 55 | 56 | func NewElasticOperationService(name string, c client.Client) ElasticOperationService { 57 | return &elasticOperationService{ 58 | c: c, 59 | name: name, 60 | } 61 | } 62 | 63 | func (c *elasticOperationService) IndexExist(ctx context.Context, in *IndexExistRequest, opts ...client.CallOption) (*IndexExistResponse, error) { 64 | req := c.c.NewRequest(c.name, "ElasticOperation.IndexExist", in) 65 | out := new(IndexExistResponse) 66 | err := c.c.Call(ctx, req, out, opts...) 67 | if err != nil { 68 | return nil, err 69 | } 70 | return out, nil 71 | } 72 | 73 | func (c *elasticOperationService) SaveInfo(ctx context.Context, in *SaveInfoRequest, opts ...client.CallOption) (*SaveInfoResponse, error) { 74 | req := c.c.NewRequest(c.name, "ElasticOperation.SaveInfo", in) 75 | out := new(SaveInfoResponse) 76 | err := c.c.Call(ctx, req, out, opts...) 
77 | if err != nil { 78 | return nil, err 79 | } 80 | return out, nil 81 | } 82 | 83 | func (c *elasticOperationService) GetInfo(ctx context.Context, in *GetInfoRequest, opts ...client.CallOption) (*GetInfoResponse, error) { 84 | req := c.c.NewRequest(c.name, "ElasticOperation.GetInfo", in) 85 | out := new(GetInfoResponse) 86 | err := c.c.Call(ctx, req, out, opts...) 87 | if err != nil { 88 | return nil, err 89 | } 90 | return out, nil 91 | } 92 | 93 | func (c *elasticOperationService) SearchInfo(ctx context.Context, in *SearchInfoRequest, opts ...client.CallOption) (*SearchInfoResponse, error) { 94 | req := c.c.NewRequest(c.name, "ElasticOperation.SearchInfo", in) 95 | out := new(SearchInfoResponse) 96 | err := c.c.Call(ctx, req, out, opts...) 97 | if err != nil { 98 | return nil, err 99 | } 100 | return out, nil 101 | } 102 | 103 | // Server API for ElasticOperation service 104 | 105 | type ElasticOperationHandler interface { 106 | IndexExist(context.Context, *IndexExistRequest, *IndexExistResponse) error 107 | SaveInfo(context.Context, *SaveInfoRequest, *SaveInfoResponse) error 108 | GetInfo(context.Context, *GetInfoRequest, *GetInfoResponse) error 109 | SearchInfo(context.Context, *SearchInfoRequest, *SearchInfoResponse) error 110 | } 111 | 112 | func RegisterElasticOperationHandler(s server.Server, hdlr ElasticOperationHandler, opts ...server.HandlerOption) error { 113 | type elasticOperation interface { 114 | IndexExist(ctx context.Context, in *IndexExistRequest, out *IndexExistResponse) error 115 | SaveInfo(ctx context.Context, in *SaveInfoRequest, out *SaveInfoResponse) error 116 | GetInfo(ctx context.Context, in *GetInfoRequest, out *GetInfoResponse) error 117 | SearchInfo(ctx context.Context, in *SearchInfoRequest, out *SearchInfoResponse) error 118 | } 119 | type ElasticOperation struct { 120 | elasticOperation 121 | } 122 | h := &elasticOperationHandler{hdlr} 123 | return s.Handle(s.NewHandler(&ElasticOperation{h}, opts...)) 124 | } 125 | 126 | type 
elasticOperationHandler struct { 127 | ElasticOperationHandler 128 | } 129 | 130 | func (h *elasticOperationHandler) IndexExist(ctx context.Context, in *IndexExistRequest, out *IndexExistResponse) error { 131 | return h.ElasticOperationHandler.IndexExist(ctx, in, out) 132 | } 133 | 134 | func (h *elasticOperationHandler) SaveInfo(ctx context.Context, in *SaveInfoRequest, out *SaveInfoResponse) error { 135 | return h.ElasticOperationHandler.SaveInfo(ctx, in, out) 136 | } 137 | 138 | func (h *elasticOperationHandler) GetInfo(ctx context.Context, in *GetInfoRequest, out *GetInfoResponse) error { 139 | return h.ElasticOperationHandler.GetInfo(ctx, in, out) 140 | } 141 | 142 | func (h *elasticOperationHandler) SearchInfo(ctx context.Context, in *SearchInfoRequest, out *SearchInfoResponse) error { 143 | return h.ElasticOperationHandler.SearchInfo(ctx, in, out) 144 | } 145 | -------------------------------------------------------------------------------- /service/elastic/proto/elastic.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option go_package = ".;proto"; 4 | 5 | service ElasticOperation { 6 | rpc IndexExist(IndexExistRequest) returns (IndexExistResponse) {} 7 | rpc SaveInfo(SaveInfoRequest) returns (SaveInfoResponse) {} 8 | rpc GetInfo(GetInfoRequest) returns (GetInfoResponse) {} 9 | rpc SearchInfo(SearchInfoRequest) returns (SearchInfoResponse) {} 10 | } 11 | 12 | message Article{ 13 | string title = 1; 14 | string url = 2; 15 | repeated string genres = 3; 16 | string content = 4; 17 | } 18 | 19 | message IndexExistRequest { 20 | string index = 1; 21 | } 22 | 23 | message IndexExistResponse{ 24 | bool exist = 1; 25 | } 26 | 27 | message SaveInfoRequest{ 28 | string table = 1; 29 | Article article = 2; 30 | } 31 | 32 | message SaveInfoResponse{ 33 | string result = 1; 34 | } 35 | 36 | message GetInfoRequest{ 37 | string table = 1; 38 | string id = 2; 39 | } 40 | 41 | message GetInfoResponse{ 42 
| Article article = 1; 43 | } 44 | 45 | message SearchInfoRequest{ 46 | string table = 1; 47 | string fieldName = 2; 48 | string fieldValue = 3; 49 | } 50 | 51 | message SearchInfoResponse{ 52 | repeated Article article = 1; 53 | } -------------------------------------------------------------------------------- /service/elastic/server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | "go-crawler-distributed/pkg/elastic" 6 | "go-crawler-distributed/pkg/util" 7 | "go-crawler-distributed/service/elastic/proto" 8 | ) 9 | 10 | /** 11 | * @Author: super 12 | * @Date: 2020-09-01 21:33 13 | * @Description: 14 | **/ 15 | 16 | type Elastic struct { 17 | } 18 | 19 | func (e *Elastic) IndexExist(ctx context.Context, req *proto.IndexExistRequest, res *proto.IndexExistResponse) error { 20 | exist, err := elastic.IndexExist(req.Index) 21 | if err != nil { 22 | return err 23 | } 24 | res.Exist = exist 25 | return nil 26 | } 27 | 28 | func (e *Elastic) SaveInfo(ctx context.Context, req *proto.SaveInfoRequest, res *proto.SaveInfoResponse) error { 29 | 30 | article := util.ProtoToArticle(req.Article) 31 | 32 | id, err := elastic.SaveInfo(req.Table, article) 33 | if err != nil { 34 | return err 35 | } 36 | res.Result = id 37 | return nil 38 | } 39 | 40 | func (e *Elastic) GetInfo(ctx context.Context, req *proto.GetInfoRequest, res *proto.GetInfoResponse) error { 41 | article, err := elastic.GetInfo(req.Table, req.Id) 42 | if err != nil { 43 | return err 44 | } 45 | result := util.ArticleToProto(article) 46 | res.Article = result 47 | return nil 48 | } 49 | 50 | func (e *Elastic) SearchInfo(ctx context.Context, req *proto.SearchInfoRequest, res *proto.SearchInfoResponse) error { 51 | articles, err := elastic.SearchInfo(req.Table, req.FieldName, req.FieldValue) 52 | if err != nil { 53 | return err 54 | } 55 | l := len(articles) 56 | result := make([]*proto.Article, l) 57 | 58 | for i := 0; i 
< l; i++ { 59 | temp := util.ArticleToProto(articles[i]) 60 | result = append(result, temp) 61 | } 62 | res.Article = result 63 | return nil 64 | } 65 | -------------------------------------------------------------------------------- /service/meituan/crawl_detail/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crawler" 5 | "go-crawler-distributed/internal/crawler/crawerConfig" 6 | "go-crawler-distributed/internal/crawler/meituan/parser" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-09-01 19:10 12 | * @Description: 13 | **/ 14 | 15 | func main() { 16 | crawler.Crawl(crawerConfig.ArticleUrlList, crawerConfig.ArticleDetail, "ArticleDetail", parser.ParseArticleDetail) 17 | } 18 | -------------------------------------------------------------------------------- /service/meituan/crawl_list/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crawler" 5 | "go-crawler-distributed/internal/crawler/crawerConfig" 6 | "go-crawler-distributed/internal/crawler/meituan/parser" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-09-01 18:30 12 | * @Description: 13 | **/ 14 | 15 | func main() { 16 | crawler.Crawl("", crawerConfig.ArticleList, "ArticleList", parser.ParseArticleList) 17 | } 18 | -------------------------------------------------------------------------------- /service/meituan/crawl_urllist/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crawler" 5 | "go-crawler-distributed/internal/crawler/crawerConfig" 6 | "go-crawler-distributed/internal/crawler/meituan/parser" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-09-01 19:03 12 | * @Description: 13 | **/ 14 | 15 | func main() { 16 | 
crawler.Crawl(crawerConfig.ArticleList, crawerConfig.ArticleUrlList, "ArticleUrlList", parser.ParseArticleUrlList) 17 | } 18 | -------------------------------------------------------------------------------- /service/meituan/storage_detail/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "go-crawler-distributed/internal/crawler" 5 | "go-crawler-distributed/internal/crawler/crawerConfig" 6 | "go-crawler-distributed/internal/crawler/meituan/storage" 7 | ) 8 | 9 | /** 10 | * @Author: super 11 | * @Date: 2020-09-01 19:37 12 | * @Description: 13 | **/ 14 | 15 | func main() { 16 | crawler.Crawl(crawerConfig.ArticleDetail, "", "storageArticleDetail", storage.StorageArticle) 17 | } 18 | --------------------------------------------------------------------------------