├── README.md
├── account
├── account.txt
└── target.txt
├── browser
└── readme.md
├── config
└── config_utils.go
├── example-config.yaml
├── go.mod
├── go.sum
├── login.go
├── mdb
├── items.go
└── mdb.go
├── scrapy_rules
├── base_rules.go
├── fans_rules.go
├── follower_rules.go
├── follower_senior_rules.go
├── information_rules.go
└── tweet_rules.go
├── utils
└── utils.go
└── weibo_spider.go
/README.md:
--------------------------------------------------------------------------------
1 | # WeiboSpiderGo
2 |
3 | A Sina Weibo (Chinese Twitter) spider written in Go using colly.
4 |
5 | 可以双击运行的微博爬虫
6 |
7 | #### 使用准备
8 |
9 | - chrome driver安装
10 |
11 | 本地需要安装chrome浏览器、以及下载对应版本的chromedriver。例如,本机上安装了版本号为78的chrome则需要在https://chromedriver.chromium.org/downloads 链接中下载版本号为78的,对应平台的chromedriver.zip
12 |
13 | 将解压后的chromedriver文件放于项目的browser目录下
14 |
15 | - mongodb安装
16 |
17 | 安装mongodb,将地址和端口、数据库名填写到config.yaml中
18 |
19 | #### 使用
20 |
21 | 完成上一步的使用准备工作后,可以下载release中的代码,在上方栏的release页中,分别有mac版与windows exe版本的压缩包,可以直接下载运行
22 |
23 | 填写需要登陆的账号于account/account.txt中,每行一个账号,源码的account文件夹可以看到示例,双击login开始批量登陆(账号要选择免验证码的邮箱登陆的账号)
24 |
25 | 在account/target.txt文件中,同样每行一个,写下要爬取的用户id,双击weibo_spider或weibo_spider.exe开始爬取
26 |
27 | 希望大家不要涸泽而渔,release中的版本爬取间隔为10秒左右
28 |
29 | #### 配置文件内容
30 |
31 | 配置文件为根目录下的config.yaml
32 |
33 | 配置文件含义
34 |
35 | ```
36 | MONGO_ADDRESS - mongodb地址
37 | DB_NAME - mongodb数据库名
38 | ACCOUNT_FILE - 存放要爬取的目标账户id的文件
39 | DRIVER_PATH - chromedriver路径(相对于程序所在目录),例如 "/browser/chromedriver"
40 | # 爬取类型
41 | SCRAPY_TYPE:
42 | Info: True
43 | Follow: False
44 | Fans: False
45 | Tweet:
46 | Main: True
47 | Comment: False
48 | ```
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 | #### 编译
57 |
58 | 安装依赖后使用
59 |
60 | ```
61 | go build login.go
62 | go build weibo_spider.go
63 | ```
64 |
65 | 进行编译
66 |
67 | #### 下一步
68 |
69 | - [ ] 代理ip添加
70 | - [ ] 图片视频下载
71 |
72 |
--------------------------------------------------------------------------------
/account/account.txt:
--------------------------------------------------------------------------------
1 | example@163.com----12333444
2 | example@qq.com----122223333
3 | example@126.com----12223113
4 |
--------------------------------------------------------------------------------
/account/target.txt:
--------------------------------------------------------------------------------
1 | 3217179555
2 | 1742566624
3 |
--------------------------------------------------------------------------------
/browser/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eureka101v/WeiboSpiderGo/3f3dcbb0447adc28611ed04aec93a9e736109f7c/browser/readme.md
--------------------------------------------------------------------------------
/config/config_utils.go:
--------------------------------------------------------------------------------
1 | package config
2 |
3 | import (
4 | "WeiboSpiderGo/utils"
5 | "github.com/spf13/viper"
6 | )
7 |
8 | var Conf *viper.Viper
9 |
10 | func init() {
11 | Conf = viper.New()
12 |
13 | Conf.SetConfigName("config")
14 |
15 | Conf.AddConfigPath(utils.ExecPath)
16 |
17 | Conf.SetConfigType("yaml")
18 | if err := Conf.ReadInConfig(); err != nil {
19 | panic(err)
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/example-config.yaml:
--------------------------------------------------------------------------------
1 | MONGO_ADDRESS: "127.0.0.1:27017"
2 | DB_NAME: "Sina2"
3 | ACCOUNT_FILE: "/account/account.txt"
4 | DRIVER_PATH: "/browser/chromedriver"
5 | # 爬取类型
6 | SCRAPY_TYPE:
7 | Info: True
8 | Follow: False
9 | Fans: False
10 | Tweet:
11 | Main: True
12 | Comment: False
13 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module WeiboSpiderGo
2 |
3 | go 1.13
4 |
5 | require (
6 | github.com/PuerkitoBio/goquery v1.5.0 // indirect
7 | github.com/antchfx/htmlquery v1.1.0 // indirect
8 | github.com/antchfx/xmlquery v1.1.0 // indirect
9 | github.com/antchfx/xpath v1.1.0
10 | github.com/gobwas/glob v0.2.3 // indirect
11 | github.com/gocolly/colly v1.2.0
12 | github.com/kennygrant/sanitize v1.2.4 // indirect
13 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
14 | github.com/spf13/viper v1.5.0
15 | github.com/tebeka/selenium v0.9.9
16 | github.com/temoto/robotstxt v1.1.1 // indirect
17 | golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 // indirect
18 | gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22
19 | )
20 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
2 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
3 | cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
4 | cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg=
5 | github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
6 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
7 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=
8 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
9 | github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
10 | github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
11 | github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
12 | github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
13 | github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
14 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
15 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
16 | github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
17 | github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
18 | github.com/antchfx/htmlquery v1.1.0 h1:KMS88sLl5KP9GUVU2MQIDcQXNQ0M5MGlkC9WlYgAQqY=
19 | github.com/antchfx/htmlquery v1.1.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
20 | github.com/antchfx/xmlquery v1.1.0 h1:vj0kZ1y3Q6my4AV+a9xbWrMYzubw+84zuiKgvfV8vb8=
21 | github.com/antchfx/xmlquery v1.1.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
22 | github.com/antchfx/xpath v1.1.0 h1:mJTvYpiHvxNQRD4Lbfin/FodHVCHh2a5KrOFr4ZxMOI=
23 | github.com/antchfx/xpath v1.1.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
24 | github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
25 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
26 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
27 | github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
28 | github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
29 | github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
30 | github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
31 | github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
32 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
33 | github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
34 | github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
35 | github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
36 | github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
37 | github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
38 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
39 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
40 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
41 | github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
42 | github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
43 | github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
44 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
45 | github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
46 | github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
47 | github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
48 | github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
49 | github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
50 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
51 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
52 | github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
53 | github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
54 | github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
55 | github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
56 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
57 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
58 | github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
59 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
60 | github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
61 | github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
62 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
63 | github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
64 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
65 | github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
66 | github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
67 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
68 | github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
69 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
70 | github.com/google/go-github/v27 v27.0.4/go.mod h1:/0Gr8pJ55COkmv+S/yPKCczSkUPIM/LnFyubufRNIS0=
71 | github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
72 | github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
73 | github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
74 | github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
75 | github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
76 | github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
77 | github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
78 | github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs=
79 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
80 | github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
81 | github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
82 | github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
83 | github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
84 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
85 | github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
86 | github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
87 | github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
88 | github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
89 | github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
90 | github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
91 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
92 | github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
93 | github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
94 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
95 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
96 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
97 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
98 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
99 | github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
100 | github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
101 | github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
102 | github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE=
103 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
104 | github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
105 | github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
106 | github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
107 | github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
108 | github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
109 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
110 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
111 | github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
112 | github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso=
113 | github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
114 | github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
115 | github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
116 | github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
117 | github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
118 | github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
119 | github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
120 | github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
121 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
122 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
123 | github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
124 | github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
125 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
126 | github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI=
127 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
128 | github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8=
129 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
130 | github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk=
131 | github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
132 | github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
133 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
134 | github.com/spf13/viper v1.5.0 h1:GpsTwfsQ27oS/Aha/6d1oD7tpKIqWnOA6tgOX9HHkt4=
135 | github.com/spf13/viper v1.5.0/go.mod h1:AkYRkVJF8TkSG/xet6PzXX+l39KhhXa2pdqVSxnTcn4=
136 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
137 | github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
138 | github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
139 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
140 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
141 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
142 | github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
143 | github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
144 | github.com/tebeka/selenium v0.9.9 h1:cNziB+etNgyH/7KlNI7RMC1ua5aH1+5wUlFQyzeMh+w=
145 | github.com/tebeka/selenium v0.9.9/go.mod h1:5Fr8+pUvU6B1OiPfkdCKdXZyr5znvVkxuPd0NOdZCQc=
146 | github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
147 | github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
148 | github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
149 | github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
150 | github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
151 | github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
152 | go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
153 | go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
154 | go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
155 | go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
156 | go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
157 | go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
158 | golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
159 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
160 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
161 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
162 | golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
163 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
164 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
165 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
166 | golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
167 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
168 | golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
169 | golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
170 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
171 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
172 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
173 | golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
174 | golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
175 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
176 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
177 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
178 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
179 | golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
180 | golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
181 | golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
182 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
183 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
184 | golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 h1:DPz9iiH3YoKiKhX/ijjoZvT0VFwK2c6CWYWQ7Zyr8TU=
185 | golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
186 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
187 | golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
188 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
189 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
190 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
191 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
192 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
193 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
194 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
195 | golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
196 | golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
197 | golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
198 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
199 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
200 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
201 | golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
202 | golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
203 | golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
204 | golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0 h1:HyfiK1WMnHj5FXFXatD+Qs1A/xC2Run6RzeW1SyHxpc=
205 | golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
206 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
207 | golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
208 | golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
209 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
210 | golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
211 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
212 | golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
213 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
214 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
215 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
216 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
217 | golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
218 | golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
219 | golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
220 | golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
221 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
222 | golang.org/x/tools v0.0.0-20190624190245-7f2218787638/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
223 | google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
224 | google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
225 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
226 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
227 | google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
228 | google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I=
229 | google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
230 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
231 | google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
232 | google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
233 | google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
234 | google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
235 | google.golang.org/genproto v0.0.0-20190626174449-989357319d63/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s=
236 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
237 | google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
238 | google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
239 | google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
240 | gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
241 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
242 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
243 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
244 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
245 | gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw=
246 | gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
247 | gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo=
248 | gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
249 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
250 | gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
251 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
252 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
253 | honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
254 | honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
255 | rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
256 |
--------------------------------------------------------------------------------
/login.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "WeiboSpiderGo/config"
5 | "WeiboSpiderGo/mdb"
6 | "WeiboSpiderGo/utils"
7 | "bufio"
8 | "fmt"
9 | "github.com/tebeka/selenium"
10 | "github.com/tebeka/selenium/chrome"
11 | "gopkg.in/mgo.v2/bson"
12 | "os"
13 | "strings"
14 | )
15 |
// db_name holds the value of the DB_NAME configuration key, read once when
// the package is initialised (the README describes DB_NAME as the MongoDB
// database name).
var db_name = config.Conf.GetString("DB_NAME")
17 |
18 | func Displayed(by, elementName string) func(selenium.WebDriver) (bool, error) {
19 | return func(wd selenium.WebDriver) (bool, error) {
20 | el, err := wd.FindElement(by, elementName)
21 | if err != nil {
22 | return false, nil
23 | }
24 | enabled, err := el.IsDisplayed()
25 | if err != nil {
26 | return false, nil
27 | }
28 |
29 | if !enabled {
30 | return false, nil
31 | }
32 |
33 | return true, nil
34 | }
35 | }
36 |
37 | func getCookieStr(username_text string, password_text string) string {
38 | // Start a Selenium WebDriver server instance (if one is not already
39 | // running).
40 | //username_text := "1222"
41 | //password_text := "23121"
42 | var (
43 | // These paths will be different on your system.
44 | driverPath = utils.ExecPath + config.Conf.GetString("DRIVER_PATH")
45 | port = 9005
46 | )
47 | opts := []selenium.ServiceOption{}
48 |
49 | service, err := selenium.NewChromeDriverService(driverPath, port, opts...)
50 | if nil != err {
51 | fmt.Println("start a chromedriver service falid", err.Error())
52 | return ""
53 | }
54 | //注意这里,server关闭之后,chrome窗口也会关闭
55 | defer service.Stop()
56 |
57 | // Connect to the WebDriver instance running locally.
58 | caps := selenium.Capabilities{"browserName": "chrome"}
59 | //禁止图片加载,加快渲染速度
60 | imagCaps := map[string]interface{}{
61 | "profile.managed_default_content_settings.images": 2,
62 | }
63 | chromeCaps := chrome.Capabilities{
64 | Prefs: imagCaps,
65 | Path: "",
66 | Args: []string{
67 | "--headless", // 设置Chrome无头模式,在linux下运行,需要设置这个参数,否则会报错
68 | //"--no-sandbox",
69 | "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", // 模拟user-agent,防反爬
70 | },
71 | }
72 | //以上是设置浏览器参数
73 | caps.AddChrome(chromeCaps)
74 | wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
75 | if err != nil {
76 | fmt.Println("connect to the webDriver faild", err.Error())
77 | return ""
78 | }
79 | defer wd.Quit()
80 | err = wd.Get("https://passport.weibo.cn/signin/login?entry=mweibo&r=https://weibo.cn/")
81 | if err != nil {
82 | fmt.Println("get page faild", err.Error())
83 | return ""
84 | }
85 | wd.Wait(Displayed(selenium.ByCSSSelector, "#loginName"))
86 | wd.Wait(Displayed(selenium.ByCSSSelector, "#loginPassword"))
87 | wd.Wait(Displayed(selenium.ByCSSSelector, "#loginAction"))
88 | username, err := wd.FindElement(selenium.ByCSSSelector, "#loginName")
89 | if err != nil {
90 | fmt.Println("get username faild", err.Error())
91 | return ""
92 | }
93 | password, err := wd.FindElement(selenium.ByCSSSelector, "#loginPassword")
94 | if err != nil {
95 | fmt.Println("get username faild", err.Error())
96 | return ""
97 | }
98 | submit, err := wd.FindElement(selenium.ByCSSSelector, "#loginAction")
99 | if err != nil {
100 | fmt.Println("get username faild", err.Error())
101 | return ""
102 | }
103 | username.SendKeys(username_text)
104 | password.SendKeys(password_text)
105 | submit.Click()
106 | wd.Wait(func(wdtemp selenium.WebDriver) (b bool, e error) {
107 | tit, err := wdtemp.Title()
108 | if err != nil {
109 | return false, nil
110 | }
111 | if tit != "我的首页" {
112 | return false, nil
113 | }
114 | return true, nil
115 | })
116 | mcookie, err := wd.GetCookies()
117 | var cookie_arr []string
118 | for _, c := range mcookie {
119 | cookie_arr = append(cookie_arr, c.Name+"="+c.Value)
120 | }
121 | cookie_str := strings.Join(cookie_arr, ";")
122 | return cookie_str
123 | }
124 |
125 | func saveToMgo(id_ string, password string, cookie_str string) {
126 | err := mdb.Upsert(db_name, "account", bson.M{"_id": id_}, bson.M{"$set": bson.M{"password": password, "cookie": cookie_str, "status": "success"}})
127 | if err != nil {
128 | panic(err)
129 | }
130 | if cookie_str != "" {
131 | fmt.Println("login success")
132 | } else {
133 | fmt.Println("login fail")
134 | }
135 | }
136 |
137 | func main() {
138 | file, err := os.Open(utils.ExecPath + config.Conf.GetString("ACCOUNT_FILE"))
139 | fmt.Println(utils.ExecPath + config.Conf.GetString("ACCOUNT_FILE"))
140 | if err != nil {
141 | fmt.Println("account.txt is not found")
142 | }
143 | defer file.Close()
144 | scanner := bufio.NewScanner(file)
145 | for scanner.Scan() {
146 | lineText := scanner.Text()
147 | text := strings.Split(lineText, "----")
148 | fmt.Println("start login username:", text[0])
149 | cookiestr := getCookieStr(text[0], text[1])
150 | saveToMgo(text[0], text[1], cookiestr)
151 | }
152 | }
153 |
--------------------------------------------------------------------------------
/mdb/items.go:
--------------------------------------------------------------------------------
1 | package mdb
2 |
// Account is a document of the "account" collection: one login account
// together with the cookie obtained for it.
type Account struct {
	Id_      string `bson:"_id"`      // account user name, used as the document key
	Password string `bson:"password"` // password as read from the account file
	Cookie   string `bson:"cookie"`   // "name=value;..." cookie string sent with crawl requests
	Status   string `bson:"status"`   // "success" after login; set to "error" when a request is rejected (302/403)
}
9 |
// Tweets is a document of the "Tweets" collection: one post scraped from a
// user's profile pages.
type Tweets struct {
	Id_             string `bson:"_id"`               // "<user id>_<post id>"
	WeiboUrl        string `bson:"weibo_url"`         // https://weibo.com/<user id>/<post id>
	CreatedAt       string `bson:"created_at"`        // publication time as normalized by utils.ConvTime
	LikeNum         int32  `bson:"like_num"`          // number parsed from the like link text
	RepostNum       int32  `bson:"repost_num"`        // number parsed from the repost link text
	CommentNum      int32  `bson:"comment_num"`       // number parsed from the comment link text
	Content         string `bson:"content"`           // post text
	UserId          string `bson:"user_id"`           // uid parsed from the repost link
	Tool            string `bson:"tool"`              // text following "来自" in the time line, if present
	ImageUrl        string `bson:"image_url"`         // src of the post image, if any
	VideoUrl        string `bson:"video_url"`         // href of the video link, if any
	OriginWeibo     string `bson:"origin_weibo"`      // href of the original post's comment link for reposts
	LocationMapInfo string `bson:"location_map_info"` // value of the "xy" parameter of the map link, if any
	CrawlTime       int32  `bson:"crawl_time"`        // Unix time (seconds) when the record was scraped
}
26 |
// Information is a document of the "Information" collection: the profile
// data scraped for one user.
type Information struct {
	Id_               string `bson:"_id"` // user id parsed from the info page URL
	Nickname          string `bson:"nick_name"`
	Gender            string `bson:"gender"`
	Province          string `bson:"province"` // first space-separated part of the region text
	City              string `bson:"city"`     // second part of the region text, if present
	BriefIntroduction string `bson:"brief_introduction"`
	Birthday          string `bson:"birthday"`
	TweetsNum         int32  `bson:"tweets_num"`  // counter parsed from the user's home page
	FollowsNum        int32  `bson:"follows_num"` // counter parsed from the user's home page
	FansNum           int32  `bson:"fans_num"`    // counter parsed from the user's home page
	SexOrientation    string `bson:"sex_orientation"`
	Sentiment         string `bson:"sentiment"`
	VipLevel          string `bson:"vip_level"`
	Authentication    string `bson:"authentication"`
	Labels            string `bson:"labels"`     // comma-separated labels
	CrawlTime         int32  `bson:"crawl_time"` // Unix time (seconds) when the record was scraped
}
45 |
// Relationships is a document of the "Relationships" collection: a single
// "fan follows user" edge.
type Relationships struct {
	Id_        string `bson:"_id"`         // "<fan id>-<followed id>"
	FanId      string `bson:"fan_id"`      // id of the user who follows
	FollowedId string `bson:"followed_id"` // id of the user being followed
	CrawlTime  int32  `bson:"crawl_time"`  // Unix time (seconds) when the record was scraped
}
52 |
// Comment is a document of the "Comments" collection: one comment scraped
// from a post's comment pages.
type Comment struct {
	Id_           string `bson:"_id"`             // id attribute of the comment element
	CommentUserId string `bson:"comment_user_id"` // uid parsed from the commenter's "/u/<id>" link
	Content       string `bson:"content"`         // comment text
	WeiboUrl      string `bson:"weibo_url"`       // URL of the post the comment belongs to
	CreatedAt     string `bson:"created_at"`      // comment time as normalized by utils.ConvTime
	LikeNum       int32  `bson:"like_num"`        // number parsed from the like link text
	CrawlTime     int32  `bson:"crawl_time"`      // Unix time (seconds) when the record was scraped
}
62 |
--------------------------------------------------------------------------------
/mdb/mdb.go:
--------------------------------------------------------------------------------
1 | package mdb
2 |
3 | import (
4 | "WeiboSpiderGo/config"
5 | "gopkg.in/mgo.v2"
6 | "log"
7 | )
8 |
// globalS is the session dialed in init; connect copies it for every operation.
var globalS *mgo.Session
10 |
11 | func init() {
12 | s, err := mgo.Dial(config.Conf.GetString("MONGO_ADDRESS"))
13 | if err != nil {
14 | log.Fatalf("Create Session: %s\n", err)
15 | }
16 | globalS = s
17 | }
18 |
19 | func connect(db, collection string) (*mgo.Session, *mgo.Collection) {
20 | ms := globalS.Copy()
21 | c := ms.DB(db).C(collection)
22 | ms.SetMode(mgo.Monotonic, true)
23 | return ms, c
24 | }
25 |
26 | func Insert(db, collection string, doc interface{}) error {
27 | ms, c := connect(db, collection)
28 | defer ms.Close()
29 |
30 | return c.Insert(doc)
31 | }
32 |
33 | func FindOne(db, collection string, query, selector, result interface{}) error {
34 | ms, c := connect(db, collection)
35 | defer ms.Close()
36 |
37 | return c.Find(query).Select(selector).One(result)
38 | }
39 |
40 | func FindAll(db, collection string, query, selector, result interface{}) error {
41 | ms, c := connect(db, collection)
42 | defer ms.Close()
43 |
44 | return c.Find(query).Select(selector).All(result)
45 | }
46 |
47 | func Update(db, collection string, selector, update interface{}) error {
48 | ms, c := connect(db, collection)
49 | defer ms.Close()
50 |
51 | return c.Update(selector, update)
52 | }
53 |
54 | //更新,如果不存在就插入一个新的数据 `upsert:true`
55 | func Upsert(db, collection string, selector, update interface{}) error {
56 | ms, c := connect(db, collection)
57 | defer ms.Close()
58 |
59 | _, err := c.Upsert(selector, update)
60 | return err
61 | }
62 |
63 | // `multi:true`
64 | func UpdateAll(db, collection string, selector, update interface{}) error {
65 | ms, c := connect(db, collection)
66 | defer ms.Close()
67 |
68 | _, err := c.UpdateAll(selector, update)
69 | return err
70 | }
71 |
72 | func Remove(db, collection string, selector interface{}) error {
73 | ms, c := connect(db, collection)
74 | defer ms.Close()
75 |
76 | return c.Remove(selector)
77 | }
78 |
79 | func RemoveAll(db, collection string, selector interface{}) error {
80 | ms, c := connect(db, collection)
81 | defer ms.Close()
82 |
83 | _, err := c.RemoveAll(selector)
84 | return err
85 | }
86 |
87 | func FindPage(db, collection string, page, limit int, query, selector, result interface{}) error {
88 | ms, c := connect(db, collection)
89 | defer ms.Close()
90 |
91 | return c.Find(query).Select(selector).Skip(page * limit).Limit(limit).All(result)
92 | }
93 |
94 | func IsEmpty(db, collection string) bool {
95 | ms, c := connect(db, collection)
96 | defer ms.Close()
97 | count, err := c.Count()
98 | if err != nil {
99 | log.Fatal(err)
100 | }
101 | return count == 0
102 | }
103 |
104 | func Count(db, collection string, query interface{}) (int, error) {
105 | ms, c := connect(db, collection)
106 | defer ms.Close()
107 | return c.Find(query).Count()
108 | }
109 |
--------------------------------------------------------------------------------
/scrapy_rules/base_rules.go:
--------------------------------------------------------------------------------
1 | package scrapy_rules
2 |
3 | import (
4 | "WeiboSpiderGo/config"
5 | "WeiboSpiderGo/mdb"
6 | "fmt"
7 | "github.com/gocolly/colly"
8 | "github.com/gocolly/colly/debug"
9 | "github.com/gocolly/colly/extensions"
10 | "gopkg.in/mgo.v2/bson"
11 | "math/rand"
12 | "net/http"
13 | "time"
14 | )
15 |
// BaseUrl is the site root every crawl URL is built from.
var BaseUrl = "https://weibo.cn"

// cookieStrLi holds the accounts loaded from the "account" collection; each
// request picks one of them at random for its Cookie header.
var cookieStrLi []mdb.Account

// dbName is the MongoDB database name, read from the DB_NAME configuration key.
var dbName = config.Conf.GetString("DB_NAME")
19 |
20 | // return a collector
21 | func GetDefaultCollector() *colly.Collector {
22 | //set async and dont forget set c.wait()
23 | if config.Conf.GetBool("DEBUG_MODE") {
24 | }
25 | debugger := &debug.LogDebugger{}
26 |
27 | //file,err := os.Create(utils.ExecPath+"/debug.log")
28 | //if err!=nil{
29 | // panic(err)
30 | //}
31 | //debugger.Output = file
32 |
33 | var c = colly.NewCollector(
34 | colly.Async(true),
35 | colly.Debugger(debugger),
36 | )
37 | //disable http KeepAlives its could cause OOM in long time work
38 | c.WithTransport(&http.Transport{
39 | DisableKeepAlives: true,
40 | })
41 | mdb.FindAll(dbName, "account", bson.M{}, bson.M{}, &cookieStrLi)
42 | setDefaultCallback(c)
43 | extensions.RandomUserAgent(c)
44 | return c
45 | }
46 |
47 | // set default call,cookie and error handling
48 | func setDefaultCallback(c *colly.Collector) {
49 | // set random cookie
50 | c.OnRequest(func(r *colly.Request) {
51 | n := rand.Intn(len(cookieStrLi))
52 | r.Headers.Set("Cookie", cookieStrLi[n].Cookie)
53 | r.Ctx.Put("_id", cookieStrLi[n].Id_)
54 | })
55 |
56 | // Limit the maximum parallelism to 2
57 | // This is necessary if the goroutines are dynamically
58 | // created to control the limit of simultaneous requests.
59 | //
60 | // Parallelism can be controlled also by spawning fixed
61 | // number of go routines.
62 |
63 | // delay 3 to 5 second
64 | delay := time.Duration(config.Conf.GetInt("DELAY"))
65 | randomDelay := time.Duration(config.Conf.GetInt("RANDOM_DELAY"))
66 | if delay == 0 || randomDelay == 0 {
67 | delay, randomDelay = 8, 2
68 | }
69 | c.Limit(&colly.LimitRule{DomainGlob: "*", Parallelism: 2, Delay: delay * time.Second, RandomDelay: randomDelay * time.Second})
70 |
71 | // deal with error statusCode
72 | c.OnError(func(r *colly.Response, e error) {
73 | if r.StatusCode == 302 || r.StatusCode == 403 {
74 | mdb.Update(dbName, "account", bson.M{"_id": r.Ctx.Get("_id")}, bson.M{"$set": bson.M{"status": "error"}})
75 | } else if r.StatusCode == 418 {
76 | fmt.Println("please wait a second")
77 | }
78 | })
79 | }
80 |
--------------------------------------------------------------------------------
/scrapy_rules/fans_rules.go:
--------------------------------------------------------------------------------
1 | package scrapy_rules
2 |
3 | import (
4 | "WeiboSpiderGo/mdb"
5 | "WeiboSpiderGo/utils"
6 | "fmt"
7 | "github.com/gocolly/colly"
8 | "gopkg.in/mgo.v2"
9 | "strconv"
10 | "strings"
11 | "time"
12 | )
13 |
14 | func SetFansCallback(getFansC *colly.Collector) {
15 | getFansC.OnResponse(func(r *colly.Response) {
16 | content := string(r.Body)
17 | uid := utils.ReParse(`(\d+)/fans`, r.Request.URL.String())
18 | if strings.Contains(r.Request.URL.String(), "page=1") {
19 | allPage := utils.ReParse(`/> 1/(\d+)页`, content)
20 | pageNum, _ := strconv.Atoi(allPage)
21 | for i := 2; i < (pageNum + 1); i++ {
22 | link := fmt.Sprintf("%s/%s/fans?page=%d",BaseUrl,uid,i)
23 | getFansC.Visit(link)
24 | }
25 | }
26 | })
27 | getFansC.OnXML(`//a[text()="关注他" or text()="关注她" or text()="移除"]/@href`, func(element *colly.XMLElement) {
28 | followUrl := element.Text
29 | uid := utils.ReParse(`uid=(\d+)`, followUrl)
30 | ID := utils.ReParse(`(\d+)/fans`, element.Request.URL.String())
31 | relationship := mdb.Relationships{}
32 | relationship.CrawlTime = int32(time.Now().Unix())
33 | relationship.FanId = uid
34 | relationship.FollowedId = ID
35 | relationship.Id_ = uid + "-" + ID
36 | err := mdb.Insert(dbName, "Relationships", relationship)
37 | if mgo.IsDup(err) {
38 | //有重复数据
39 | fmt.Println("already scrapy")
40 | }
41 | })
42 | }
43 |
44 | func GetFansUrl(uid string) string{
45 | return fmt.Sprintf("%s/%s/fans?page=1",BaseUrl,uid)
46 | }
47 |
--------------------------------------------------------------------------------
/scrapy_rules/follower_rules.go:
--------------------------------------------------------------------------------
1 | package scrapy_rules
2 |
3 | import (
4 | "WeiboSpiderGo/mdb"
5 | "WeiboSpiderGo/utils"
6 | "fmt"
7 | "github.com/gocolly/colly"
8 | "gopkg.in/mgo.v2"
9 | "strconv"
10 | "strings"
11 | "time"
12 | )
13 |
14 | func SetFollowCallback(getFollowC *colly.Collector){
15 | getFollowC.OnResponse(func(r *colly.Response) {
16 | content := string(r.Body)
17 | uid := utils.ReParse(`uid=(\d+)&`, r.Request.URL.String())
18 | if strings.Contains(r.Request.URL.String(), "page=1") {
19 | allPage := utils.ReParse(`/> 1/(\d+)页`, content)
20 | pageNum, _ := strconv.Atoi(allPage)
21 | for i := 2; i < (pageNum + 1); i++ {
22 | link := fmt.Sprintf("%s/%s/follow?page=%d",BaseUrl,uid,i)
23 | getFollowC.Visit(link)
24 | }
25 | }
26 | })
27 | getFollowC.OnXML(`//a[text()="关注他" or text()="关注她" or text()="取消关注"]/@href`, func(element *colly.XMLElement) {
28 | followUrl := element.Text
29 | uid := utils.ReParse(`uid=(\d+)`, followUrl)
30 | ID := utils.ReParse(`uid=(\d+)`, element.Request.URL.String())
31 | relationship := mdb.Relationships{}
32 | relationship.CrawlTime = int32(time.Now().Unix())
33 | relationship.FanId = ID
34 | relationship.FollowedId = uid
35 | relationship.Id_ = ID + "-" + uid
36 | err := mdb.Insert(dbName, "Relationships", relationship)
37 | if mgo.IsDup(err) {
38 | //有重复数据
39 | fmt.Println("already scrapy")
40 | }
41 | })
42 | }
43 |
44 | func GetFollowUrl(uid string) string{
45 | return fmt.Sprintf("%s/%s/follow?page=1",BaseUrl,uid)
46 | }
47 |
--------------------------------------------------------------------------------
/scrapy_rules/follower_senior_rules.go:
--------------------------------------------------------------------------------
1 | package scrapy_rules
2 |
3 | import (
4 | "WeiboSpiderGo/mdb"
5 | "WeiboSpiderGo/utils"
6 | "fmt"
7 | "github.com/gocolly/colly"
8 | "gopkg.in/mgo.v2"
9 | "strconv"
10 | "strings"
11 | "time"
12 | )
13 |
14 | func SetFollowSeniorCallback(getFollowC *colly.Collector){
15 | getFollowC.OnResponse(func(r *colly.Response) {
16 | content := string(r.Body)
17 | uid := utils.ReParse(`uid=(\d+)&`, r.Request.URL.String())
18 | if strings.Contains(r.Request.URL.String(), "page=1") {
19 | allPage := utils.ReParse(`/> 1/(\d+)页`, content)
20 | pageNum, _ := strconv.Atoi(allPage)
21 | for i := 2; i < (pageNum + 1); i++ {
22 | link := fmt.Sprintf("%s/attgroup/change?cat=user&uid=%s&page=%d",BaseUrl,uid,i)
23 | getFollowC.Visit(link)
24 | }
25 | }
26 | })
27 | getFollowC.OnXML(`//a[text()="关注他" or text()="关注她" or text()="取消关注"]/@href`, func(element *colly.XMLElement) {
28 | followUrl := element.Text
29 | uid := utils.ReParse(`uid=(\d+)`, followUrl)
30 | ID := utils.ReParse(`uid=(\d+)`, element.Request.URL.String())
31 | relationship := mdb.Relationships{}
32 | relationship.CrawlTime = int32(time.Now().Unix())
33 | relationship.FanId = ID
34 | relationship.FollowedId = uid
35 | relationship.Id_ = ID + "-" + uid
36 | err := mdb.Insert(dbName, "Relationships", relationship)
37 | if mgo.IsDup(err) {
38 | //有重复数据
39 | fmt.Println("already scrapy")
40 | }
41 | })
42 | }
43 |
44 | func GetFollowerSeniorUrl(uid string)string{
45 | return fmt.Sprintf("%s/attgroup/change?cat=user&uid=%s&page=1",BaseUrl,uid)
46 | }
47 |
--------------------------------------------------------------------------------
/scrapy_rules/information_rules.go:
--------------------------------------------------------------------------------
1 | package scrapy_rules
2 |
3 | import (
4 | "WeiboSpiderGo/mdb"
5 | "WeiboSpiderGo/utils"
6 | "fmt"
7 | "github.com/gocolly/colly"
8 | "gopkg.in/mgo.v2"
9 | "strconv"
10 | "strings"
11 | "time"
12 | )
13 |
14 | func SetInfoCallback(getInfoC, getMoreInfoC *colly.Collector) {
15 | getInfoC.OnResponse(func(r *colly.Response) {
16 | content := string(r.Body)
17 | info := mdb.Information{}
18 | info.CrawlTime = int32(time.Now().Unix())
19 | info.Id_ = utils.ReParse(`(\d+)/info`, r.Request.URL.String())
20 | nickName := utils.ReParse(`昵称?[::]?(.*?)
`, content)
21 | authentication := utils.ReParse(`认证?[::]?(.*?)
`, content)
22 | gender := utils.ReParse(`性别?[::]?(.*?)
`, content)
23 | place := utils.ReParse(`地区?[::]?(.*?)
`, content)
24 | briefIntroduction := utils.ReParse(`简介?[::]?(.*?)
`, content)
25 | birthday := utils.ReParse(`生日?[::]?(.*?)
`, content)
26 | sexOrientation := utils.ReParse(`性取向?[::]?(.*?)
`, content)
27 | sentiment := utils.ReParse(`感情状况?[::]?(.*?)
`, content)
28 | vipLevel := utils.ReParse(`会员等级?[::]?(.*?) (.*?)`, content) //标签
30 | info.Nickname = nickName
31 | info.Gender = gender
32 | placeli := strings.Split(place, " ")
33 | info.Province = placeli[0]
34 | if len(placeli) > 1 {
35 | info.City = placeli[1]
36 | }
37 | info.BriefIntroduction = briefIntroduction
38 | info.Birthday = birthday
39 | if sexOrientation == gender {
40 | info.SexOrientation = "同性恋"
41 | } else {
42 | info.SexOrientation = "异性恋"
43 | }
44 | info.Sentiment = sentiment
45 | info.VipLevel = vipLevel
46 | info.Authentication = authentication
47 | info.Labels = ""
48 | for i, labelItem := range labels {
49 | if i != 0 {
50 | info.Labels += ","
51 | }
52 | info.Labels += labelItem[1]
53 | }
54 | r.Ctx.Put("info", info)
55 | getMoreInfoC.Request("GET","https://weibo.cn/u/" + info.Id_,nil,r.Ctx,nil)
56 | })
57 | }
58 |
59 | func SetMoreInfoCallback(getMoreInfoC *colly.Collector){
60 | getMoreInfoC.OnResponse(func(r *colly.Response) {
61 | content := string(r.Body)
62 | info := r.Ctx.GetAny("info").(mdb.Information)
63 | tweetsNum := utils.ReParse(`微博\[(\d+)\]`, content)
64 | followsNum := utils.ReParse(`关注\[(\d+)\]`, content)
65 | fansNum := utils.ReParse(`粉丝\[(\d+)\]`, content)
66 | if tweetsNum != "" {
67 | temp, _ := strconv.Atoi(tweetsNum)
68 | info.TweetsNum = int32(temp)
69 | }
70 | if followsNum != "" {
71 | temp, _ := strconv.Atoi(followsNum)
72 | info.FollowsNum = int32(temp)
73 | }
74 | if fansNum != "" {
75 | temp, _ := strconv.Atoi(fansNum)
76 | info.FansNum = int32(temp)
77 | }
78 | err := mdb.Insert(dbName, "Information", info)
79 | if mgo.IsDup(err) {
80 | //有重复数据
81 | fmt.Println("already scrapy")
82 | }
83 | })
84 | }
85 |
--------------------------------------------------------------------------------
/scrapy_rules/tweet_rules.go:
--------------------------------------------------------------------------------
1 | package scrapy_rules
2 |
3 | import (
4 | "WeiboSpiderGo/config"
5 | "WeiboSpiderGo/mdb"
6 | "WeiboSpiderGo/utils"
7 | "fmt"
8 | "github.com/gocolly/colly"
9 | "gopkg.in/mgo.v2"
10 | "strconv"
11 | "strings"
12 | "time"
13 | )
14 |
15 | func SetTweetCallback(getTweetsC, getContentSubC, getCommentSubC *colly.Collector) {
16 | getTweetsC.OnResponse(func(r *colly.Response) {
17 | content := string(r.Body)
18 | uid := utils.ReParse(`(\d+)/profile`, r.Request.URL.String())
19 | if strings.Contains(r.Request.URL.String(), "page=1") {
20 | allPage := utils.ReParse(`/> 1/(\d+)页`, content)
21 | pageNum, _ := strconv.Atoi(allPage)
22 | for i := 2; i < (pageNum + 1); i++ {
23 | link := fmt.Sprintf("%s/%s/profile?page=%d", BaseUrl, uid, i)
24 | getTweetsC.Visit(link)
25 | }
26 | }
27 | })
28 | getTweetsC.OnXML(`//div[@class="c" and @id]`, func(element *colly.XMLElement) {
29 | tweet := mdb.Tweets{}
30 | tweet.CrawlTime = int32(time.Now().Unix())
31 | tweetRepostUrl := element.ChildAttr(`.//a[contains(text(),"转发[")]`, "href")
32 | tweetItemId := utils.ReParse(`/repost/(.*?)\?`, tweetRepostUrl)
33 | tweet.UserId = utils.ReParse(`uid=(\d+)`, tweetRepostUrl)
34 | tweet.WeiboUrl = fmt.Sprintf("https://weibo.com/%s/%s", tweet.UserId, tweetItemId)
35 | tweet.Id_ = fmt.Sprintf("%s_%s", tweet.UserId, tweetItemId)
36 | createTimeInfo := element.ChildText(`.//span[@class="ct"]`)
37 | if strings.Contains(createTimeInfo, "来自") {
38 | timeStr := strings.Split(createTimeInfo, "来自")[0]
39 | timeStr = strings.TrimSpace(timeStr)
40 | tweet.CreatedAt = utils.ConvTime(timeStr)
41 | tweet.Tool = strings.Split(createTimeInfo, "来自")[1]
42 | } else {
43 | timeStr := strings.TrimSpace(createTimeInfo)
44 | tweet.CreatedAt = utils.ConvTime(timeStr)
45 | }
46 |
47 | likeNumText := element.ChildText(`.//a[contains(text(),"赞[")]`)
48 | likeNum, _ := strconv.Atoi(utils.ReParse(`\d+`, likeNumText))
49 | tweet.LikeNum = int32(likeNum)
50 |
51 | repostNumText := element.ChildText(`.//a[contains(text(),"转发[")]`)
52 | repostNum, _ := strconv.Atoi(utils.ReParse(`\d+`, repostNumText))
53 | tweet.RepostNum = int32(repostNum)
54 |
55 | commentNumText := element.ChildText(`.//a[contains(text(),"评论[") and not(contains(text(),"原文"))]`)
56 | commentNum, _ := strconv.Atoi(utils.ReParse(`\d+`, commentNumText))
57 | tweet.CommentNum = int32(commentNum)
58 |
59 | tweet.ImageUrl = element.ChildAttr(`.//img[@alt="图片"]`, "src")
60 | tweet.VideoUrl = element.ChildAttr(`.//a[contains(@href,"https://m.weibo.cn/s/video/show?object_id=")]`, "href")
61 |
62 | mapNode := element.ChildAttr(`.//a[contains(text(),"显示地图")]`, "href")
63 | if mapNode != "" {
64 | tweet.LocationMapInfo = utils.ReParse(`xy=(.*?)&`, mapNode)
65 | }
66 |
67 | tweet.OriginWeibo = element.ChildAttr(`.//a[contains(text(),"原文评论[")]`, "href")
68 |
69 | allContentLink := element.ChildAttr(`.//a[text()="全文" and contains(@href,"ckAll=1")]`, "href")
70 | if allContentLink == "" {
71 | //没有全文按钮
72 | content := element.Text
73 | if pos := strings.LastIndex(content, "转发理由:"); pos != -1 {
74 | content = content[pos+len("转发理由:"):]
75 | }
76 | content = content[0:strings.LastIndex(content, "赞")]
77 | if pos := strings.LastIndex(content, "[组图共"); pos != -1 {
78 | content = content[0:pos]
79 | }
80 | if pos := strings.LastIndex(content, "原图"); pos != -1 {
81 | l := len(content)
82 | if l >= pos+6 {
83 | content = content[0:pos]
84 | }
85 | }
86 | tweet.Content = strings.TrimSpace(content)
87 | err := mdb.Insert(dbName, "Tweets", tweet)
88 | if mgo.IsDup(err) {
89 | //有重复数据
90 | fmt.Println("already scrapy")
91 | }
92 | } else {
93 | element.Response.Ctx.Put("tweet", tweet)
94 | contentSubLink := fmt.Sprintf("%s%s", BaseUrl, allContentLink)
95 | getContentSubC.Request("GET", contentSubLink, nil, element.Response.Ctx, nil)
96 | }
97 |
98 | commentLink := fmt.Sprintf("%s/comment/%s?page=1", BaseUrl, strings.Split(tweet.Id_, "_")[1])
99 | element.Response.Ctx.Put("weibo_url", tweet.WeiboUrl)
100 | if config.Conf.GetBool("SCRAPY_TYPE.Tweet.Comment") {
101 | getCommentSubC.Request("GET", commentLink, nil, element.Response.Ctx, nil)
102 | }
103 | })
104 | }
105 |
106 | func SetFullContentCallback(getContentSubC *colly.Collector) {
107 | getContentSubC.OnXML(`//*[@id="M_"]/div[1]`, func(element *colly.XMLElement) {
108 | //var tweet mdb.Tweets
109 | tweetInt := element.Response.Ctx.GetAny("tweet")
110 | tweet := tweetInt.(mdb.Tweets)
111 | content := element.Text
112 | if pos := strings.LastIndex(content, "转发理由:"); pos != -1 {
113 | content = content[pos+len("转发理由:"):]
114 | }
115 | if pos := strings.LastIndex(content, "[组图共"); pos != -1 {
116 | content = content[0:pos]
117 | }
118 | if pos := strings.LastIndex(content, "原图"); pos != -1 {
119 | l := len(content)
120 | if l >= pos+6 {
121 | content = content[0:pos]
122 | }
123 | }
124 | tweet.Content = strings.TrimSpace(content)
125 | err := mdb.Insert(dbName, "Tweets", tweet)
126 | if mgo.IsDup(err) {
127 | //有重复数据
128 | fmt.Println("already scrapy")
129 | }
130 | })
131 | }
132 |
133 | func SetCommentCallback(getCommentSubC *colly.Collector) {
134 | getCommentSubC.OnResponse(func(r *colly.Response) {
135 | content := string(r.Body)
136 | if strings.Contains(r.Request.URL.String(), "page=1") {
137 | allPage := utils.ReParse(`/> 1/(\d+)页`, content)
138 | pageNum, _ := strconv.Atoi(allPage)
139 | for i := 2; i < (pageNum + 1); i++ {
140 | pageUrl := strings.Replace(r.Request.URL.String(), "page=1", "page="+strconv.Itoa(i), -1)
141 | getCommentSubC.Visit(pageUrl)
142 | }
143 | }
144 | })
145 | getCommentSubC.OnXML(`//div[@class="c" and contains(@id,"C_")]`, func(element *colly.XMLElement) {
146 | commentUserUrl := element.ChildAttr(`.//a[contains(@href,"/u/")]`, "href")
147 | if commentUserUrl == "" {
148 | return
149 | }
150 | comment := mdb.Comment{}
151 | comment.CrawlTime = int32(time.Now().Unix())
152 | comment.WeiboUrl = element.Response.Ctx.Get("weibo_url")
153 | comment.CommentUserId = utils.ReParse(`/u/(\d+)`, commentUserUrl)
154 | comment.Id_ = element.Attr("id")
155 | createdAtInfo := element.ChildText(`.//span[@class="ct"]`)
156 | likeNumText := element.ChildText(`.//a[contains(text(),"赞[")]`)
157 | likeNum, _ := strconv.Atoi(utils.ReParse(`\d+`, likeNumText))
158 | comment.LikeNum = int32(likeNum)
159 | comment.CreatedAt = utils.ConvTime(strings.Split(createdAtInfo, "\u0000")[0])
160 | content := element.Text
161 | content = content[0:strings.LastIndex(content, "赞")]
162 | if pos := strings.LastIndex(content, "举报"); pos != -1 {
163 | content = content[0:pos]
164 | }
165 | comment.Content = strings.TrimSpace(content)
166 | err := mdb.Insert(dbName, "Comments", comment)
167 | if mgo.IsDup(err) {
168 | //有重复数据
169 | fmt.Println("already scrapy")
170 | }
171 | })
172 | }
173 |
174 | func GetTweetUrl(uid string) string {
175 | return fmt.Sprintf("%s/%s/profile?page=1", BaseUrl, uid)
176 | }
177 |
--------------------------------------------------------------------------------
/utils/utils.go:
--------------------------------------------------------------------------------
1 | package utils
2 |
3 | import (
4 | "bufio"
5 | "os"
6 | "path/filepath"
7 | "regexp"
8 | "strconv"
9 | "strings"
10 | "time"
11 | )
12 |
// ExecPath is the directory part of os.Args[0], computed once at package
// initialization; file locations are built relative to it.
var ExecPath = getExecutePath()
14 |
// ReParse returns the first match of pattern in content: the first capture
// group when the pattern has one, otherwise the whole match. It returns ""
// when nothing matches.
//
// Only the first match is looked up instead of collecting every match in
// content. pattern is compiled with regexp.MustCompile, so an invalid pattern
// panics.
func ReParse(pattern string, content string) string {
	match := regexp.MustCompile(pattern).FindStringSubmatch(content)
	switch len(match) {
	case 0:
		return ""
	case 1:
		return match[0]
	default:
		return match[1]
	}
}
25 |
// ReParseMayLi returns every match of pattern in content. Each entry holds
// the whole match followed by its capture groups; the result is nil when
// nothing matches. An invalid pattern panics (regexp.MustCompile).
func ReParseMayLi(pattern string, content string) [][]string {
	compiled := regexp.MustCompile(pattern)
	return compiled.FindAllStringSubmatch(content, -1)
}
30 |
// Patterns for the relative time forms handled by ConvTime, compiled once.
var (
	minutesAgoRe = regexp.MustCompile(`^(\d+)分钟`)
	hoursAgoRe   = regexp.MustCompile(`^(\d+)小时`)
)

// ConvTime normalizes the time text shown on weibo.cn, using the local clock:
//
//   - "N分钟前" (N minutes ago) and "N小时前" (N hours ago) become
//     "YYYY-MM-DD HH:MM";
//   - "今天 HH:MM" (today) gets "今天" replaced by the current date;
//   - "MM月DD日 HH:MM" becomes "YYYY-MM-DD HH:MM" with the current year, so
//     that it has the same shape as the other results;
//   - anything else is returned unchanged.
func ConvTime(timeStr string) string {
	const layout = "2006-01-02 15:04"
	now := time.Now()
	switch {
	case strings.Contains(timeStr, "分钟前"):
		ago := time.Duration(leadingNumber(minutesAgoRe, timeStr)) * time.Minute
		return now.Add(-ago).Format(layout)
	case strings.Contains(timeStr, "小时前"):
		ago := time.Duration(leadingNumber(hoursAgoRe, timeStr)) * time.Hour
		return now.Add(-ago).Format(layout)
	case strings.Contains(timeStr, "今天"):
		return strings.Replace(timeStr, "今天", now.Format("2006-01-02"), -1)
	case strings.Contains(timeStr, "月"):
		// The site omits the year in this form; assume the current year.
		rp := strings.NewReplacer("月", "-", "日", "")
		return strconv.Itoa(now.Year()) + "-" + rp.Replace(timeStr)
	}
	return timeStr
}

// leadingNumber returns the number captured by re's first group in s, or 0
// when re does not match.
func leadingNumber(re *regexp.Regexp, s string) int {
	match := re.FindStringSubmatch(s)
	if match == nil {
		return 0
	}
	n, _ := strconv.Atoi(match[1]) // the group captures digits only
	return n
}
52 |
53 | func GetTargetUidList() []string {
54 | var uidLi []string
55 | file, err := os.Open(ExecPath + "/account/target.txt")
56 | defer file.Close()
57 | if err != nil {
58 | panic(err)
59 | }
60 | scanner := bufio.NewScanner(file)
61 | for scanner.Scan() {
62 | lineText := scanner.Text()
63 | lineText = strings.TrimSpace(lineText)
64 | lineText = strings.Replace(lineText, "\uFEFF", "", -1)
65 | uidLi = append(uidLi, lineText)
66 | }
67 | return uidLi
68 | }
69 |
// getExecutePath returns the directory part of os.Args[0], the path the
// program was started with.
func getExecutePath() string {
	program := os.Args[0]
	return filepath.Dir(program)
}
73 |
--------------------------------------------------------------------------------
/weibo_spider.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "WeiboSpiderGo/config"
5 | "WeiboSpiderGo/scrapy_rules"
6 | "WeiboSpiderGo/utils"
7 | "fmt"
8 | )
9 |
// uidLi holds the target user ids, loaded once at package initialization via
// utils.GetTargetUidList.
var uidLi = utils.GetTargetUidList()
11 |
12 | func scrapyInfomation() {
13 | getInfoC := scrapy_rules.GetDefaultCollector()
14 | getMoreInfoC := scrapy_rules.GetDefaultCollector()
15 | scrapy_rules.SetMoreInfoCallback(getMoreInfoC)
16 |
17 | scrapy_rules.SetInfoCallback(getInfoC, getMoreInfoC)
18 |
19 | for _, uid := range uidLi {
20 | url := fmt.Sprintf("%s/%s/info", scrapy_rules.BaseUrl, uid)
21 | getInfoC.Visit(url)
22 | }
23 | getInfoC.Wait()
24 | getMoreInfoC.Wait()
25 | }
26 |
27 | func scrapyTweet() {
28 | getTweetsC := scrapy_rules.GetDefaultCollector()
29 | getContentSubC := scrapy_rules.GetDefaultCollector()
30 | scrapy_rules.SetFullContentCallback(getContentSubC)
31 | getCommentSubC := scrapy_rules.GetDefaultCollector()
32 | scrapy_rules.SetCommentCallback(getCommentSubC)
33 |
34 | scrapy_rules.SetTweetCallback(getTweetsC, getContentSubC, getCommentSubC)
35 |
36 | for _, uid := range uidLi {
37 | url := scrapy_rules.GetTweetUrl(uid)
38 | getTweetsC.Visit(url)
39 | }
40 | getTweetsC.Wait()
41 | getContentSubC.Wait()
42 | getCommentSubC.Wait()
43 | }
44 |
45 | func scrapyFollow() {
46 | getFollowC := scrapy_rules.GetDefaultCollector()
47 | scrapy_rules.SetFollowCallback(getFollowC)
48 | //read files
49 | for _, uid := range uidLi {
50 | url := scrapy_rules.GetFollowUrl(uid)
51 | getFollowC.Visit(url)
52 | }
53 | getFollowC.Wait()
54 | }
55 |
56 | func scrapyFans() {
57 | getFansC := scrapy_rules.GetDefaultCollector()
58 | scrapy_rules.SetFansCallback(getFansC)
59 |
60 | for _, uid := range uidLi {
61 | url := scrapy_rules.GetFansUrl(uid)
62 | getFansC.Visit(url)
63 | }
64 | getFansC.Wait()
65 | }
66 |
67 | func main() {
68 | if config.Conf.GetBool("SCRAPY_TYPE.Info") {
69 | scrapyInfomation()
70 | }
71 | if config.Conf.GetBool("SCRAPY_TYPE.Follow") {
72 | scrapyFollow()
73 | }
74 | //修复去重问题
75 | if config.Conf.GetBool("SCRAPY_TYPE.Fans") {
76 | scrapyFans()
77 | }
78 | if config.Conf.GetBool("SCRAPY_TYPE.Tweet.Main") {
79 | scrapyTweet()
80 | }
81 | }
82 |
--------------------------------------------------------------------------------