├── README.md ├── account ├── account.txt └── target.txt ├── browser └── readme.md ├── config └── config_utils.go ├── example-config.yaml ├── go.mod ├── go.sum ├── login.go ├── mdb ├── items.go └── mdb.go ├── scrapy_rules ├── base_rules.go ├── fans_rules.go ├── follower_rules.go ├── follower_senior_rules.go ├── information_rules.go └── tweet_rules.go ├── utils └── utils.go └── weibo_spider.go /README.md: -------------------------------------------------------------------------------- 1 | # WeiboSpiderGo 2 | 3 | It's a Sina Weibo (Chinese Twitter) spider written in Go with colly 4 | 5 | 可以双击运行的微博爬虫 6 | 7 | #### 使用准备 8 | 9 | - chrome driver安装 10 | 11 | 本地需要安装chrome浏览器、以及下载对应版本的chromedriver。例如,本机上安装了版本号为78的chrome则需要在https://chromedriver.chromium.org/downloads 链接中下载版本号为78的,对应平台的chromedriver.zip 12 | 13 | 将解压后的chromedriver文件放于项目的browser目录下 14 | 15 | - mongodb安装 16 | 17 | 安装mongodb,将地址和端口、数据库名填写到config.yaml中 18 | 19 | #### 使用 20 | 21 | 完成上一步的使用准备工作后,可以下载release中的代码,在上方栏的release页中,分别有mac版与windows exe版本的压缩包,可以直接下载运行 22 | 23 | 填写需要登陆的账号于account/account.txt中,每行一个账号,源码的account文件夹可以看到示例,双击login开始批量登陆(账号要选择免验证码的邮箱登陆的账号) 24 | 25 | 在account/target.txt文件中,同样每行一个,写下要爬取的用户id,双击weibo_spider或weibo_spider.exe开始爬取 26 | 27 | 希望大家不要涸泽而渔,release中的版本爬取间隔为10秒左右 28 | 29 | #### 配置文件内容 30 | 31 | 配置文件为根目录下的config.yaml 32 | 33 | 配置文件含义 34 | 35 | ``` 36 | MONGO_ADDRESS - mongodb地址 37 | DB_NAME - mongodb数据库名 38 | ACCOUNT_FILE - 存放要爬取的目标账户id的文件 39 | DRIVER_PATH: "/browser/chromedriver" 40 | # 爬取类型 41 | SCRAPY_TYPE: 42 | Info: True 43 | Follow: False 44 | Fans: False 45 | Tweet: 46 | Main: True 47 | Comment: False 48 | ``` 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | #### 编译 57 | 58 | 安装依赖后使用 59 | 60 | ``` 61 | go build login.go 62 | go build weibo_spider.go 63 | ``` 64 | 65 | 进行编译 66 | 67 | #### 下一步 68 | 69 | - [ ] 代理ip添加 70 | - [ ] 图片视频下载 71 | 72 | -------------------------------------------------------------------------------- /account/account.txt: 
-------------------------------------------------------------------------------- 1 | example@163.com----12333444 2 | example@qq.com----122223333 3 | example@126.com----12223113 4 | -------------------------------------------------------------------------------- /account/target.txt: -------------------------------------------------------------------------------- 1 | 3217179555 2 | 1742566624 3 | -------------------------------------------------------------------------------- /browser/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eureka101v/WeiboSpiderGo/3f3dcbb0447adc28611ed04aec93a9e736109f7c/browser/readme.md -------------------------------------------------------------------------------- /config/config_utils.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "WeiboSpiderGo/utils" 5 | "github.com/spf13/viper" 6 | ) 7 | 8 | var Conf *viper.Viper 9 | 10 | func init() { 11 | Conf = viper.New() 12 | 13 | Conf.SetConfigName("config") 14 | 15 | Conf.AddConfigPath(utils.ExecPath) 16 | 17 | Conf.SetConfigType("yaml") 18 | if err := Conf.ReadInConfig(); err != nil { 19 | panic(err) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /example-config.yaml: -------------------------------------------------------------------------------- 1 | MONGO_ADDRESS: "127.0.0.1:27017" 2 | DB_NAME: "Sina2" 3 | ACCOUNT_FILE: "/account/account.txt" 4 | DRIVER_PATH: "/browser/chromedriver" 5 | # 爬取类型 6 | SCRAPY_TYPE: 7 | Info: True 8 | Follow: False 9 | Fans: False 10 | Tweet: 11 | Main: True 12 | Comment: False 13 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module WeiboSpiderGo 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/PuerkitoBio/goquery v1.5.0 // 
indirect 7 | github.com/antchfx/htmlquery v1.1.0 // indirect 8 | github.com/antchfx/xmlquery v1.1.0 // indirect 9 | github.com/antchfx/xpath v1.1.0 10 | github.com/gobwas/glob v0.2.3 // indirect 11 | github.com/gocolly/colly v1.2.0 12 | github.com/kennygrant/sanitize v1.2.4 // indirect 13 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect 14 | github.com/spf13/viper v1.5.0 15 | github.com/tebeka/selenium v0.9.9 16 | github.com/temoto/robotstxt v1.1.1 // indirect 17 | golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 // indirect 18 | gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 19 | ) 20 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 3 | cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= 4 | cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg= 5 | github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= 6 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 7 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc= 8 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= 9 | github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA= 10 | github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k= 11 | github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= 12 | github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= 13 | 
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= 14 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= 15 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= 16 | github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= 17 | github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= 18 | github.com/antchfx/htmlquery v1.1.0 h1:KMS88sLl5KP9GUVU2MQIDcQXNQ0M5MGlkC9WlYgAQqY= 19 | github.com/antchfx/htmlquery v1.1.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8= 20 | github.com/antchfx/xmlquery v1.1.0 h1:vj0kZ1y3Q6my4AV+a9xbWrMYzubw+84zuiKgvfV8vb8= 21 | github.com/antchfx/xmlquery v1.1.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk= 22 | github.com/antchfx/xpath v1.1.0 h1:mJTvYpiHvxNQRD4Lbfin/FodHVCHh2a5KrOFr4ZxMOI= 23 | github.com/antchfx/xpath v1.1.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 24 | github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= 25 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= 26 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= 27 | github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= 28 | github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= 29 | github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= 30 | github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= 31 | github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= 32 | github.com/client9/misspell v0.3.4/go.mod 
h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 33 | github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= 34 | github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= 35 | github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 36 | github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= 37 | github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= 38 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 39 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 40 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 41 | github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= 42 | github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= 43 | github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I= 44 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 45 | github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= 46 | github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= 47 | github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= 48 | github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= 49 | github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= 50 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= 51 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= 52 | github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= 53 | 
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= 54 | github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= 55 | github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= 56 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= 57 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 58 | github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 59 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 60 | github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 61 | github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= 62 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 63 | github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= 64 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 65 | github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 66 | github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 67 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 68 | github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= 69 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 70 | github.com/google/go-github/v27 v27.0.4/go.mod h1:/0Gr8pJ55COkmv+S/yPKCczSkUPIM/LnFyubufRNIS0= 71 | github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= 72 | github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= 73 | github.com/google/pprof 
v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= 74 | github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= 75 | github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= 76 | github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= 77 | github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= 78 | github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= 79 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= 80 | github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= 81 | github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 82 | github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 83 | github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= 84 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 85 | github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= 86 | github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= 87 | github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= 88 | github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= 89 | github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= 90 | github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= 91 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 92 | github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod 
h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 93 | github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= 94 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 95 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 96 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 97 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 98 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 99 | github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= 100 | github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 101 | github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= 102 | github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE= 103 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 104 | github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 105 | github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= 106 | github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= 107 | github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 108 | github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 109 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 110 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 111 | github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= 112 | github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= 113 | 
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= 114 | github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 115 | github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= 116 | github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= 117 | github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= 118 | github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= 119 | github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= 120 | github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= 121 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= 122 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 123 | github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= 124 | github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= 125 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 126 | github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI= 127 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= 128 | github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8= 129 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 130 | github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9Gc1vn7yk= 131 | github.com/spf13/jwalterweatherman 
v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= 132 | github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= 133 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 134 | github.com/spf13/viper v1.5.0 h1:GpsTwfsQ27oS/Aha/6d1oD7tpKIqWnOA6tgOX9HHkt4= 135 | github.com/spf13/viper v1.5.0/go.mod h1:AkYRkVJF8TkSG/xet6PzXX+l39KhhXa2pdqVSxnTcn4= 136 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 137 | github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 138 | github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= 139 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 140 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 141 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 142 | github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= 143 | github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= 144 | github.com/tebeka/selenium v0.9.9 h1:cNziB+etNgyH/7KlNI7RMC1ua5aH1+5wUlFQyzeMh+w= 145 | github.com/tebeka/selenium v0.9.9/go.mod h1:5Fr8+pUvU6B1OiPfkdCKdXZyr5znvVkxuPd0NOdZCQc= 146 | github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA= 147 | github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= 148 | github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= 149 | github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= 150 | github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= 151 | github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= 152 | 
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= 153 | go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= 154 | go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= 155 | go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= 156 | go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= 157 | go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= 158 | golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 159 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 160 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 161 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 162 | golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= 163 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= 164 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 165 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 166 | golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 167 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 168 | golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 169 | golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= 170 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 
171 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 172 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 173 | golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 174 | golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 175 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 176 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 177 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 178 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 179 | golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 180 | golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 181 | golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 182 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 183 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 184 | golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 h1:DPz9iiH3YoKiKhX/ijjoZvT0VFwK2c6CWYWQ7Zyr8TU= 185 | golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 186 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 187 | golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 188 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod 
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 189 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 190 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 191 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 192 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 193 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 194 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 195 | golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 196 | golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 197 | golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 198 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 199 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 200 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 201 | golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 202 | golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 203 | golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 204 | golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0 h1:HyfiK1WMnHj5FXFXatD+Qs1A/xC2Run6RzeW1SyHxpc= 205 | golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 206 | golang.org/x/text 
v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 207 | golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 208 | golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= 209 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 210 | golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 211 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 212 | golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 213 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 214 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 215 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 216 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 217 | golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 218 | golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 219 | golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 220 | golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 221 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 222 | golang.org/x/tools v0.0.0-20190624190245-7f2218787638/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 223 | google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= 224 | google.golang.org/api v0.7.0/go.mod 
h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= 225 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 226 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 227 | google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 228 | google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= 229 | google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= 230 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 231 | google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 232 | google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 233 | google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 234 | google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 235 | google.golang.org/genproto v0.0.0-20190626174449-989357319d63/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= 236 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 237 | google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= 238 | google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= 239 | google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= 240 | gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= 241 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 242 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 243 | gopkg.in/check.v1 
v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= 244 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 245 | gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw= 246 | gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= 247 | gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= 248 | gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= 249 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 250 | gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= 251 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 252 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 253 | honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 254 | honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 255 | rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= 256 | -------------------------------------------------------------------------------- /login.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "WeiboSpiderGo/config" 5 | "WeiboSpiderGo/mdb" 6 | "WeiboSpiderGo/utils" 7 | "bufio" 8 | "fmt" 9 | "github.com/tebeka/selenium" 10 | "github.com/tebeka/selenium/chrome" 11 | "gopkg.in/mgo.v2/bson" 12 | "os" 13 | "strings" 14 | ) 15 | 16 | var db_name = config.Conf.GetString("DB_NAME") 17 | 18 | func Displayed(by, elementName string) func(selenium.WebDriver) (bool, error) { 19 | return func(wd selenium.WebDriver) (bool, error) { 20 | el, err := wd.FindElement(by, elementName) 21 | if 
err != nil { 22 | return false, nil 23 | } 24 | enabled, err := el.IsDisplayed() 25 | if err != nil { 26 | return false, nil 27 | } 28 | 29 | if !enabled { 30 | return false, nil 31 | } 32 | 33 | return true, nil 34 | } 35 | } 36 | 37 | func getCookieStr(username_text string, password_text string) string { 38 | // Start a Selenium WebDriver server instance (if one is not already 39 | // running). 40 | //username_text := "1222" 41 | //password_text := "23121" 42 | var ( 43 | // These paths will be different on your system. 44 | driverPath = utils.ExecPath + config.Conf.GetString("DRIVER_PATH") 45 | port = 9005 46 | ) 47 | opts := []selenium.ServiceOption{} 48 | 49 | service, err := selenium.NewChromeDriverService(driverPath, port, opts...) 50 | if nil != err { 51 | fmt.Println("start a chromedriver service falid", err.Error()) 52 | return "" 53 | } 54 | //注意这里,server关闭之后,chrome窗口也会关闭 55 | defer service.Stop() 56 | 57 | // Connect to the WebDriver instance running locally. 58 | caps := selenium.Capabilities{"browserName": "chrome"} 59 | //禁止图片加载,加快渲染速度 60 | imagCaps := map[string]interface{}{ 61 | "profile.managed_default_content_settings.images": 2, 62 | } 63 | chromeCaps := chrome.Capabilities{ 64 | Prefs: imagCaps, 65 | Path: "", 66 | Args: []string{ 67 | "--headless", // 设置Chrome无头模式,在linux下运行,需要设置这个参数,否则会报错 68 | //"--no-sandbox", 69 | "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", // 模拟user-agent,防反爬 70 | }, 71 | } 72 | //以上是设置浏览器参数 73 | caps.AddChrome(chromeCaps) 74 | wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port)) 75 | if err != nil { 76 | fmt.Println("connect to the webDriver faild", err.Error()) 77 | return "" 78 | } 79 | defer wd.Quit() 80 | err = wd.Get("https://passport.weibo.cn/signin/login?entry=mweibo&r=https://weibo.cn/") 81 | if err != nil { 82 | fmt.Println("get page faild", err.Error()) 83 | return "" 84 | } 85 | 
wd.Wait(Displayed(selenium.ByCSSSelector, "#loginName")) 86 | wd.Wait(Displayed(selenium.ByCSSSelector, "#loginPassword")) 87 | wd.Wait(Displayed(selenium.ByCSSSelector, "#loginAction")) 88 | username, err := wd.FindElement(selenium.ByCSSSelector, "#loginName") 89 | if err != nil { 90 | fmt.Println("get username faild", err.Error()) 91 | return "" 92 | } 93 | password, err := wd.FindElement(selenium.ByCSSSelector, "#loginPassword") 94 | if err != nil { 95 | fmt.Println("get username faild", err.Error()) 96 | return "" 97 | } 98 | submit, err := wd.FindElement(selenium.ByCSSSelector, "#loginAction") 99 | if err != nil { 100 | fmt.Println("get username faild", err.Error()) 101 | return "" 102 | } 103 | username.SendKeys(username_text) 104 | password.SendKeys(password_text) 105 | submit.Click() 106 | wd.Wait(func(wdtemp selenium.WebDriver) (b bool, e error) { 107 | tit, err := wdtemp.Title() 108 | if err != nil { 109 | return false, nil 110 | } 111 | if tit != "我的首页" { 112 | return false, nil 113 | } 114 | return true, nil 115 | }) 116 | mcookie, err := wd.GetCookies() 117 | var cookie_arr []string 118 | for _, c := range mcookie { 119 | cookie_arr = append(cookie_arr, c.Name+"="+c.Value) 120 | } 121 | cookie_str := strings.Join(cookie_arr, ";") 122 | return cookie_str 123 | } 124 | 125 | func saveToMgo(id_ string, password string, cookie_str string) { 126 | err := mdb.Upsert(db_name, "account", bson.M{"_id": id_}, bson.M{"$set": bson.M{"password": password, "cookie": cookie_str, "status": "success"}}) 127 | if err != nil { 128 | panic(err) 129 | } 130 | if cookie_str != "" { 131 | fmt.Println("login success") 132 | } else { 133 | fmt.Println("login fail") 134 | } 135 | } 136 | 137 | func main() { 138 | file, err := os.Open(utils.ExecPath + config.Conf.GetString("ACCOUNT_FILE")) 139 | fmt.Println(utils.ExecPath + config.Conf.GetString("ACCOUNT_FILE")) 140 | if err != nil { 141 | fmt.Println("account.txt is not found") 142 | } 143 | defer file.Close() 144 | scanner := 
bufio.NewScanner(file) 145 | for scanner.Scan() { 146 | lineText := scanner.Text() 147 | text := strings.Split(lineText, "----") 148 | fmt.Println("start login username:", text[0]) 149 | cookiestr := getCookieStr(text[0], text[1]) 150 | saveToMgo(text[0], text[1], cookiestr) 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /mdb/items.go: -------------------------------------------------------------------------------- 1 | package mdb 2 | 3 | type Account struct { 4 | Id_ string `bson:"_id"` 5 | Password string `bson:"password"` 6 | Cookie string `bson:"cookie"` 7 | Status string `bson:"status"` 8 | } 9 | 10 | type Tweets struct { 11 | Id_ string `bson:"_id"` 12 | WeiboUrl string `bson:"weibo_url"` 13 | CreatedAt string `bson:"created_at"` 14 | LikeNum int32 `bson:"like_num"` 15 | RepostNum int32 `bson:"repost_num"` 16 | CommentNum int32 `bson:"comment_num"` 17 | Content string `bson:"content"` 18 | UserId string `bson:"user_id"` 19 | Tool string `bson:"tool"` 20 | ImageUrl string `bson:"image_url"` 21 | VideoUrl string `bson:"video_url"` 22 | OriginWeibo string `bson:"origin_weibo"` 23 | LocationMapInfo string `bson:"location_map_info"` 24 | CrawlTime int32 `bson:"crawl_time"` 25 | } 26 | 27 | type Information struct { 28 | Id_ string `bson:"_id"` 29 | Nickname string `bson:"nick_name"` 30 | Gender string `bson:"gender"` 31 | Province string `bson:"province"` 32 | City string `bson:"city"` 33 | BriefIntroduction string `bson:"brief_introduction"` 34 | Birthday string `bson:"birthday"` 35 | TweetsNum int32 `bson:"tweets_num"` 36 | FollowsNum int32 `bson:"follows_num"` 37 | FansNum int32 `bson:"fans_num"` 38 | SexOrientation string `bson:"sex_orientation"` 39 | Sentiment string `bson:"sentiment"` 40 | VipLevel string `bson:"vip_level"` 41 | Authentication string `bson:"authentication"` 42 | Labels string `bson:"labels"` 43 | CrawlTime int32 `bson:"crawl_time"` 44 | } 45 | 46 | type Relationships struct { 47 | Id_ 
string `bson:"_id"` 48 | FanId string `bson:"fan_id"` 49 | FollowedId string `bson:"followed_id"` 50 | CrawlTime int32 `bson:"crawl_time"` 51 | } 52 | 53 | type Comment struct { 54 | Id_ string `bson:"_id"` 55 | CommentUserId string `bson:"comment_user_id"` 56 | Content string `bson:"content"` 57 | WeiboUrl string `bson:"weibo_url"` 58 | CreatedAt string `bson:"created_at"` 59 | LikeNum int32 `bson:"like_num"` 60 | CrawlTime int32 `bson:"crawl_time"` 61 | } 62 | -------------------------------------------------------------------------------- /mdb/mdb.go: -------------------------------------------------------------------------------- 1 | package mdb 2 | 3 | import ( 4 | "WeiboSpiderGo/config" 5 | "gopkg.in/mgo.v2" 6 | "log" 7 | ) 8 | 9 | var globalS *mgo.Session 10 | 11 | func init() { 12 | s, err := mgo.Dial(config.Conf.GetString("MONGO_ADDRESS")) 13 | if err != nil { 14 | log.Fatalf("Create Session: %s\n", err) 15 | } 16 | globalS = s 17 | } 18 | 19 | func connect(db, collection string) (*mgo.Session, *mgo.Collection) { 20 | ms := globalS.Copy() 21 | c := ms.DB(db).C(collection) 22 | ms.SetMode(mgo.Monotonic, true) 23 | return ms, c 24 | } 25 | 26 | func Insert(db, collection string, doc interface{}) error { 27 | ms, c := connect(db, collection) 28 | defer ms.Close() 29 | 30 | return c.Insert(doc) 31 | } 32 | 33 | func FindOne(db, collection string, query, selector, result interface{}) error { 34 | ms, c := connect(db, collection) 35 | defer ms.Close() 36 | 37 | return c.Find(query).Select(selector).One(result) 38 | } 39 | 40 | func FindAll(db, collection string, query, selector, result interface{}) error { 41 | ms, c := connect(db, collection) 42 | defer ms.Close() 43 | 44 | return c.Find(query).Select(selector).All(result) 45 | } 46 | 47 | func Update(db, collection string, selector, update interface{}) error { 48 | ms, c := connect(db, collection) 49 | defer ms.Close() 50 | 51 | return c.Update(selector, update) 52 | } 53 | 54 | //更新,如果不存在就插入一个新的数据 
`upsert:true` 55 | func Upsert(db, collection string, selector, update interface{}) error { 56 | ms, c := connect(db, collection) 57 | defer ms.Close() 58 | 59 | _, err := c.Upsert(selector, update) 60 | return err 61 | } 62 | 63 | // `multi:true` 64 | func UpdateAll(db, collection string, selector, update interface{}) error { 65 | ms, c := connect(db, collection) 66 | defer ms.Close() 67 | 68 | _, err := c.UpdateAll(selector, update) 69 | return err 70 | } 71 | 72 | func Remove(db, collection string, selector interface{}) error { 73 | ms, c := connect(db, collection) 74 | defer ms.Close() 75 | 76 | return c.Remove(selector) 77 | } 78 | 79 | func RemoveAll(db, collection string, selector interface{}) error { 80 | ms, c := connect(db, collection) 81 | defer ms.Close() 82 | 83 | _, err := c.RemoveAll(selector) 84 | return err 85 | } 86 | 87 | func FindPage(db, collection string, page, limit int, query, selector, result interface{}) error { 88 | ms, c := connect(db, collection) 89 | defer ms.Close() 90 | 91 | return c.Find(query).Select(selector).Skip(page * limit).Limit(limit).All(result) 92 | } 93 | 94 | func IsEmpty(db, collection string) bool { 95 | ms, c := connect(db, collection) 96 | defer ms.Close() 97 | count, err := c.Count() 98 | if err != nil { 99 | log.Fatal(err) 100 | } 101 | return count == 0 102 | } 103 | 104 | func Count(db, collection string, query interface{}) (int, error) { 105 | ms, c := connect(db, collection) 106 | defer ms.Close() 107 | return c.Find(query).Count() 108 | } 109 | -------------------------------------------------------------------------------- /scrapy_rules/base_rules.go: -------------------------------------------------------------------------------- 1 | package scrapy_rules 2 | 3 | import ( 4 | "WeiboSpiderGo/config" 5 | "WeiboSpiderGo/mdb" 6 | "fmt" 7 | "github.com/gocolly/colly" 8 | "github.com/gocolly/colly/debug" 9 | "github.com/gocolly/colly/extensions" 10 | "gopkg.in/mgo.v2/bson" 11 | "math/rand" 12 | "net/http" 13 | 
"time" 14 | ) 15 | 16 | var BaseUrl = "https://weibo.cn" 17 | var cookieStrLi []mdb.Account 18 | var dbName = config.Conf.GetString("DB_NAME") 19 | 20 | // return a collector 21 | func GetDefaultCollector() *colly.Collector { 22 | //set async and dont forget set c.wait() 23 | if config.Conf.GetBool("DEBUG_MODE") { 24 | } 25 | debugger := &debug.LogDebugger{} 26 | 27 | //file,err := os.Create(utils.ExecPath+"/debug.log") 28 | //if err!=nil{ 29 | // panic(err) 30 | //} 31 | //debugger.Output = file 32 | 33 | var c = colly.NewCollector( 34 | colly.Async(true), 35 | colly.Debugger(debugger), 36 | ) 37 | //disable http KeepAlives its could cause OOM in long time work 38 | c.WithTransport(&http.Transport{ 39 | DisableKeepAlives: true, 40 | }) 41 | mdb.FindAll(dbName, "account", bson.M{}, bson.M{}, &cookieStrLi) 42 | setDefaultCallback(c) 43 | extensions.RandomUserAgent(c) 44 | return c 45 | } 46 | 47 | // set default call,cookie and error handling 48 | func setDefaultCallback(c *colly.Collector) { 49 | // set random cookie 50 | c.OnRequest(func(r *colly.Request) { 51 | n := rand.Intn(len(cookieStrLi)) 52 | r.Headers.Set("Cookie", cookieStrLi[n].Cookie) 53 | r.Ctx.Put("_id", cookieStrLi[n].Id_) 54 | }) 55 | 56 | // Limit the maximum parallelism to 2 57 | // This is necessary if the goroutines are dynamically 58 | // created to control the limit of simultaneous requests. 59 | // 60 | // Parallelism can be controlled also by spawning fixed 61 | // number of go routines. 
62 | 63 | // delay 3 to 5 second 64 | delay := time.Duration(config.Conf.GetInt("DELAY")) 65 | randomDelay := time.Duration(config.Conf.GetInt("RANDOM_DELAY")) 66 | if delay == 0 || randomDelay == 0 { 67 | delay, randomDelay = 8, 2 68 | } 69 | c.Limit(&colly.LimitRule{DomainGlob: "*", Parallelism: 2, Delay: delay * time.Second, RandomDelay: randomDelay * time.Second}) 70 | 71 | // deal with error statusCode 72 | c.OnError(func(r *colly.Response, e error) { 73 | if r.StatusCode == 302 || r.StatusCode == 403 { 74 | mdb.Update(dbName, "account", bson.M{"_id": r.Ctx.Get("_id")}, bson.M{"$set": bson.M{"status": "error"}}) 75 | } else if r.StatusCode == 418 { 76 | fmt.Println("please wait a second") 77 | } 78 | }) 79 | } 80 | -------------------------------------------------------------------------------- /scrapy_rules/fans_rules.go: -------------------------------------------------------------------------------- 1 | package scrapy_rules 2 | 3 | import ( 4 | "WeiboSpiderGo/mdb" 5 | "WeiboSpiderGo/utils" 6 | "fmt" 7 | "github.com/gocolly/colly" 8 | "gopkg.in/mgo.v2" 9 | "strconv" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | func SetFansCallback(getFansC *colly.Collector) { 15 | getFansC.OnResponse(func(r *colly.Response) { 16 | content := string(r.Body) 17 | uid := utils.ReParse(`(\d+)/fans`, r.Request.URL.String()) 18 | if strings.Contains(r.Request.URL.String(), "page=1") { 19 | allPage := utils.ReParse(`/> 1/(\d+)页`, content) 20 | pageNum, _ := strconv.Atoi(allPage) 21 | for i := 2; i < (pageNum + 1); i++ { 22 | link := fmt.Sprintf("%s/%s/fans?page=%d",BaseUrl,uid,i) 23 | getFansC.Visit(link) 24 | } 25 | } 26 | }) 27 | getFansC.OnXML(`//a[text()="关注他" or text()="关注她" or text()="移除"]/@href`, func(element *colly.XMLElement) { 28 | followUrl := element.Text 29 | uid := utils.ReParse(`uid=(\d+)`, followUrl) 30 | ID := utils.ReParse(`(\d+)/fans`, element.Request.URL.String()) 31 | relationship := mdb.Relationships{} 32 | relationship.CrawlTime = int32(time.Now().Unix()) 33 | 
relationship.FanId = uid 34 | relationship.FollowedId = ID 35 | relationship.Id_ = uid + "-" + ID 36 | err := mdb.Insert(dbName, "Relationships", relationship) 37 | if mgo.IsDup(err) { 38 | //有重复数据 39 | fmt.Println("already scrapy") 40 | } 41 | }) 42 | } 43 | 44 | func GetFansUrl(uid string) string{ 45 | return fmt.Sprintf("%s/%s/fans?page=1",BaseUrl,uid) 46 | } 47 | -------------------------------------------------------------------------------- /scrapy_rules/follower_rules.go: -------------------------------------------------------------------------------- 1 | package scrapy_rules 2 | 3 | import ( 4 | "WeiboSpiderGo/mdb" 5 | "WeiboSpiderGo/utils" 6 | "fmt" 7 | "github.com/gocolly/colly" 8 | "gopkg.in/mgo.v2" 9 | "strconv" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | func SetFollowCallback(getFollowC *colly.Collector){ 15 | getFollowC.OnResponse(func(r *colly.Response) { 16 | content := string(r.Body) 17 | uid := utils.ReParse(`uid=(\d+)&`, r.Request.URL.String()) 18 | if strings.Contains(r.Request.URL.String(), "page=1") { 19 | allPage := utils.ReParse(`/> 1/(\d+)页`, content) 20 | pageNum, _ := strconv.Atoi(allPage) 21 | for i := 2; i < (pageNum + 1); i++ { 22 | link := fmt.Sprintf("%s/%s/follow?page=%d",BaseUrl,uid,i) 23 | getFollowC.Visit(link) 24 | } 25 | } 26 | }) 27 | getFollowC.OnXML(`//a[text()="关注他" or text()="关注她" or text()="取消关注"]/@href`, func(element *colly.XMLElement) { 28 | followUrl := element.Text 29 | uid := utils.ReParse(`uid=(\d+)`, followUrl) 30 | ID := utils.ReParse(`uid=(\d+)`, element.Request.URL.String()) 31 | relationship := mdb.Relationships{} 32 | relationship.CrawlTime = int32(time.Now().Unix()) 33 | relationship.FanId = ID 34 | relationship.FollowedId = uid 35 | relationship.Id_ = ID + "-" + uid 36 | err := mdb.Insert(dbName, "Relationships", relationship) 37 | if mgo.IsDup(err) { 38 | //有重复数据 39 | fmt.Println("already scrapy") 40 | } 41 | }) 42 | } 43 | 44 | func GetFollowUrl(uid string) string{ 45 | return 
fmt.Sprintf("%s/%s/follow?page=1",BaseUrl,uid) 46 | } 47 | -------------------------------------------------------------------------------- /scrapy_rules/follower_senior_rules.go: -------------------------------------------------------------------------------- 1 | package scrapy_rules 2 | 3 | import ( 4 | "WeiboSpiderGo/mdb" 5 | "WeiboSpiderGo/utils" 6 | "fmt" 7 | "github.com/gocolly/colly" 8 | "gopkg.in/mgo.v2" 9 | "strconv" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | func SetFollowSeniorCallback(getFollowC *colly.Collector){ 15 | getFollowC.OnResponse(func(r *colly.Response) { 16 | content := string(r.Body) 17 | uid := utils.ReParse(`uid=(\d+)&`, r.Request.URL.String()) 18 | if strings.Contains(r.Request.URL.String(), "page=1") { 19 | allPage := utils.ReParse(`/> 1/(\d+)页`, content) 20 | pageNum, _ := strconv.Atoi(allPage) 21 | for i := 2; i < (pageNum + 1); i++ { 22 | link := fmt.Sprintf("%s/attgroup/change?cat=user&uid=%s&page=%d",BaseUrl,uid,i) 23 | getFollowC.Visit(link) 24 | } 25 | } 26 | }) 27 | getFollowC.OnXML(`//a[text()="关注他" or text()="关注她" or text()="取消关注"]/@href`, func(element *colly.XMLElement) { 28 | followUrl := element.Text 29 | uid := utils.ReParse(`uid=(\d+)`, followUrl) 30 | ID := utils.ReParse(`uid=(\d+)`, element.Request.URL.String()) 31 | relationship := mdb.Relationships{} 32 | relationship.CrawlTime = int32(time.Now().Unix()) 33 | relationship.FanId = ID 34 | relationship.FollowedId = uid 35 | relationship.Id_ = ID + "-" + uid 36 | err := mdb.Insert(dbName, "Relationships", relationship) 37 | if mgo.IsDup(err) { 38 | //有重复数据 39 | fmt.Println("already scrapy") 40 | } 41 | }) 42 | } 43 | 44 | func GetFollowerSeniorUrl(uid string)string{ 45 | return fmt.Sprintf("%s/attgroup/change?cat=user&uid=%s&page=1",BaseUrl,uid) 46 | } 47 | -------------------------------------------------------------------------------- /scrapy_rules/information_rules.go: -------------------------------------------------------------------------------- 1 | package 
scrapy_rules 2 | 3 | import ( 4 | "WeiboSpiderGo/mdb" 5 | "WeiboSpiderGo/utils" 6 | "fmt" 7 | "github.com/gocolly/colly" 8 | "gopkg.in/mgo.v2" 9 | "strconv" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | func SetInfoCallback(getInfoC, getMoreInfoC *colly.Collector) { 15 | getInfoC.OnResponse(func(r *colly.Response) { 16 | content := string(r.Body) 17 | info := mdb.Information{} 18 | info.CrawlTime = int32(time.Now().Unix()) 19 | info.Id_ = utils.ReParse(`(\d+)/info`, r.Request.URL.String()) 20 | nickName := utils.ReParse(`昵称?[::]?(.*?)
`, content) 21 | authentication := utils.ReParse(`认证?[::]?(.*?)
`, content) 22 | gender := utils.ReParse(`性别?[::]?(.*?)
`, content) 23 | place := utils.ReParse(`地区?[::]?(.*?)
`, content) 24 | briefIntroduction := utils.ReParse(`简介?[::]?(.*?)
`, content) 25 | birthday := utils.ReParse(`生日?[::]?(.*?)
`, content) 26 | sexOrientation := utils.ReParse(`性取向?[::]?(.*?)
`, content) 27 | sentiment := utils.ReParse(`感情状况?[::]?(.*?)
`, content) 28 | vipLevel := utils.ReParse(`会员等级?[::]?(.*?) (.*?)`, content) //标签 30 | info.Nickname = nickName 31 | info.Gender = gender 32 | placeli := strings.Split(place, " ") 33 | info.Province = placeli[0] 34 | if len(placeli) > 1 { 35 | info.City = placeli[1] 36 | } 37 | info.BriefIntroduction = briefIntroduction 38 | info.Birthday = birthday 39 | if sexOrientation == gender { 40 | info.SexOrientation = "同性恋" 41 | } else { 42 | info.SexOrientation = "异性恋" 43 | } 44 | info.Sentiment = sentiment 45 | info.VipLevel = vipLevel 46 | info.Authentication = authentication 47 | info.Labels = "" 48 | for i, labelItem := range labels { 49 | if i != 0 { 50 | info.Labels += "," 51 | } 52 | info.Labels += labelItem[1] 53 | } 54 | r.Ctx.Put("info", info) 55 | getMoreInfoC.Request("GET","https://weibo.cn/u/" + info.Id_,nil,r.Ctx,nil) 56 | }) 57 | } 58 | 59 | func SetMoreInfoCallback(getMoreInfoC *colly.Collector){ 60 | getMoreInfoC.OnResponse(func(r *colly.Response) { 61 | content := string(r.Body) 62 | info := r.Ctx.GetAny("info").(mdb.Information) 63 | tweetsNum := utils.ReParse(`微博\[(\d+)\]`, content) 64 | followsNum := utils.ReParse(`关注\[(\d+)\]`, content) 65 | fansNum := utils.ReParse(`粉丝\[(\d+)\]`, content) 66 | if tweetsNum != "" { 67 | temp, _ := strconv.Atoi(tweetsNum) 68 | info.TweetsNum = int32(temp) 69 | } 70 | if followsNum != "" { 71 | temp, _ := strconv.Atoi(followsNum) 72 | info.FollowsNum = int32(temp) 73 | } 74 | if fansNum != "" { 75 | temp, _ := strconv.Atoi(fansNum) 76 | info.FansNum = int32(temp) 77 | } 78 | err := mdb.Insert(dbName, "Information", info) 79 | if mgo.IsDup(err) { 80 | //有重复数据 81 | fmt.Println("already scrapy") 82 | } 83 | }) 84 | } 85 | -------------------------------------------------------------------------------- /scrapy_rules/tweet_rules.go: -------------------------------------------------------------------------------- 1 | package scrapy_rules 2 | 3 | import ( 4 | "WeiboSpiderGo/config" 5 | "WeiboSpiderGo/mdb" 6 | 
"WeiboSpiderGo/utils" 7 | "fmt" 8 | "github.com/gocolly/colly" 9 | "gopkg.in/mgo.v2" 10 | "strconv" 11 | "strings" 12 | "time" 13 | ) 14 | 15 | func SetTweetCallback(getTweetsC, getContentSubC, getCommentSubC *colly.Collector) { 16 | getTweetsC.OnResponse(func(r *colly.Response) { 17 | content := string(r.Body) 18 | uid := utils.ReParse(`(\d+)/profile`, r.Request.URL.String()) 19 | if strings.Contains(r.Request.URL.String(), "page=1") { 20 | allPage := utils.ReParse(`/> 1/(\d+)页`, content) 21 | pageNum, _ := strconv.Atoi(allPage) 22 | for i := 2; i < (pageNum + 1); i++ { 23 | link := fmt.Sprintf("%s/%s/profile?page=%d", BaseUrl, uid, i) 24 | getTweetsC.Visit(link) 25 | } 26 | } 27 | }) 28 | getTweetsC.OnXML(`//div[@class="c" and @id]`, func(element *colly.XMLElement) { 29 | tweet := mdb.Tweets{} 30 | tweet.CrawlTime = int32(time.Now().Unix()) 31 | tweetRepostUrl := element.ChildAttr(`.//a[contains(text(),"转发[")]`, "href") 32 | tweetItemId := utils.ReParse(`/repost/(.*?)\?`, tweetRepostUrl) 33 | tweet.UserId = utils.ReParse(`uid=(\d+)`, tweetRepostUrl) 34 | tweet.WeiboUrl = fmt.Sprintf("https://weibo.com/%s/%s", tweet.UserId, tweetItemId) 35 | tweet.Id_ = fmt.Sprintf("%s_%s", tweet.UserId, tweetItemId) 36 | createTimeInfo := element.ChildText(`.//span[@class="ct"]`) 37 | if strings.Contains(createTimeInfo, "来自") { 38 | timeStr := strings.Split(createTimeInfo, "来自")[0] 39 | timeStr = strings.TrimSpace(timeStr) 40 | tweet.CreatedAt = utils.ConvTime(timeStr) 41 | tweet.Tool = strings.Split(createTimeInfo, "来自")[1] 42 | } else { 43 | timeStr := strings.TrimSpace(createTimeInfo) 44 | tweet.CreatedAt = utils.ConvTime(timeStr) 45 | } 46 | 47 | likeNumText := element.ChildText(`.//a[contains(text(),"赞[")]`) 48 | likeNum, _ := strconv.Atoi(utils.ReParse(`\d+`, likeNumText)) 49 | tweet.LikeNum = int32(likeNum) 50 | 51 | repostNumText := element.ChildText(`.//a[contains(text(),"转发[")]`) 52 | repostNum, _ := strconv.Atoi(utils.ReParse(`\d+`, repostNumText)) 53 | tweet.RepostNum 
= int32(repostNum) 54 | 55 | commentNumText := element.ChildText(`.//a[contains(text(),"评论[") and not(contains(text(),"原文"))]`) 56 | commentNum, _ := strconv.Atoi(utils.ReParse(`\d+`, commentNumText)) 57 | tweet.CommentNum = int32(commentNum) 58 | 59 | tweet.ImageUrl = element.ChildAttr(`.//img[@alt="图片"]`, "src") 60 | tweet.VideoUrl = element.ChildAttr(`.//a[contains(@href,"https://m.weibo.cn/s/video/show?object_id=")]`, "href") 61 | 62 | mapNode := element.ChildAttr(`.//a[contains(text(),"显示地图")]`, "href") 63 | if mapNode != "" { 64 | tweet.LocationMapInfo = utils.ReParse(`xy=(.*?)&`, mapNode) 65 | } 66 | 67 | tweet.OriginWeibo = element.ChildAttr(`.//a[contains(text(),"原文评论[")]`, "href") 68 | 69 | allContentLink := element.ChildAttr(`.//a[text()="全文" and contains(@href,"ckAll=1")]`, "href") 70 | if allContentLink == "" { 71 | //没有全文按钮 72 | content := element.Text 73 | if pos := strings.LastIndex(content, "转发理由:"); pos != -1 { 74 | content = content[pos+len("转发理由:"):] 75 | } 76 | content = content[0:strings.LastIndex(content, "赞")] 77 | if pos := strings.LastIndex(content, "[组图共"); pos != -1 { 78 | content = content[0:pos] 79 | } 80 | if pos := strings.LastIndex(content, "原图"); pos != -1 { 81 | l := len(content) 82 | if l >= pos+6 { 83 | content = content[0:pos] 84 | } 85 | } 86 | tweet.Content = strings.TrimSpace(content) 87 | err := mdb.Insert(dbName, "Tweets", tweet) 88 | if mgo.IsDup(err) { 89 | //有重复数据 90 | fmt.Println("already scrapy") 91 | } 92 | } else { 93 | element.Response.Ctx.Put("tweet", tweet) 94 | contentSubLink := fmt.Sprintf("%s%s", BaseUrl, allContentLink) 95 | getContentSubC.Request("GET", contentSubLink, nil, element.Response.Ctx, nil) 96 | } 97 | 98 | commentLink := fmt.Sprintf("%s/comment/%s?page=1", BaseUrl, strings.Split(tweet.Id_, "_")[1]) 99 | element.Response.Ctx.Put("weibo_url", tweet.WeiboUrl) 100 | if config.Conf.GetBool("SCRAPY_TYPE.Tweet.Comment") { 101 | getCommentSubC.Request("GET", commentLink, nil, element.Response.Ctx, nil) 
102 | } 103 | }) 104 | } 105 | 106 | func SetFullContentCallback(getContentSubC *colly.Collector) { 107 | getContentSubC.OnXML(`//*[@id="M_"]/div[1]`, func(element *colly.XMLElement) { 108 | //var tweet mdb.Tweets 109 | tweetInt := element.Response.Ctx.GetAny("tweet") 110 | tweet := tweetInt.(mdb.Tweets) 111 | content := element.Text 112 | if pos := strings.LastIndex(content, "转发理由:"); pos != -1 { 113 | content = content[pos+len("转发理由:"):] 114 | } 115 | if pos := strings.LastIndex(content, "[组图共"); pos != -1 { 116 | content = content[0:pos] 117 | } 118 | if pos := strings.LastIndex(content, "原图"); pos != -1 { 119 | l := len(content) 120 | if l >= pos+6 { 121 | content = content[0:pos] 122 | } 123 | } 124 | tweet.Content = strings.TrimSpace(content) 125 | err := mdb.Insert(dbName, "Tweets", tweet) 126 | if mgo.IsDup(err) { 127 | //有重复数据 128 | fmt.Println("already scrapy") 129 | } 130 | }) 131 | } 132 | 133 | func SetCommentCallback(getCommentSubC *colly.Collector) { 134 | getCommentSubC.OnResponse(func(r *colly.Response) { 135 | content := string(r.Body) 136 | if strings.Contains(r.Request.URL.String(), "page=1") { 137 | allPage := utils.ReParse(`/> 1/(\d+)页`, content) 138 | pageNum, _ := strconv.Atoi(allPage) 139 | for i := 2; i < (pageNum + 1); i++ { 140 | pageUrl := strings.Replace(r.Request.URL.String(), "page=1", "page="+strconv.Itoa(i), -1) 141 | getCommentSubC.Visit(pageUrl) 142 | } 143 | } 144 | }) 145 | getCommentSubC.OnXML(`//div[@class="c" and contains(@id,"C_")]`, func(element *colly.XMLElement) { 146 | commentUserUrl := element.ChildAttr(`.//a[contains(@href,"/u/")]`, "href") 147 | if commentUserUrl == "" { 148 | return 149 | } 150 | comment := mdb.Comment{} 151 | comment.CrawlTime = int32(time.Now().Unix()) 152 | comment.WeiboUrl = element.Response.Ctx.Get("weibo_url") 153 | comment.CommentUserId = utils.ReParse(`/u/(\d+)`, commentUserUrl) 154 | comment.Id_ = element.Attr("id") 155 | createdAtInfo := element.ChildText(`.//span[@class="ct"]`) 156 | 
// ExecPath is the directory that contains the running executable; the
// configuration, account files and driver are looked up relative to it.
var ExecPath = getExecutePath()

// ReParse returns the first capture group of the first match of pattern in
// content. When the pattern has no capture group the whole match is
// returned, and "" when nothing matches.
//
// It panics if pattern is not a valid regular expression.
func ReParse(pattern string, content string) string {
	match := regexp.MustCompile(pattern).FindStringSubmatch(content)
	switch {
	case match == nil:
		return ""
	case len(match) == 1:
		return match[0]
	}
	return match[1]
}

// ReParseMayLi returns every match of pattern in content; each entry holds
// the whole match followed by its capture groups. The result is nil when
// nothing matches.
//
// It panics if pattern is not a valid regular expression.
func ReParseMayLi(pattern string, content string) [][]string {
	return regexp.MustCompile(pattern).FindAllStringSubmatch(content, -1)
}

// ConvTime converts the relative and abbreviated timestamps shown by weibo
// into absolute ones based on the current local time:
//
//	"5分钟前"        -> "2006-01-02 15:04" five minutes ago
//	"3小时前"        -> "2006-01-02 15:04" three hours ago
//	"今天 12:30"     -> "<today's date> 12:30"
//	"05月01日 12:30" -> "<current year>-05-01 12:30"
//
// Any other input, such as a timestamp that already carries a year, is
// returned unchanged.
func ConvTime(timeStr string) string {
	now := time.Now()
	switch {
	case strings.Contains(timeStr, "分钟前"):
		// A missing leading number converts to 0, i.e. "now".
		minutes, _ := strconv.Atoi(ReParse(`^(\d+)分钟`, timeStr))
		return now.Add(-time.Duration(minutes) * time.Minute).Format("2006-01-02 15:04")
	case strings.Contains(timeStr, "小时前"):
		// A missing leading number converts to 0, i.e. "now".
		hours, _ := strconv.Atoi(ReParse(`^(\d+)小时`, timeStr))
		return now.Add(-time.Duration(hours) * time.Hour).Format("2006-01-02 15:04")
	case strings.Contains(timeStr, "今天"):
		return strings.Replace(timeStr, "今天", now.Format("2006-01-02"), -1)
	case strings.Contains(timeStr, "月"):
		// Month/day timestamps omit the year, so prefix the current one to
		// keep the result in the same shape as the other branches.
		monthDay := strings.NewReplacer("月", "-", "日", "").Replace(timeStr)
		return strconv.Itoa(now.Year()) + "-" + monthDay
	}
	return timeStr
}

// GetTargetUidList reads the user ids to crawl from account/target.txt next
// to the executable, one id per line. Surrounding whitespace and a byte-order
// mark are stripped, and blank lines are skipped.
//
// It panics when the file cannot be opened or read.
func GetTargetUidList() []string {
	file, err := os.Open(filepath.Join(ExecPath, "account", "target.txt"))
	if err != nil {
		panic(err)
	}
	defer file.Close()

	var uidLi []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		// Remove the byte-order mark first: TrimSpace does not treat it as
		// whitespace.
		uid := strings.TrimSpace(strings.Replace(scanner.Text(), "\uFEFF", "", -1))
		if uid == "" {
			continue
		}
		uidLi = append(uidLi, uid)
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	return uidLi
}

// getExecutePath returns the directory of the running executable. It falls
// back to the directory of os.Args[0] when the executable path cannot be
// determined.
func getExecutePath() string {
	if exe, err := os.Executable(); err == nil {
		return filepath.Dir(exe)
	}
	return filepath.Dir(os.Args[0])
}
scrapy_rules.GetDefaultCollector() 32 | scrapy_rules.SetCommentCallback(getCommentSubC) 33 | 34 | scrapy_rules.SetTweetCallback(getTweetsC, getContentSubC, getCommentSubC) 35 | 36 | for _, uid := range uidLi { 37 | url := scrapy_rules.GetTweetUrl(uid) 38 | getTweetsC.Visit(url) 39 | } 40 | getTweetsC.Wait() 41 | getContentSubC.Wait() 42 | getCommentSubC.Wait() 43 | } 44 | 45 | func scrapyFollow() { 46 | getFollowC := scrapy_rules.GetDefaultCollector() 47 | scrapy_rules.SetFollowCallback(getFollowC) 48 | //read files 49 | for _, uid := range uidLi { 50 | url := scrapy_rules.GetFollowUrl(uid) 51 | getFollowC.Visit(url) 52 | } 53 | getFollowC.Wait() 54 | } 55 | 56 | func scrapyFans() { 57 | getFansC := scrapy_rules.GetDefaultCollector() 58 | scrapy_rules.SetFansCallback(getFansC) 59 | 60 | for _, uid := range uidLi { 61 | url := scrapy_rules.GetFansUrl(uid) 62 | getFansC.Visit(url) 63 | } 64 | getFansC.Wait() 65 | } 66 | 67 | func main() { 68 | if config.Conf.GetBool("SCRAPY_TYPE.Info") { 69 | scrapyInfomation() 70 | } 71 | if config.Conf.GetBool("SCRAPY_TYPE.Follow") { 72 | scrapyFollow() 73 | } 74 | //修复去重问题 75 | if config.Conf.GetBool("SCRAPY_TYPE.Fans") { 76 | scrapyFans() 77 | } 78 | if config.Conf.GetBool("SCRAPY_TYPE.Tweet.Main") { 79 | scrapyTweet() 80 | } 81 | } 82 | --------------------------------------------------------------------------------