├── README.md └── crawler ├── data.sql ├── main.go └── readMe.txt /README.md: -------------------------------------------------------------------------------- 1 | # crawler-boss 2 | 3 | ## 用Go实现抓取Boss直聘职位数据。有几个特点 4 | 5 | 1.代理防IP被封 6 | 7 | 2.模拟浏览器,反识别爬虫。 8 | 9 | 3.控制爬取频率。 10 | 11 | 4.多协程爬取。 12 | 13 | ## 不足之处 14 | 15 | 1.爬取失败,没有进行重试以及更换IP处理。 16 | 17 | 2.错误处理不完善。 18 | 19 | 3.代码结构有待优化。 20 | 21 | 22 | ![image](https://user-images.githubusercontent.com/16487020/126505408-a32aed43-a6a2-4570-ba94-440511d31f13.png) 23 | 24 | 25 | ## 交流 && 疑问 26 | 如果有任何错误或不懂的地方欢迎给我提问 https://github.com/githubw2015/crawler-boss 27 | 28 | 如果对你有所帮助,请给个Star,你的支持,是我最大的动力。 29 | -------------------------------------------------------------------------------- /crawler/data.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `sp_boss_jobs` ( 2 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 3 | `job_name` varchar(32) NOT NULL DEFAULT '' COMMENT '工作名称', 4 | `salary` varchar(30) NOT NULL COMMENT '薪资', 5 | `job_type` varchar(4) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '类型', 6 | `city` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '城市', 7 | `href` varchar(255) NOT NULL COMMENT '详情连接', 8 | `company_name` varchar(32) DEFAULT '' COMMENT '公司名称', 9 | `company_address` varchar(64) NOT NULL DEFAULT '' COMMENT '公司地址', 10 | `work_years` varchar(16) DEFAULT '' COMMENT '工作年限', 11 | `education` varchar(16) DEFAULT '' COMMENT '学历要求', 12 | `company_label` varchar(16) DEFAULT '' COMMENT '公司所属行业', 13 | `financing_stage` varchar(16) DEFAULT '' COMMENT '融资阶段', 14 | `staff_number` varchar(16) DEFAULT '' COMMENT '公司规模-员工人数', 15 | `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', 16 | `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', 17 | PRIMARY KEY (`id`) 18 | ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='boss招聘信息表'; 
-------------------------------------------------------------------------------- /crawler/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "github.com/PuerkitoBio/goquery" 7 | "github.com/jinzhu/gorm" 8 | _ "github.com/jinzhu/gorm/dialects/mysql" 9 | "github.com/tebeka/selenium" 10 | "github.com/tebeka/selenium/chrome" 11 | "io/ioutil" 12 | "log" 13 | "math/rand" 14 | "strconv" 15 | "strings" 16 | "sync" 17 | "time" 18 | ) 19 | 20 | const ( 21 | seleniumPath = `C:\chromedriver_win32\selenium-server-standalone-3.9.1.jar` 22 | geckoDriverPath = `C:\chromedriver_win32\chromedriver.exe` 23 | port = 9515 24 | ) 25 | 26 | var ( 27 | DB *gorm.DB 28 | username = "root" 29 | password = "root" 30 | dbName = "test" 31 | ) 32 | 33 | var searKeywords = []string{ 34 | "golang", 35 | "php", 36 | "Python", 37 | "Java", 38 | } 39 | 40 | var cityMap = map[int]string{ 41 | 101020100: "上海", 42 | //101010100: "北京", 43 | //101280100: "广州", 44 | //101280600: "深圳", 45 | //101210100: "杭州", 46 | } 47 | 48 | var proxyIps = []string{ 49 | "http://120.38.241.162:4510", 50 | "http://58.241.203.160:4545", 51 | "http://180.125.107.166:4536", 52 | "http://180.125.33.225:4557", 53 | "http://124.94.250.26:4560", 54 | "http://42.54.90.13:4550", 55 | "http://27.44.216.205:4545", 56 | "http://180.125.2.213:4567", 57 | "http://117.60.242.32:4547", 58 | } 59 | 60 | func init() { 61 | var err error 62 | DB, err = gorm.Open("mysql", fmt.Sprintf("%s:%s@/%s?charset=utf8&parseTime=True&loc=Local", username, password, dbName)) 63 | if err != nil { 64 | log.Fatalf(" gorm.Open.err: %v", err) 65 | } 66 | 67 | DB.SingularTable(true) 68 | } 69 | 70 | var wg sync.WaitGroup 71 | 72 | func main() { 73 | //初始化基本参数 74 | opts := []selenium.ServiceOption{ 75 | selenium.ChromeDriver(geckoDriverPath), // Specify the path to GeckoDriver in order to use Firefox. 
76 | selenium.Output(ioutil.Discard), // Output debug information to STDERR. 77 | } 78 | service, err := selenium.NewSeleniumService(seleniumPath, port, opts...) 79 | defer service.Stop() 80 | 81 | for index, val := range cityMap { 82 | for _, item := range searKeywords { 83 | wg.Add(1) 84 | go func(item string, index int, val string) { 85 | if err != nil { 86 | panic(err) // panic is used only as an example and is not otherwise recommended. 87 | } 88 | //打开 chrome 浏览器 89 | caps := selenium.Capabilities{"browserName": "chrome"} 90 | //禁止图片加载,加快渲染速度 91 | imagCaps := map[string]interface{}{ 92 | "profile.managed_default_content_settings.images": 2, 93 | } 94 | rand.Seed(time.Now().Unix()) 95 | proxyIndex := rand.Intn(len(proxyIps)) 96 | chromeCaps := chrome.Capabilities{ 97 | Prefs: imagCaps, 98 | Path: "", 99 | Args: []string{ 100 | "--headless", 101 | "--start-maximized", 102 | //"--window-size=1200x600", 103 | "--no-sandbox", 104 | "--user-agent=" + GetRandomUserAgent(), 105 | "--disable-gpu", 106 | "--disable-impl-side-painting", 107 | "--disable-gpu-sandbox", 108 | "--disable-accelerated-2d-canvas", 109 | "--disable-accelerated-jpeg-decoding", 110 | "--test-type=ui", 111 | "--proxy-server=" + proxyIps[proxyIndex], 112 | }, 113 | } 114 | 115 | //以上是设置浏览器参数 116 | caps.AddChrome(chromeCaps) 117 | //打开 chrome 浏览器 118 | wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port)) 119 | if err != nil { 120 | panic(err) 121 | } 122 | //wd.AddCookie(&selenium.Cookie{ 123 | // Name: "__zp_stoken__", 124 | // Value: "__fid=c2b051dc22170700021a31d7606054c0; wt2=DLzLXzYb7kUaCJMioMFtrwMhk6eQlRn81wUGE0NkP6lHW1BcFpJSAPKv89ZXnViD933HW6_mmU-_734s4nYYbMg~~; _bl_uid=C4kRkrpp7k7s80ge9i5wrwst599d; acw_tc=0bdd34b616265981976475631e01e066c1d589888f9a880aaa421e2f892e21; lastCity=101020100; __zp_seo_uuid__=c18ed6d2-ab6f-4214-896a-c55a0d9bc586; __c=1626599142; __g=-; 
__l=r=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D_ycLarYk8_yn0W3nbwH-I2939KNJrnyYRn7Ahn43fZp1bMhDMqRI1cFTkozRfT9F%26wd%3D%26eqid%3Db762e6bb0008e8df0000000560f3eedf&l=%2Fwww.zhipin.com%2Fshanghai%2F&s=1&g=&s=3&friend_source=0; __a=11523211.1626525304.1626597684.1626599142.97.7.1.97; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1626594121,1626594699,1626597681,1626599142; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1626599142; __zp_stoken__=83bfcZ1IkQXl8FiE1czd0FAMGQnRYLjhrJWQoUQVFMUZMYWt2Y1RxQGEYKGYFR1gFPEx0BnQhKQIuAygUBl9TeUptCy9YbCw4VR1VYlYZOUdRbzpNUDRMPFZxAV0zMih4DG9kO30kVnYNQTo0", 125 | //}) 126 | var count = 0 127 | for i := 1; ; i++ { 128 | urls := `https://www.zhipin.com/c` + strconv.Itoa(index) + `/?query=` + item + `&page=` + strconv.Itoa(i) 129 | fmt.Println(urls) 130 | //加载网页 131 | if err := wd.Get(urls); err != nil { 132 | panic(err) 133 | } 134 | time.Sleep(time.Second * 10) 135 | jsRt, err := wd.ExecuteScript("return document.readyState", nil) 136 | if err != nil { 137 | log.Println("exe js err", err) 138 | } 139 | fmt.Println("jsRt", jsRt) 140 | if jsRt != "complete" { 141 | log.Println(item + "网页加载未完成" + strconv.Itoa(i)) 142 | time.Sleep(time.Second * 5) 143 | } 144 | // next disabled 145 | // 获取网站内容 146 | var frameHtml string 147 | frameHtml, err = wd.PageSource() 148 | if err != nil { 149 | log.Println(err) 150 | return 151 | } 152 | //解析 html 文件 153 | var doc *goquery.Document 154 | doc, err = goquery.NewDocumentFromReader(bytes.NewReader([]byte(frameHtml))) 155 | if err != nil { 156 | log.Println(err) 157 | return 158 | } 159 | var Workexperience, Education, rongzi, staffNumber string 160 | doc.Find("#main ul li").Each(func(i int, context *goquery.Selection) { 161 | jobName := trimSpase(context.Find("span[class=\"job-name\"]").Text()) 162 | salary := trimSpase(context.Find("span[class=\"red\"]").Text()) 163 | href, _ := context.Find("div[class=\"info-primary\"] a").Attr("href") 164 | company := trimSpase(context.Find("div[class=\"info-company\"] 
h3").Text()) 165 | address := trimSpase(context.Find("span[class=\"job-area\"]").Text()) 166 | worklimit, _ := context.Find("div[class=\"job-limit clearfix\"] p").Html() 167 | industry := trimSpase(context.Find("div[class=\"info-company\"] a[class=\"false-link\"]").Text()) 168 | 169 | data1 := strings.Split(worklimit, "") 170 | for index, val := range data1 { 171 | if index == 0 { 172 | Workexperience = trimSpase(val) 173 | } else if index == 1 { 174 | Education = trimSpase(val) 175 | } 176 | } 177 | 178 | href = "https://www.zhipin.com" + href 179 | rognstuff, _ := context.Find("div[class=\"info-company\"] p").Html() 180 | data2 := strings.Split(rognstuff, "") 181 | for index, val := range data2 { 182 | if index == 1 { 183 | rongzi = trimSpase(val) 184 | } else if index == 2 { 185 | staffNumber = trimSpase(val) 186 | } 187 | } 188 | if jobName != "" { 189 | sp := SpBossJobs{ 190 | JobName: jobName, 191 | Salary: salary, 192 | Href: href, 193 | JobType: item, 194 | City: val, 195 | CompanyName: company, 196 | CompanyAddress: address, 197 | WorkYears: Workexperience, 198 | Education: Education, 199 | CompanyLabel: industry, 200 | FinancingStage: rongzi, 201 | StaffNumber: staffNumber, 202 | } 203 | sp.Add() 204 | count++ 205 | } 206 | }) 207 | _, errs := wd.FindElement(selenium.ByCSSSelector, "a[class='next disabled']") 208 | if errs == nil { 209 | fmt.Println(item + "找到隐藏;抓取总数:" + strconv.Itoa(count)) 210 | break 211 | } 212 | } 213 | wg.Done() 214 | wd.Quit() // 关闭浏览器 215 | }(item, index, val) 216 | } 217 | } 218 | 219 | wg.Wait() 220 | fmt.Println("结束") 221 | } 222 | 223 | func trimSpase(str string) string { 224 | strs := strings.Replace(str, " ", "", -1) 225 | strs = strings.Replace(strs, "\n", "", -1) 226 | return strs 227 | } 228 | 229 | // boss招聘信息表 230 | type SpBossJobs struct { 231 | Id uint `db:"id"` 232 | JobName string `db:"job_name"` //工作名称 233 | Salary string `db:"salary"` //薪资 234 | City string `db:"city"` //城市 235 | JobType string `db:"job_type"` 
//薪资 236 | Href string `db:"href"` //详情连接 237 | CompanyName string `db:"company_name"` //公司名称 238 | CompanyAddress string `db:"company_address"` //公司地址 239 | WorkYears string `db:"work_years"` //工作年限 240 | Education string `db:"education"` //学历要求 241 | CompanyLabel string `db:"company_label"` //公司所属行业 242 | FinancingStage string `db:"financing_stage"` //融资阶段 243 | StaffNumber string `db:"staff_number"` //公司规模-员工人数 244 | } 245 | 246 | //添加数据 247 | func (sp *SpBossJobs) Add() { 248 | err := DB.Create(sp).Error 249 | if err != nil { 250 | fmt.Println("创建失败") 251 | } 252 | } 253 | 254 | var userAgentList = []string{ 255 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 256 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 257 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 258 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 259 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4", 260 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 261 | "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0", 262 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 263 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0", 264 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0", 265 | "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 266 | "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko", 267 | "Mozilla/5.0 (Macintosh; Intel 
Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 268 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 269 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0", 270 | "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 271 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 272 | "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 273 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0", 274 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393", 275 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36", 276 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/603.2.5 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.5", 277 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36", 278 | "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", 279 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063", 280 | "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 281 | "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0", 282 | "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36", 283 | "Mozilla/5.0 (iPad; CPU OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.0 Mobile/14F89 Safari/602.1", 284 | "Mozilla/5.0 (Windows NT 6.1; rv:54.0) 
Gecko/20100101 Firefox/54.0", 285 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 286 | "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0", 287 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0", 288 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 289 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:54.0) Gecko/20100101 Firefox/54.0", 290 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 291 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36", 292 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36", 293 | "Mozilla/5.0 (X11; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0", 294 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36", 295 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 296 | "Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0", 297 | "Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko", 298 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30", 299 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0", 300 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36", 301 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 302 | "Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0", 303 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like 
Gecko) Chrome/59.0.3071.109 Safari/537.36", 304 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0", 305 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36", 306 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/58.0.3029.110 Chrome/58.0.3029.110 Safari/537.36", 307 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.2.5 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.5", 308 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36", 309 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36", 310 | "Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0", 311 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 312 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0", 313 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36", 314 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 315 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36", 316 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36 OPR/46.0.2597.32", 317 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/59.0.3071.109 Chrome/59.0.3071.109 Safari/537.36", 318 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:53.0) Gecko/20100101 Firefox/53.0", 319 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36", 320 | "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; 
rv:11.0) like Gecko", 321 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 OPR/45.0.2552.898", 322 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 323 | "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 324 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36", 325 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1", 326 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36 OPR/46.0.2597.39", 327 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:54.0) Gecko/20100101 Firefox/54.0", 328 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7", 329 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/602.4.8 (KHTML, like Gecko) Version/10.0.3 Safari/602.4.8", 330 | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", 331 | "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko", 332 | "Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0", 333 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36", 334 | } 335 | 336 | func GetRandomUserAgent() string { 337 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 338 | return userAgentList[r.Intn(len(userAgentList))] 339 | } 340 | -------------------------------------------------------------------------------- /crawler/readMe.txt: -------------------------------------------------------------------------------- 1 | 2 | --------------------------------------------------------------------------------