├── README.md
└── crawler
├── data.sql
├── main.go
└── readMe.txt
/README.md:
--------------------------------------------------------------------------------
1 | # crawler-boss
2 |
3 | ## 用 Go 实现的 Boss 直聘职位数据爬虫，主要有以下几个特点
4 |
5 | 1.使用代理,防止IP被封。
6 |
7 | 2.模拟浏览器,反识别爬虫。
8 |
9 | 3.控制爬取频率。
10 |
11 | 4.多协程爬取。
12 |
13 | ## 不足之处
14 |
15 | 1.爬取失败,没有进行重试以及更换IP处理。
16 |
17 | 2.错误处理不完善。
18 |
19 | 3.代码结构有待优化。
20 |
21 |
22 | 
23 |
24 |
25 | ## 交流 && 疑问
26 | 如果有任何错误或不懂的地方欢迎给我提问 https://github.com/githubw2015/crawler-boss
27 |
28 | 如果对你有所帮助,请给个Star,你的支持,是我最大的动力。
29 |
--------------------------------------------------------------------------------
/crawler/data.sql:
--------------------------------------------------------------------------------
-- Job postings scraped from Boss Zhipin, one row per listing.
CREATE TABLE `sp_boss_jobs` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `job_name` varchar(32) NOT NULL DEFAULT '' COMMENT '工作名称',
  `salary` varchar(30) NOT NULL COMMENT '薪资',
  -- job_type receives the crawler's search keyword ("golang", "Python", ...),
  -- so it has to hold more than the 4 characters it was originally given.
  `job_type` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '类型',
  `city` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '城市',
  `href` varchar(255) NOT NULL COMMENT '详情连接',
  `company_name` varchar(32) DEFAULT '' COMMENT '公司名称',
  `company_address` varchar(64) NOT NULL DEFAULT '' COMMENT '公司地址',
  `work_years` varchar(16) DEFAULT '' COMMENT '工作年限',
  `education` varchar(16) DEFAULT '' COMMENT '学历要求',
  `company_label` varchar(16) DEFAULT '' COMMENT '公司所属行业',
  `financing_stage` varchar(16) DEFAULT '' COMMENT '融资阶段',
  `staff_number` varchar(16) DEFAULT '' COMMENT '公司规模-员工人数',
  `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='boss招聘信息表';
--------------------------------------------------------------------------------
/crawler/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "fmt"
6 | "github.com/PuerkitoBio/goquery"
7 | "github.com/jinzhu/gorm"
8 | _ "github.com/jinzhu/gorm/dialects/mysql"
9 | "github.com/tebeka/selenium"
10 | "github.com/tebeka/selenium/chrome"
11 | "io/ioutil"
12 | "log"
13 | "math/rand"
14 | "strconv"
15 | "strings"
16 | "sync"
17 | "time"
18 | )
19 |
// seleniumPath is the location of the Selenium standalone server jar.
const seleniumPath = `C:\chromedriver_win32\selenium-server-standalone-3.9.1.jar`

// geckoDriverPath is the driver executable handed to selenium.ChromeDriver.
// Despite the name, the path points at chromedriver.exe.
const geckoDriverPath = `C:\chromedriver_win32\chromedriver.exe`

// port is the local port the Selenium service is started on and the
// WebDriver clients connect to.
const port = 9515
25 |
26 | var (
27 | DB *gorm.DB
28 | username = "root"
29 | password = "root"
30 | dbName = "test"
31 | )
32 |
var (
	// searKeywords lists the search terms crawled for every city.
	searKeywords = []string{"golang", "php", "Python", "Java"}

	// cityMap maps the numeric city code used in the listing URL to the city
	// name stored with each posting. Entries that are currently switched off:
	//   101010100: "北京"
	//   101280100: "广州"
	//   101280600: "深圳"
	//   101210100: "杭州"
	cityMap = map[int]string{
		101020100: "上海",
	}

	// proxyIps is the pool of HTTP proxies; one is picked at random for each
	// browser session.
	proxyIps = []string{
		"http://120.38.241.162:4510",
		"http://58.241.203.160:4545",
		"http://180.125.107.166:4536",
		"http://180.125.33.225:4557",
		"http://124.94.250.26:4560",
		"http://42.54.90.13:4550",
		"http://27.44.216.205:4545",
		"http://180.125.2.213:4567",
		"http://117.60.242.32:4547",
	}
)
59 |
60 | func init() {
61 | var err error
62 | DB, err = gorm.Open("mysql", fmt.Sprintf("%s:%s@/%s?charset=utf8&parseTime=True&loc=Local", username, password, dbName))
63 | if err != nil {
64 | log.Fatalf(" gorm.Open.err: %v", err)
65 | }
66 |
67 | DB.SingularTable(true)
68 | }
69 |
70 | var wg sync.WaitGroup
71 |
72 | func main() {
73 | //初始化基本参数
74 | opts := []selenium.ServiceOption{
75 | selenium.ChromeDriver(geckoDriverPath), // Specify the path to GeckoDriver in order to use Firefox.
76 | selenium.Output(ioutil.Discard), // Output debug information to STDERR.
77 | }
78 | service, err := selenium.NewSeleniumService(seleniumPath, port, opts...)
79 | defer service.Stop()
80 |
81 | for index, val := range cityMap {
82 | for _, item := range searKeywords {
83 | wg.Add(1)
84 | go func(item string, index int, val string) {
85 | if err != nil {
86 | panic(err) // panic is used only as an example and is not otherwise recommended.
87 | }
88 | //打开 chrome 浏览器
89 | caps := selenium.Capabilities{"browserName": "chrome"}
90 | //禁止图片加载,加快渲染速度
91 | imagCaps := map[string]interface{}{
92 | "profile.managed_default_content_settings.images": 2,
93 | }
94 | rand.Seed(time.Now().Unix())
95 | proxyIndex := rand.Intn(len(proxyIps))
96 | chromeCaps := chrome.Capabilities{
97 | Prefs: imagCaps,
98 | Path: "",
99 | Args: []string{
100 | "--headless",
101 | "--start-maximized",
102 | //"--window-size=1200x600",
103 | "--no-sandbox",
104 | "--user-agent=" + GetRandomUserAgent(),
105 | "--disable-gpu",
106 | "--disable-impl-side-painting",
107 | "--disable-gpu-sandbox",
108 | "--disable-accelerated-2d-canvas",
109 | "--disable-accelerated-jpeg-decoding",
110 | "--test-type=ui",
111 | "--proxy-server=" + proxyIps[proxyIndex],
112 | },
113 | }
114 |
115 | //以上是设置浏览器参数
116 | caps.AddChrome(chromeCaps)
117 | //打开 chrome 浏览器
118 | wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
119 | if err != nil {
120 | panic(err)
121 | }
122 | //wd.AddCookie(&selenium.Cookie{
123 | // Name: "__zp_stoken__",
124 | // Value: "__fid=c2b051dc22170700021a31d7606054c0; wt2=DLzLXzYb7kUaCJMioMFtrwMhk6eQlRn81wUGE0NkP6lHW1BcFpJSAPKv89ZXnViD933HW6_mmU-_734s4nYYbMg~~; _bl_uid=C4kRkrpp7k7s80ge9i5wrwst599d; acw_tc=0bdd34b616265981976475631e01e066c1d589888f9a880aaa421e2f892e21; lastCity=101020100; __zp_seo_uuid__=c18ed6d2-ab6f-4214-896a-c55a0d9bc586; __c=1626599142; __g=-; __l=r=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D_ycLarYk8_yn0W3nbwH-I2939KNJrnyYRn7Ahn43fZp1bMhDMqRI1cFTkozRfT9F%26wd%3D%26eqid%3Db762e6bb0008e8df0000000560f3eedf&l=%2Fwww.zhipin.com%2Fshanghai%2F&s=1&g=&s=3&friend_source=0; __a=11523211.1626525304.1626597684.1626599142.97.7.1.97; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1626594121,1626594699,1626597681,1626599142; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1626599142; __zp_stoken__=83bfcZ1IkQXl8FiE1czd0FAMGQnRYLjhrJWQoUQVFMUZMYWt2Y1RxQGEYKGYFR1gFPEx0BnQhKQIuAygUBl9TeUptCy9YbCw4VR1VYlYZOUdRbzpNUDRMPFZxAV0zMih4DG9kO30kVnYNQTo0",
125 | //})
126 | var count = 0
127 | for i := 1; ; i++ {
128 | urls := `https://www.zhipin.com/c` + strconv.Itoa(index) + `/?query=` + item + `&page=` + strconv.Itoa(i)
129 | fmt.Println(urls)
130 | //加载网页
131 | if err := wd.Get(urls); err != nil {
132 | panic(err)
133 | }
134 | time.Sleep(time.Second * 10)
135 | jsRt, err := wd.ExecuteScript("return document.readyState", nil)
136 | if err != nil {
137 | log.Println("exe js err", err)
138 | }
139 | fmt.Println("jsRt", jsRt)
140 | if jsRt != "complete" {
141 | log.Println(item + "网页加载未完成" + strconv.Itoa(i))
142 | time.Sleep(time.Second * 5)
143 | }
144 | // next disabled
145 | // 获取网站内容
146 | var frameHtml string
147 | frameHtml, err = wd.PageSource()
148 | if err != nil {
149 | log.Println(err)
150 | return
151 | }
152 | //解析 html 文件
153 | var doc *goquery.Document
154 | doc, err = goquery.NewDocumentFromReader(bytes.NewReader([]byte(frameHtml)))
155 | if err != nil {
156 | log.Println(err)
157 | return
158 | }
159 | var Workexperience, Education, rongzi, staffNumber string
160 | doc.Find("#main ul li").Each(func(i int, context *goquery.Selection) {
161 | jobName := trimSpase(context.Find("span[class=\"job-name\"]").Text())
162 | salary := trimSpase(context.Find("span[class=\"red\"]").Text())
163 | href, _ := context.Find("div[class=\"info-primary\"] a").Attr("href")
164 | company := trimSpase(context.Find("div[class=\"info-company\"] h3").Text())
165 | address := trimSpase(context.Find("span[class=\"job-area\"]").Text())
166 | worklimit, _ := context.Find("div[class=\"job-limit clearfix\"] p").Html()
167 | industry := trimSpase(context.Find("div[class=\"info-company\"] a[class=\"false-link\"]").Text())
168 |
169 | data1 := strings.Split(worklimit, "")
170 | for index, val := range data1 {
171 | if index == 0 {
172 | Workexperience = trimSpase(val)
173 | } else if index == 1 {
174 | Education = trimSpase(val)
175 | }
176 | }
177 |
178 | href = "https://www.zhipin.com" + href
179 | rognstuff, _ := context.Find("div[class=\"info-company\"] p").Html()
180 | data2 := strings.Split(rognstuff, "")
181 | for index, val := range data2 {
182 | if index == 1 {
183 | rongzi = trimSpase(val)
184 | } else if index == 2 {
185 | staffNumber = trimSpase(val)
186 | }
187 | }
188 | if jobName != "" {
189 | sp := SpBossJobs{
190 | JobName: jobName,
191 | Salary: salary,
192 | Href: href,
193 | JobType: item,
194 | City: val,
195 | CompanyName: company,
196 | CompanyAddress: address,
197 | WorkYears: Workexperience,
198 | Education: Education,
199 | CompanyLabel: industry,
200 | FinancingStage: rongzi,
201 | StaffNumber: staffNumber,
202 | }
203 | sp.Add()
204 | count++
205 | }
206 | })
207 | _, errs := wd.FindElement(selenium.ByCSSSelector, "a[class='next disabled']")
208 | if errs == nil {
209 | fmt.Println(item + "找到隐藏;抓取总数:" + strconv.Itoa(count))
210 | break
211 | }
212 | }
213 | wg.Done()
214 | wd.Quit() // 关闭浏览器
215 | }(item, index, val)
216 | }
217 | }
218 |
219 | wg.Wait()
220 | fmt.Println("结束")
221 | }
222 |
// trimSpase returns str with every space and newline character removed.
// All other bytes, including tabs and carriage returns, are kept as they are.
func trimSpase(str string) string {
	var kept strings.Builder
	kept.Grow(len(str))
	// Both removed characters are ASCII, so filtering byte by byte never
	// touches a multi-byte UTF-8 sequence.
	for pos := 0; pos < len(str); pos++ {
		switch ch := str[pos]; ch {
		case ' ', '\n':
			// dropped
		default:
			kept.WriteByte(ch)
		}
	}
	return kept.String()
}
228 |
229 | // SpBossJobs is one row of the Boss job postings table.
230 | type SpBossJobs struct {
231 | Id uint `db:"id"`
232 | JobName string `db:"job_name"` // job title
233 | Salary string `db:"salary"` // salary
234 | City string `db:"city"` // city
235 | JobType string `db:"job_type"` // job type; main fills it with the search keyword
236 | Href string `db:"href"` // link to the detail page
237 | CompanyName string `db:"company_name"` // company name
238 | CompanyAddress string `db:"company_address"` // company address
239 | WorkYears string `db:"work_years"` // required years of experience
240 | Education string `db:"education"` // required education
241 | CompanyLabel string `db:"company_label"` // company industry
242 | FinancingStage string `db:"financing_stage"` // financing stage
243 | StaffNumber string `db:"staff_number"` // company size (number of employees)
244 | }
245 |
246 | //添加数据
247 | func (sp *SpBossJobs) Add() {
248 | err := DB.Create(sp).Error
249 | if err != nil {
250 | fmt.Println("创建失败")
251 | }
252 | }
253 |
// userAgentList is the pool of browser User-Agent strings that
// GetRandomUserAgent chooses from.
var userAgentList = []string{
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4",
	"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/603.2.5 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.5",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063",
	"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
	"Mozilla/5.0 (iPad; CPU OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.0 Mobile/14F89 Safari/602.1",
	"Mozilla/5.0 (Windows NT 6.1; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0",
	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0",
	"Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30",
	"Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0",
	"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/58.0.3029.110 Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.2.5 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.5",
	"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36",
	"Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Firefox/45.0",
	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0",
	"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36 OPR/46.0.2597.32",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/59.0.3071.109 Chrome/59.0.3071.109 Safari/537.36",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:53.0) Gecko/20100101 Firefox/53.0",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 OPR/45.0.2552.898",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
	"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36 OPR/46.0.2597.39",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:54.0) Gecko/20100101 Firefox/54.0",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.7 (KHTML, like Gecko) Version/9.1.2 Safari/601.7.7",
	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/602.4.8 (KHTML, like Gecko) Version/10.0.3 Safari/602.4.8",
	"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
	"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko",
	"Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0",
	"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36",
}

// GetRandomUserAgent returns one entry of userAgentList chosen at random.
// Every call uses a fresh generator seeded with the current time in
// nanoseconds.
func GetRandomUserAgent() string {
	source := rand.NewSource(time.Now().UnixNano())
	pick := rand.New(source).Intn(len(userAgentList))
	return userAgentList[pick]
}
340 |
--------------------------------------------------------------------------------
/crawler/readMe.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------