├── .gitignore
├── README.md
├── comment_crawl.py
├── config.ini
├── poi_crawl.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | data/
2 | .idea/
3 | __pycache__/
4 | .DS_Store
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Ctrip_Crawler 携程景点爬虫
 2 | 
 3 | 
 4 | 
 5 | ![image-20210313235926448](https://irimskyblog.oss-cn-beijing.aliyuncs.com/content/20210313235929.png)
 6 | 
 7 | 
 8 | 
 9 | ![image-20210314000428177](https://irimskyblog.oss-cn-beijing.aliyuncs.com/content/20210314000432.png)
10 | 
11 | 
12 | 
13 | - 爬取的是 [**携程移动端**](https://m.ctrip.com/webapp/you/gspoi/sight/1.html?seo=1) 的数据（景点数据以及评论）
14 | 
15 | - 修改`config.ini`中的配置可以改变**目标城市**（默认北京）以及**爬取模式**
16 | 
17 |     ![](https://irimskyblog.oss-cn-beijing.aliyuncs.com/content/20210507163603.png)
18 | 
19 |     
20 | 
21 | - 爬取结果有两部分：`data/poi.csv`为**景点数据**，`data/comment/{id}.csv`为对应ID的景点的**评论数据**
22 | 
23 | - 评论内容的爬取有两种方法：
24 |   - 将`config.ini`中的`isCrawlComment`置为1，运行`poi_crawl.py`文件，在爬取 景点数据 的过程中爬取 评论数据 
25 |   - 将`config.ini`中的`isCrawlComment`置为0，运行`poi_crawl.py`文件，在爬取 景点数据 结束后运行再运行`comment_crawl.py`文件，获取 景点数据 中的所有景点的评论
26 |   
27 | - 每次运行前都会在同一文件夹下复制一份上一次爬取的景点结果的备份，名为`back.csv`
28 | 
29 | - 数据中 **价格**、**最低价格**为response中的数据，暂无参考价值
30 | 
31 | - 后面四种人群门票价格为**预估的销量加权平均价格**，如果有不同需求可以修改 `GetTicketPrice` 函数。（返回的数据为所有的门票价格）
32 | 
33 | - 景点数据中的**开放时间**与**优惠政策** 数据的格式为json格式
34 | 
35 | - 爬取的 评论数据 格式为：
36 | 
37 |     - **用户ID**
38 |     - **评论文本**
39 |     - **发送时间戳**
40 |     - **赞同数**
41 | 
42 | TODO： 
43 | 
44 | 后续可能会支持：
45 | 
46 | 输入城市名称自动获取城市编号 （√）
47 | 如果上次爬取过程中断可以从断点处开始爬取 （√）


--------------------------------------------------------------------------------
/comment_crawl.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import time
 3 | from requests import post
 4 | import csv
 5 | 
 6 | URL = 'https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList'  # 获取评论数据列表的URL
 7 | SizePerPage = 20    # 每页的数据量，最好不好随意改变
 8 | data = {"arg": {"resourceId": 229, "resourceType": 11, "pageIndex": 1, "pageSize": SizePerPage, "sortType": 3, "commentTagId": "0",
 9 |                 "collapseType": 1, "channelType": 7, "videoImageSize": "700_392", "starType": 0},
10 |         "head": {"cid": "09031065211914680477", "ctok": "", "cver": "1.0", "lang": "01", "sid": "8888", "syscode": "09",
11 |                  "auth": None, "extension": [{"name": "protocal", "value": "https"}]}, "contentType": "json"}
12 | 
13 | 
14 | def GetComments(Id, total):
15 |     f = open(f'data/comments/{Id}.csv', 'w', encoding='utf-8')
16 |     DATA = data.copy()
17 |     DATA['arg']['resourceId'] = Id
18 |     wr = csv.writer(f)
19 |     times = total // SizePerPage
20 |     for i in range(times):
21 |         DATA['arg']['pageIndex'] = i + 1
22 |         resp = post(URL, json=DATA)
23 |         comments = resp.json()['result']['items']
24 |         if not comments:
25 |             print(resp.json())
26 |             break
27 |         for comment in comments:
28 |             if comment.get('languageType', '') != "zh-cn" or len(comment['content']) < 10:
29 |                 continue
30 |             userId = comment.get('userInfo')
31 |             if userId:
32 |                 userId = userId.get('userId', 'null')
33 | 
34 |             rrr = [userId, comment['content'], comment['publishTime'], comment['usefulCount']]
35 |             wr.writerow(rrr)
36 |             print(comment['content'])
37 | 
38 |         time.sleep(1)
39 |         resp.close()
40 | 
41 |     f.close()
42 | 
43 | 
44 | if __name__ == '__main__':
45 |     with open('data/pois.csv', 'r', encoding='utf-8') as f:
46 |         rd = csv.reader(f)
47 |         cnt = 0
48 |         flag = 0
49 |         for row in rd:
50 |             if cnt == 0:
51 |                 cnt = 1
52 |                 continue
53 |             ID = int(row[2])
54 |             print(ID, row[0])
55 |             GetComments(ID, int(row[11]))
56 | 


--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
1 | [poi]
2 | ; 城市
3 | city = 南京
4 | ;是否为中断后续写，1为是
5 | isRestart = 0
6 | ;是否在爬取过程中爬取评论，1为是
7 | isCrawlComment = 1
8 | 


--------------------------------------------------------------------------------
/poi_crawl.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | import configparser
  4 | from requests import post
  5 | import csv
  6 | import logging
  7 | import json
  8 | import shutil
  9 | from bs4 import BeautifulSoup
 10 | from comment_crawl import GetComments
 11 | 
 12 | URL = 'https://m.ctrip.com/restapi/soa2/13342/json/getSightRecreationList'  # 获取景点列表数据
 13 | DetailURL = 'https://m.ctrip.com/restapi/soa2/18254/json/getPoiMoreDetail'  # 获取景点详情数据
 14 | TicketURL = 'https://m.ctrip.com/restapi/soa2/12530/json/getProductShelf'  # 获取票价数据
 15 | CityURL = 'https://m.ctrip.com/restapi/soa2/13342/json/SearchSightRecreation'  # 获取城市编号
 16 | isRestart = 0
 17 | isCrawlComment = 0
 18 | history = set()
 19 | 
 20 | data = {'fromChannel': 2,
 21 |         'index': 1,
 22 |         'count': 20,
 23 |         'districtId': 9,  # 可修改此处变更爬取城市
 24 |         'sortType': 0,
 25 |         'categoryId': 0,
 26 |         'lat': 0,
 27 |         'lon': 0,
 28 |         'showNewVersion': True,
 29 |         'locationFilterDistance': 300,
 30 |         'locationDistrictId': 0,
 31 |         'themeId': 0,
 32 |         'level2ThemeId': 0,
 33 |         'locationFilterId': 0,
 34 |         'locationFilterType': 0,
 35 |         'sightLevels': [],
 36 |         'ticketType': None,
 37 |         'commentScore': None,
 38 |         'showAgg': True,
 39 |         'fromNearby': '',
 40 |         'sourceFrom': 'sightlist',
 41 |         'themeName': '',
 42 |         'scene': '',
 43 |         'hiderank': '',
 44 |         'isLibertinism': False,
 45 |         'hideTop': False,
 46 |         'head': {'cid': '09031065211914680477',
 47 |                  'ctok': '',
 48 |                  'cver': '1.0',
 49 |                  'lang': '01',
 50 |                  'sid': '8888',
 51 |                  'syscode': '09',
 52 |                  'auth': '',
 53 |                  'xsid': '',
 54 |                  'extension': []}}
 55 | 
 56 | detail_data = {
 57 |     "poiId": 87211,
 58 |     "scene": "basic",
 59 |     "head": {
 60 |         "cid": "09031065211914680477",
 61 |         "ctok": "",
 62 |         "cver": "1.0",
 63 |         "lang": "01",
 64 |         "sid": "8888",
 65 |         "syscode": "09",
 66 |         "auth": "",
 67 |         "xsid": "",
 68 |         "extension": []
 69 |     }
 70 | }
 71 | 
 72 | ticket_data = {
 73 |     'head': {'cid': '09031065211914680477',
 74 |              'syscode': '09',
 75 |              'extension': [{'name': 'needNewStructureV2', 'value': 'true'},
 76 |                            {'name': 'crawlerKey',
 77 |                             'value': '0bd0f473f984aaf20ece34b437cce49e51d55d4baefe0ca56ac3559956a72691'},
 78 |                            {'name': 'fingerprintKeys',
 79 |                             'value': 'N1mwb6YgTeUNE4HEsYM9iqhWLYdte05EQLjd9WHYFsrb3IsNvUkj8Yo6jLMWgkvb1jfYUtEADwFmizljDYX7eGmw9ZvMcjhbvs1eoZYBgjO4yXY5XyN7vGDYDlwMpjQ3edSiFTYkYlYMNr9cEQ4wtTxZqYSajk1jLhW0YzYXYfoYnoiFTikZiMhj4Y9cEodiDlYldwO8yfpyFpYDzwhYpARsSwoTRfHEDcRtfyfoyahwT0JdfEtFJhOEgXR0YqoKaGymbi9kJnOJcBjkmRLbWPPxXOraSR18wq5E5fi0AecPjSYZNR1gwhLWm6wlgwNbRgzv8oYTOWA1JtHJFMjUSEgYN8jMgwQPvGSjsYzPEFDJ4GEk5eg5ykaW3AYnY0nE64JgMEZDe6OyStWOdY8YU7RdGwmHRUSENGRfoyoXyOLwc6J4BEBoESMwcqvTYQ0E5ojNSWgDWPbWaZYz4YbTYsUR90YTQWo6YdlYl0Ypoj4Uet1E1bW0Qeo9wbdeU8jPMYNsy8lEZbj65ElarpUj6zwZs'},
 80 |                            {'name': 'H5', 'value': 'H5'}]},
 81 |     'debug': False,
 82 |     'pageid': '214070',
 83 |     'contentType': 'json',
 84 |     'clientInfo': {'pageId': '214070',
 85 |                    'platformId': None,
 86 |                    'crnVersion': '2021-02-03 20:08:05',
 87 |                    'location': {'lat': '',
 88 |                                 'lon': '',
 89 |                                 'cityId': None,
 90 |                                 'locatedCityId': None,
 91 |                                 'districtId': None,
 92 |                                 'locatedDistrictId': None},
 93 |                    'locale': 'zh-CN',
 94 |                    'currency': 'CNY'},
 95 |     'spotid': 229,
 96 |     'poiId': 75595,
 97 |     'locale': 'zh-CN',
 98 |     'currency': 'CNY',
 99 |     'platformId': None,
100 |     'needFilter': True,
101 |     'resourceLimit': True}
102 | 
103 | city_data = {
104 |     'KeyWord': '',
105 |     'DistrictId': 1,
106 |     'CategoryId': 0,
107 |     'head': {
108 |         'cid': '09031065211914680477',
109 |         'ctok': '',
110 |         'cver': '1.0',
111 |         'lang': '01',
112 |         'sid': '8888',
113 |         'syscode': '09',
114 |         'auth': '',
115 |         'xsid': '',
116 |         'extension': []
117 |     }
118 | }
119 | 
120 | 
121 | def getCityID(city_name):
122 |     city_data['KeyWord'] = city_name
123 |     city_res = post(CityURL, json=city_data).json()['districtResult']
124 |     if len(city_res) == 0:
125 |         logging.error('城市名错误！无结果')
126 |         exit(0)
127 | 
128 |     elif len(city_res) >= 1:
129 |         cityID = city_res[0]['districtId']
130 |         if len(city_res) > 1:
131 |             logging.warning(f'多个相似城市名结果，注意确认\n')
132 |             print([i["districtName"] + "-" + i["name"] for i in city_res])
133 | 
134 |         print(f'目标城市：{city_res[0]["districtName"] + "-" + city_res[0]["name"]}\n')
135 |         return cityID
136 | 
137 | 
138 | def CalPrice(kv):
139 |     sumSale = 0
140 |     avg = 0
141 |     for k, v in kv:
142 |         sumSale += k
143 |     for k, v in kv:
144 |         try:
145 |             avg += k / sumSale * v
146 |         except:
147 |             continue
148 |     return avg
149 | 
150 | 
151 | def GetTicketPrice(spotid, poiId):
152 |     tdata = ticket_data.copy()
153 |     tdata['spotid'] = spotid
154 |     tdata['poiId'] = poiId
155 |     ticket_res = post(TicketURL, json=tdata)
156 |     dataa = ticket_res.json().get('data')
157 |     if not dataa:
158 |         return [0, 0, 0, 0]
159 |     shelfGroup = dataa.get('shelfGroups')
160 |     if not shelfGroup:
161 |         return [0, 0, 0, 0]
162 | 
163 |     chengr = []
164 |     laor = []
165 |     xues = []
166 |     ertong = []
167 |     lr = 0
168 |     xs = 0
169 |     cr = 0
170 |     et = 0
171 |     tt = 0
172 |     overall = []
173 |     maxsales = 0
174 |     for i in shelfGroup:
175 |         ticketGroups = i.get('ticketGroups')
176 |         if ticketGroups:
177 |             for j in ticketGroups:
178 |                 sales = j.get('yearlySale')
179 |                 maxsales = max(sales, maxsales)
180 |                 if j.get('mainTicket', False):
181 |                     tt = 1
182 |                     subTickets = j.get('subTicketGroups')
183 |                     if subTickets:
184 |                         for sub in subTickets:
185 |                             if not sub.get('subTicketGroupInfo') or not sub['subTicketGroupInfo'].get('priceInfo') or not sub['subTicketGroupInfo']['priceInfo'].get('price'):
186 |                                 continue
187 |                                 
188 |                             if '成人' in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName', ''):
189 |                                 chengr.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
190 |                                 cr = 1
191 |                             if '老人' in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName', ''):
192 |                                 laor.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
193 |                                 lr = 1
194 |                             if '学生' in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName', ''):
195 |                                 xues.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
196 |                                 xs = 1
197 |                             if '儿童' in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName', ''):
198 |                                 ertong.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
199 |                                 et = 1
200 |                             overall.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
201 | 
202 |     if tt == 0:
203 |         for i in shelfGroup:
204 |             ticketGroups = i.get('ticketGroups')
205 |             if ticketGroups:
206 |                 for j in ticketGroups:
207 |                     sales = j.get('yearlySale')
208 |                     if not sales == maxsales:
209 |                         continue
210 | 
211 |                     subTickets = j.get('subTicketGroups')
212 |                     if subTickets:
213 |                         for sub in subTickets:
214 |                             if not sub.get('subTicketGroupInfo') or not sub['subTicketGroupInfo'].get('priceInfo') or not sub['subTicketGroupInfo']['priceInfo'].get('price'):
215 |                                 continue
216 |                                 
217 |                             if '票' not in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName',
218 |                                                                                                             '') \
219 |                                     or sub['subTicketGroupInfo']['priceInfo']['price'] > 300:
220 |                                 continue
221 | 
222 |                             if '成人' in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName',
223 |                                                                                                          ''):
224 |                                 chengr.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
225 |                                 cr = 1
226 |                             if '老人' in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName',
227 |                                                                                                          ''):
228 |                                 laor.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
229 |                                 lr = 1
230 |                             if '学生' in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName',
231 |                                                                                                          ''):
232 |                                 xues.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
233 |                                 xs = 1
234 |                             if '儿童' in sub['subTicketGroupInfo']['name'] + sub['subTicketGroupInfo'].get('subName',
235 |                                                                                                          ''):
236 |                                 ertong.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
237 |                                 et = 1
238 |                             overall.append((sales, sub['subTicketGroupInfo']['priceInfo']['price']))
239 |                     else:
240 |                         logging.warning('无sub')
241 | 
242 |     if cr == 0:
243 |         chengr = overall.copy()
244 |     if lr == 0:
245 |         laor = chengr.copy()
246 |     if xs == 0:
247 |         xues = chengr.copy()
248 |     if et == 0:
249 |         ertong = chengr.copy()
250 | 
251 |     crp = CalPrice(chengr)
252 |     xsp = CalPrice(xues)
253 |     lrp = CalPrice(laor)
254 |     etp = CalPrice(ertong)
255 | 
256 |     ticket_res.close()
257 |     return [crp, lrp, xsp, etp]
258 | 
259 | 
260 | def GetDetail(poiId):
261 |     ddata = detail_data.copy()
262 |     ddata['poiId'] = poiId
263 |     detail_res = post(DetailURL, json=ddata)
264 |     templateList = detail_res.json().get('templateList')
265 |     spendTime = ''
266 |     opentime = ''
267 |     desc = ''
268 |     preferential = {}
269 | 
270 |     if not templateList:
271 |         return [spendTime, opentime, desc, preferential]
272 | 
273 |     for i in templateList:
274 |         if i.get('templateName') == '温馨提示':
275 |             moduleList = i.get('moduleList')
276 |             if moduleList:
277 |                 for j in moduleList:
278 |                     if j.get('moduleName') == '开放时间':
279 |                         mod = j.get('poiOpenModule')
280 |                         spendTime = mod.get('playSpendTime')
281 |                         opentime = str(mod)
282 |                     elif j.get('moduleName') == '优待政策':
283 |                         mod = j.get('preferentialModule').get('policyInfoList')
284 |                         if mod:
285 |                             for l in mod:
286 |                                 cus = l.get('customDesc')
287 |                                 preferential[cus] = []
288 |                                 for k in l.get('policyDetail'):
289 |                                     lst = [k.get('limitation'), k.get('policyDesc')]
290 |                                     preferential[cus].append(lst)
291 | 
292 | 
293 |         elif i.get('templateName') == '信息介绍':
294 |             moduleList = i.get('moduleList')
295 |             if moduleList:
296 |                 for j in moduleList:
297 |                     if j.get('moduleName') == '图文详情':
298 |                         mod = j.get('introductionModule')
299 |                         desc = mod.get('introduction')
300 |                         soup = BeautifulSoup(desc, 'lxml')
301 |                         desc = soup.text
302 | 
303 |     detail_res.close()
304 |     return [spendTime, opentime, desc, preferential]
305 | 
306 | 
307 | if __name__ == '__main__':
308 |     if not os.path.exists('data/'):
309 |         os.mkdir('data')
310 |     if not os.path.exists('data/comments'):
311 |         os.mkdir('data/comments')
312 | 
313 |     conf = configparser.ConfigParser()
314 |     conf.read('config.ini', encoding="utf-8")
315 | 
316 |     try:
317 |         city = conf.get('poi', 'city')
318 |     except:
319 |         city = '北京'
320 | 
321 |     try:
322 |         isRestart = int(conf.get('poi', 'isRestart'))
323 |         isCrawlComment = int(conf.get('poi', 'isCrawlComment'))
324 |         shutil.copyfile('data/pois.csv', 'data/back.csv')  # 写前备份
325 |     except:
326 |         pass
327 | 
328 |     data['districtId'] = getCityID(city)    # 获取城市编号
329 | 
330 |     if isRestart == 1:
331 |         try:
332 |             with open('data/pois.csv', 'r', encoding='utf-8') as rf:
333 |                 rd = csv.reader(rf)
334 |                 cnt = 0
335 |                 for r in rd:
336 |                     cnt += 1
337 |                     if cnt == 1:
338 |                         continue
339 |                     history.add(int(r[2]))
340 | 
341 |             f = open('data/pois.csv', 'a', encoding='utf-8')
342 | 
343 |         except FileNotFoundError:
344 |             logging.error('无法续写，文件不存在')
345 |             isRestart = 1
346 |             f = open('data/pois.csv', 'w', encoding='utf-8')
347 | 
348 |     else:
349 |         f = open('data/pois.csv', 'w', encoding='utf-8')
350 | 
351 |     wr = csv.writer(f)
352 |     if isRestart == 0:
353 |         wr.writerow(['名称', '英文名', 'id', 'poiID', '经度', '维度', '标签', '特色', '价格', '最低价格', '评价分数',
354 |                      '评论数量', '封面图片', '成人票价格', '老人票价格', '学生票价格', '儿童票价格', '建议游玩', '开放时间', '介绍', '优待政策'])
355 | 
356 |     # 最大页数
357 |     for page in range(1, 5000):
358 |         print(f'开始爬取第{page}页')
359 |         data['index'] = page
360 |         poiListRes = post(URL, json=data)
361 |         if not poiListRes.json().get('result'):
362 |             print(poiListRes.json())
363 |             break
364 |         poiList = poiListRes.json()['result']['sightRecreationList']
365 |         if len(poiList) == 0:  # 如果此页没有数据，说明爬取结束或者出错了
366 |             break
367 |         for poi in poiList:
368 |             row = []
369 |             ID = poi.get('id', '')
370 |             print(poi.get('name'), end='')
371 |             if isRestart and int(ID) in history:
372 |                 print(' 已存在')
373 |                 continue
374 |             print()
375 |             row.append(poi.get('name', ''))  # 名称
376 |             row.append(poi.get('eName', ''))  # 英文名
377 |             row.append(ID)  # id
378 | 
379 |             poiID = poi.get('poiId', '')
380 |             row.append(poiID)  # poiId
381 |             row.append(poi['coordInfo']['gDLat'])  # 经度
382 |             row.append(poi['coordInfo']['gDLon'])  # 维度
383 | 
384 |             tagSet = set()
385 |             tagSet.update(poi.get('resourceTags', []))
386 |             tagSet.update(poi.get('tagNameList', []))
387 |             tagSet.update(poi.get('themeTags', []))
388 |             tagStr = '|'.join(tagSet)
389 |             row.append(tagStr)  # 标签
390 | 
391 |             row.append('|'.join(poi.get('shortFeatures', [])))  # 特色
392 | 
393 |             row.append(poi.get('price', 0))  # 价格
394 |             row.append(poi.get('displayMinPrice', 0))  # 最低价格
395 | 
396 |             commentScore = poi.get('commentScore')
397 |             if not commentScore:
398 |                 commentScore = 0.0
399 |             row.append(commentScore)  # 评价分数
400 |             commentCount = poi.get('commentCount')
401 |             if not commentCount:
402 |                 commentCount = 0
403 |             row.append(commentCount)  # 评论数量
404 | 
405 |             row.append(poi.get('coverImageUrl', ''))  # 封面
406 |             row += GetTicketPrice(spotid=ID, poiId=poiID)
407 |             row += GetDetail(poiId=poiID)
408 |             ertong = row[-1].get('儿童')
409 |             yhbj = 0
410 |             free = 0
411 |             if ertong:
412 |                 for i in ertong:
413 |                     if '半价' in i[1] or '优惠' in i[1]:
414 |                         yhbj = 1
415 |                     elif i[1] == '免费':
416 |                         free = 1
417 |                 if yhbj == 0 and free == 1:
418 |                     row[-5] = 0  # 儿童票免费
419 | 
420 |             wr.writerow(row)
421 | 
422 |             time.sleep(1)
423 | 
424 |             if isCrawlComment == 1: # 爬取评论
425 |                 try:
426 |                     print(f'开始爬取{poi.get("name", "")}评论')
427 |                     GetComments(ID, commentCount)
428 |                 except Exception as e:
429 |                     print(e)
430 |                     logging.error(f'爬取{poi.get("name", "")}评论错误！')
431 | 
432 |         time.sleep(2)
433 | 
434 |     f.close()
435 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.24.0
2 | beautifulsoup4==4.9.3
3 | 


--------------------------------------------------------------------------------