├── README.md
├── jd_comment.py
├── jd_pjdeps.py
└── jdspider.py


/README.md:
--------------------------------------------------------------------------------
1 | # auto_comment
2 | 
3 | ## No longer usable, maintenance stopped (2024/09/03)
4 | 
5 | You can use the review script (jd_AutoEval.js) from this [project](https://github.com/6dylan6/jdpro) instead.
6 | 
7 | ## JD automatic reviews
8 | 
9 | Supports product reviews with photos (two images attached), follow-up reviews and service ratings; review text can also be generated by AI.
10 | 
11 | Qinglong repo-pull command: ql repo https://github.com/6dylan6/auto_comment.git "jd_" "" "jdspider"
12 | 
13 | If a run reports dependency errors, run the dependency-installation task ("评价依赖安装"); if everything works, do not run it.
14 | 
15 | Log in to JD in a desktop browser and capture the cookie (PC cookie), then add it as the PC_COOKIE variable. Each run reviews up to 10 orders.
16 | 
17 | Capture the cookie on the www address: after logging in, press F12, open the Network tab and copy the cookie from a request that carries one. Do not use the document.cookie console command, as the result is incomplete. (In fact, copying only the thor=xxx part is enough.)
18 | 
19 | PRs and issues are welcome.
20 | 
21 | ## Changelog
22 | 
23 | 2022/11/6 Added multi-account support; errors no longer stop the run; reviews now attach two images; orders are reviewed in reverse order, older ones first
24 | 
25 | 2022/11/16 Fixed some orders failing to match a pid; fixed service-rating errors
26 | 
27 | 2022/11/20 Review images are now picked at random
28 | 
29 | 2023/1/7 Running normally; if you hit problems, try a few more runs
30 | 
31 | 2023/3/14 Working normally
32 | 
33 | 2023/3/28 Fixed garbled review text
34 | 
35 | 2023/4/19 Added GPT-generated review text, enabled when the OPENAI_API_KEY environment variable is set; see the code comments for details (PR by Cp0204)
36 | 
37 | 2023/4/22 Removed the openai dependency, improved the proxy handling and the log output (PR by Cp0204)
38 | 
39 | 2023/6/27 Bail out on GPT errors (PR by Cp0204)
40 | 
41 | 2023/7/28 Fixed failure to fetch review information
42 | 
43 | 2023/9/17 Fixed failure to collect product reviews
44 | 
45 | ## Example run (reviews with photos usually count as quality reviews, and orders with review rewards can earn the reward!):
46 | 
47 | ![image](https://i.postimg.cc/KznsXxfN/1.png)
48 | 
--------------------------------------------------------------------------------
/jd_comment.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Automatic reviews with images, follow-up reviews and service ratings; requires a desktop (PC) cookie
3 | # @Time : 2022/11/4
4 | # @Author : @qiu-lzsnmb and @Dimlitter @Dylan
5 | # @File : auto_comment.py
6 | # Multi-account reviews, with exception handling
7 | # 2023/3/28 fixed garbled text
8 | # 2023/4/19 AI reviews are enabled when the OPENAI_API_KEY environment variable exists; network options: 1. OPENAI_API_BASE_URL reverse proxy, 2. ProxyUrl proxy, 3. direct connection
9 | '''
10 | new Env('自动评价');
11 | 8 8 2 1 * https://raw.githubusercontent.com/6dylan6/auto_comment/main/jd_comment.py
12 | '''
13 | import argparse
14 | import copy
15 | import logging
16 | import os
17 | import random
18 | import sys
19 | import time,re
20 | import urllib.parse
21 | #import notify
22 | 
23 | try:
24 |     import jieba # just for linting
25 |     import jieba.analyse
26 |     import requests
27 |     #import yaml
28 |     from lxml import etree
29 |     import zhon.hanzi
30 | 
31 | except:
32 |     print('解决依赖问题...稍等')
33 |     os.system('pip3 install lxml &> /dev/null')
34 |     os.system('pip3 install jieba &> /dev/null')
35 |     os.system('pip3 install zhon &> /dev/null')
36 |     os.system('pip3 install requests &> /dev/null')
37 |     os.system('pip3 install urllib3==1.25.11 &> /dev/null')
38 |     import jieba
39 |     import jieba.analyse
40 |     #import yaml
41 |     from lxml import etree
42 |     import requests
43 |     import urllib.parse
44 | import jdspider
45 | # constants
46 | CONFIG_PATH = './config.yml'
47 | USER_CONFIG_PATH = './config.user.yml'
48 | ORDINARY_SLEEP_SEC = 10
49 | SUNBW_SLEEP_SEC = 5
50 | REVIEW_SLEEP_SEC = 10
51 | SERVICE_RATING_SLEEP_SEC = 15
52 | 
53 | ## logging with styles
54 | ## Reference: https://stackoverflow.com/a/384125/12002560
55 | _COLORS = {
56 |     'black': 0,
57 |     'red': 1,
58 |     'green': 2,
59 |     'yellow': 3,
60 |     'blue': 4,
61 |     'magenta': 5,
62 |     'cyan': 6,
63 |     'white': 7
64 | }
65 | 
66 | _RESET_SEQ = '\033[0m'
67 | _COLOR_SEQ = '\033[1;%dm'
68 | _BOLD_SEQ = '\033[1m'
69 | _ITALIC_SEQ = '\033[3m'
70 | _UNDERLINED_SEQ = '\033[4m'
71 | 
72 | _FORMATTER_COLORS = {
73 |     'DEBUG': _COLORS['blue'],
74 |     'INFO': _COLORS['green'],
75 |     'WARNING': _COLORS['yellow'],
76 |     'ERROR': _COLORS['red'],
77 |     'CRITICAL': _COLORS['red']
78 | }
79 | 
80 | def format_style_seqs(msg, use_style=True):
81 |     if use_style:
82 |         msg = 
msg.replace('$RESET', _RESET_SEQ) 83 | msg = msg.replace('$BOLD', _BOLD_SEQ) 84 | msg = msg.replace('$ITALIC', _ITALIC_SEQ) 85 | msg = msg.replace('$UNDERLINED', _UNDERLINED_SEQ) 86 | else: 87 | msg = msg.replace('$RESET', '') 88 | msg = msg.replace('$BOLD', '') 89 | msg = msg.replace('$ITALIC', '') 90 | msg = msg.replace('$UNDERLINED', '') 91 | 92 | class StyleFormatter(logging.Formatter): 93 | def __init__(self, fmt=None, datefmt=None, use_style=True): 94 | logging.Formatter.__init__(self, fmt, datefmt) 95 | self.use_style = use_style 96 | 97 | def format(self, record): 98 | rcd = copy.copy(record) 99 | levelname = rcd.levelname 100 | if self.use_style and levelname in _FORMATTER_COLORS: 101 | levelname_with_color = '%s%s%s' % ( 102 | _COLOR_SEQ % (30 + _FORMATTER_COLORS[levelname]), 103 | levelname, _RESET_SEQ) 104 | rcd.levelname = levelname_with_color 105 | return logging.Formatter.format(self, rcd) 106 | 107 | 108 | # 评价生成 109 | def generation(pname, _class=0, _type=1, opts=None): 110 | if "OPENAI_API_KEY" in os.environ: 111 | return generation_ai(pname, opts) 112 | opts = opts or {} 113 | items = ['商品名'] 114 | items.clear() 115 | items.append(pname) 116 | opts['logger'].debug('Items: %s', items) 117 | loop_times = len(items) 118 | opts['logger'].debug('Total loop times: %d', loop_times) 119 | for i, item in enumerate(items): 120 | opts['logger'].debug('Loop: %d / %d', i + 1, loop_times) 121 | opts['logger'].debug('Current item: %s', item) 122 | spider = jdspider.JDSpider(item,ck) 123 | opts['logger'].debug('Successfully created a JDSpider instance') 124 | # 增加对增值服务的评价鉴别 125 | if "赠品" in pname or "非实物" in pname or "京服无忧" in pname or "权益" in pname or "非卖品" in pname or "增值服务" in pname: 126 | result = [ 127 | "赠品挺好的。", 128 | "很贴心,能有这样免费赠送的赠品!", 129 | "正好想着要不要多买一份增值服务,没想到还有这样的赠品。", 130 | "赠品正合我意。", 131 | "赠品很好,挺不错的。", 132 | "本来买了产品以后还有些担心。但是看到赠品以后就放心了。", 133 | "不论品质如何,至少说明店家对客的态度很好!", 134 | "我很喜欢这些商品!", 135 | "我对于商品的附加值很在乎,恰好这些赠品为这件商品提供了这样的的附加值,这令我很满意。" 136 | "感觉现在的网购环境环境越来越好了,以前网购的时候还没有过么多贴心的赠品和增值服务", 137 | "第一次用京东,被这种赠品和增值服物的良好态度感动到了。", 138 | "赠品还行。" 139 | ] 140 | else: 141 | result = spider.getData(4, 3) # 这里可以自己改 142 | opts['logger'].debug('Result: %s', result) 143 | 144 | # class 0是评价 1是提取id 145 | try: 146 | name = jieba.analyse.textrank(pname, topK=5, allowPOS='n')[0] 147 | opts['logger'].debug('Name: %s', name) 148 | except Exception as e: 149 | # opts['logger'].warning( 150 | # 'jieba textrank analysis error: %s, name fallback to "宝贝"', e) 151 | name = "宝贝" 152 | if _class == 1: 153 | opts['logger'].debug('_class is 1. 
Directly return name') 154 | return name 155 | else: 156 | if _type == 1: 157 | num = 6 158 | elif _type == 0: 159 | num = 4 160 | num = min(num, len(result)) 161 | # use `.join()` to improve efficiency 162 | comments = ''.join(random.sample(result, num)) 163 | opts['logger'].debug('_type: %d', _type) 164 | opts['logger'].debug('num: %d', num) 165 | opts['logger'].debug('Raw comments: %s', comments) 166 | 167 | return 5, comments.replace("$", name) 168 | 169 | # ChatGPT评价生成 170 | def generation_ai(pname, _class=0, _type=1, opts=None): 171 | # 当存在 OPENAI_API_BASE_URL 时,使用反向代理 172 | api_base_url = os.environ.get("OPENAI_API_BASE_URL", "https://api.openai.com") 173 | api_key = os.environ["OPENAI_API_KEY"] 174 | prompt = f"{pname} 写一段此商品的评价,简短、口语化" 175 | response = requests.post( 176 | f"{api_base_url}/v1/chat/completions", 177 | headers={ 178 | "Content-Type": "application/json", 179 | "Authorization": f"Bearer {api_key}", 180 | }, 181 | json={ 182 | "model": "gpt-3.5-turbo", 183 | "messages": [{"role": "user", "content": prompt}], 184 | "max_tokens": 1024, 185 | } 186 | ) 187 | response_text = response.json() 188 | if "error" in response_text: 189 | print("\nOpenAI API 调用错误:\n", response_text["error"]["message"]) 190 | exit() 191 | else: 192 | return 5, response_text["choices"][0]["message"]["content"].strip() 193 | 194 | 195 | # 查询全部评价 196 | def all_evaluate(opts=None): 197 | try: 198 | opts = opts or {} 199 | N = {} 200 | url = 'https://club.jd.com/myJdcomments/myJdcomment.action?' 201 | opts['logger'].debug('URL: %s', url) 202 | opts['logger'].debug('Fetching website data') 203 | req = requests.get(url, headers=headers) 204 | opts['logger'].debug( 205 | 'Successfully accepted the response with status code %d', 206 | req.status_code) 207 | if not req.ok: 208 | opts['logger'].warning( 209 | 'Status code of the response is %d, not 200', req.status_code) 210 | req_et = etree.HTML(req.text) 211 | opts['logger'].debug('Successfully parsed an XML tree') 212 | evaluate_data = req_et.xpath('//*[@id="main"]/div[2]/div[1]/div/ul/li') 213 | loop_times = len(evaluate_data) 214 | opts['logger'].debug('Total loop times: %d', loop_times) 215 | for i, ev in enumerate(evaluate_data): 216 | opts['logger'].debug('Loop: %d / %d', i + 1, loop_times) 217 | na = ev.xpath('a/text()')[0] 218 | opts['logger'].debug('na: %s', na) 219 | #print(ev.xpath('b/text()')[0]) 220 | try: 221 | num = ev.xpath('b/text()')[0] 222 | opts['logger'].debug('num: %s', num) 223 | except IndexError: 224 | #opts['logger'].warning('Can\'t find num content in XPath, fallback to 0') 225 | num = 0 226 | N[na] = int(num) 227 | return N 228 | except Exception as e: 229 | print (e) 230 | 231 | # 评价晒单 232 | def sunbw(N, opts=None): 233 | try: 234 | opts = opts or {} 235 | Order_data = [] 236 | req_et = [] 237 | loop_times = 2 238 | opts['logger'].debug('Fetching website data') 239 | opts['logger'].debug('Total loop times: %d', loop_times) 240 | for i in range(loop_times): 241 | url = (f'https://club.jd.com/myJdcomments/myJdcomment.action?sort=0&' 242 | f'page={i + 1}') 243 | opts['logger'].debug('URL: %s', url) 244 | req = requests.get(url, headers=headers) 245 | opts['logger'].debug( 246 | 'Successfully accepted the response with status code %d', 247 | req.status_code) 248 | if not req.ok: 249 | opts['logger'].warning( 250 | 'Status code of the response is %d, not 200', req.status_code) 251 | req_et.append(etree.HTML(req.text)) 252 | opts['logger'].debug('Successfully parsed an XML tree') 253 | opts['logger'].debug('Fetching data from XML 
trees') 254 | opts['logger'].debug('Total loop times: %d', loop_times) 255 | for idx, i in enumerate(req_et): 256 | opts['logger'].debug('Loop: %d / %d', idx + 1, loop_times) 257 | opts['logger'].debug('Fetching order data in the default XPath') 258 | elems = i.xpath( 259 | '//*[@id="main"]/div[2]/div[2]/table/tbody') 260 | opts['logger'].debug('Count of fetched order data: %d', len(elems)) 261 | Order_data.extend(elems) 262 | #if len(Order_data) != N['待评价订单']: 263 | # opts['logger'].debug( 264 | # 'Count of fetched order data doesn\'t equal N["待评价订单"]') 265 | # opts['logger'].debug('Clear the list Order_data') 266 | # Order_data = [] 267 | # opts['logger'].debug('Total loop times: %d', loop_times) 268 | # for idx, i in enumerate(req_et): 269 | # opts['logger'].debug('Loop: %d / %d', idx + 1, loop_times) 270 | # opts['logger'].debug('Fetching order data in another XPath') 271 | # elems = i.xpath( 272 | # '//div[2]/table') 273 | # opts['logger'].debug('Count of fetched order data: %d', len(elems)) 274 | # Order_data.extend(elems) 275 | 276 | opts['logger'].info(f"当前共有{N['待评价订单']}个评价。") 277 | opts['logger'].debug('Commenting on items') 278 | for i, Order in enumerate(reversed(Order_data)): 279 | if i + 1 > 10: 280 | opts['logger'].info(f'\n已评价10个订单,跳出') 281 | break 282 | try: 283 | oid = Order.xpath('tr[@class="tr-th"]/td/span[3]/a/text()')[0] 284 | opts['logger'].debug('oid: %s', oid) 285 | oname_data = Order.xpath( 286 | 'tr[@class="tr-bd"]/td[1]/div[1]/div[2]/div/a/text()') 287 | opts['logger'].debug('oname_data: %s', oname_data) 288 | pid_data = Order.xpath( 289 | 'tr[@class="tr-bd"]/td[1]/div[1]/div[2]/div/a/@href') 290 | opts['logger'].debug('pid_data: %s', pid_data) 291 | except IndexError: 292 | opts['logger'].warning(f"第{i + 1}个订单未查找到商品,跳过。") 293 | continue 294 | loop_times1 = min(len(oname_data), len(pid_data)) 295 | opts['logger'].debug('Commenting on orders') 296 | opts['logger'].debug('Total loop times: %d', loop_times1) 297 | idx = 0 298 | for oname, pid in zip(oname_data, pid_data): 299 | pid = re.findall('(?<=jd.com/)[(0-9)*?]+',pid)[0] 300 | opts['logger'].info(f'\n开始第{i+1}个订单: {oid}') 301 | opts['logger'].debug('pid: %s', pid) 302 | opts['logger'].debug('oid: %s', oid) 303 | xing, Str = generation(oname, opts=opts) 304 | opts['logger'].info(f'评价信息:{xing}星 ' + Str) 305 | # 获取图片 306 | url1 = (f'https://club.jd.com/discussion/getProductPageImageCommentList' 307 | f'.action?productId={pid}') 308 | opts['logger'].debug('Fetching images using the default URL') 309 | opts['logger'].debug('URL: %s', url1) 310 | req1 = requests.get(url1, headers=headers) 311 | opts['logger'].debug( 312 | 'Successfully accepted the response with status code %d', 313 | req1.status_code) 314 | if not req1.ok: 315 | opts['logger'].warning( 316 | 'Status code of the response is %d, not 200', req1.status_code) 317 | imgdata = req1.json() 318 | opts['logger'].debug('Image data: %s', imgdata) 319 | if imgdata["imgComments"]["imgCommentCount"] > 10: 320 | pnum = random.randint(2,int(imgdata["imgComments"]["imgCommentCount"]/10)+1) 321 | opts['logger'].debug('Count of fetched image comments is 0') 322 | opts['logger'].debug('Fetching images using another URL') 323 | url1 = (f'https://club.jd.com/discussion/getProductPageImage' 324 | f'CommentList.action?productId={pid}&page={pnum}') 325 | opts['logger'].debug('URL: %s', url1) 326 | time.sleep(1) 327 | req1 = requests.get(url1, headers=headers) 328 | opts['logger'].debug( 329 | 'Successfully accepted the response with status code %d', 330 | req1.status_code) 
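                # The request above re-fetches the image-comment list from a random page
                # (pnum is drawn from 2 .. imgCommentCount/10 + 1, ten images per page) so
                # that the two 晒单 images are not always taken from page 1. A minimal
                # sketch of the selection performed further below, assuming the same JSON
                # layout ("imgComments" -> "imgList" -> "imageUrl"); pick_image_url is
                # only illustrative and not part of this script:
                #
                #     def pick_image_url(imgjson):
                #         try:
                #             return random.choice(imgjson["imgComments"]["imgList"])["imageUrl"]
                #         except (KeyError, IndexError, TypeError):
                #             return ''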
331 | if not req1.ok: 332 | opts['logger'].warning( 333 | 'Status code of the response is %d, not 200', 334 | req1.status_code) 335 | imgdata2 = req1.json() 336 | opts['logger'].debug('Image data: %s', imgdata2) 337 | try: 338 | imgurl = random.choice(imgdata["imgComments"]["imgList"])["imageUrl"] 339 | if ('imgdata2' in dir()): 340 | imgurl2 = random.choice(imgdata2["imgComments"]["imgList"])["imageUrl"] 341 | else: 342 | imgurl2 = '' 343 | except Exception: 344 | imgurl = '' 345 | imgurl2 = '' 346 | opts['logger'].debug('Image URL: %s', imgurl) 347 | 348 | opts['logger'].info(f'图片:{imgurl + "," + imgurl2}') 349 | # 提交晒单 350 | opts['logger'].debug('Preparing for commenting') 351 | url2 = "https://club.jd.com/myJdcomments/saveProductComment.action" 352 | opts['logger'].debug('URL: %s', url2) 353 | headers['Referer'] = ('https://club.jd.com/myJdcomments/orderVoucher.action') 354 | headers['Origin'] = 'https://club.jd.com' 355 | headers['Content-Type'] = 'application/x-www-form-urlencoded' 356 | opts['logger'].debug('New header for this request: %s', headers) 357 | data = { 358 | 'orderId': oid, 359 | 'productId': pid, 360 | 'score': str(xing), # 商品几星 361 | 'content': urllib.parse.quote(Str), # 评价内容 362 | 'imgs': imgurl + ',' + imgurl2, 363 | 'saveStatus': 2, 364 | 'anonymousFlag': 1 365 | } 366 | opts['logger'].debug('Data: %s', data) 367 | if not opts.get('dry_run'): 368 | opts['logger'].debug('Sending comment request') 369 | pj2 = requests.post(url2, headers=headers, data=data) 370 | if pj2.ok: 371 | opts['logger'].info(f'提交成功!') 372 | else: 373 | opts['logger'].debug( 374 | 'Skipped sending comment request in dry run') 375 | opts['logger'].debug('Sleep time (s): %.1f', ORDINARY_SLEEP_SEC) 376 | time.sleep(ORDINARY_SLEEP_SEC) 377 | idx += 1 378 | N['待评价订单'] -= 1 379 | return N 380 | except Exception as e: 381 | print (e) 382 | 383 | # 追评 384 | def review(N, opts=None): 385 | try: 386 | opts = opts or {} 387 | req_et = [] 388 | Order_data = [] 389 | loop_times = 2 390 | opts['logger'].debug('Fetching website data') 391 | opts['logger'].debug('Total loop times: %d', loop_times) 392 | for i in range(loop_times): 393 | opts['logger'].debug('Loop: %d / %d', i + 1, loop_times) 394 | url = (f"https://club.jd.com/myJdcomments/myJdcomment.action?sort=3" 395 | f"&page={i + 1}") 396 | opts['logger'].debug('URL: %s', url) 397 | req = requests.get(url, headers=headers) 398 | opts['logger'].debug( 399 | 'Successfully accepted the response with status code %d', 400 | req.status_code) 401 | if not req.ok: 402 | opts['logger'].warning( 403 | 'Status code of the response is %d, not 200', req.status_code) 404 | req_et.append(etree.HTML(req.text)) 405 | opts['logger'].debug('Successfully parsed an XML tree') 406 | opts['logger'].debug('Fetching data from XML trees') 407 | opts['logger'].debug('Total loop times: %d', loop_times) 408 | for idx, i in enumerate(req_et): 409 | opts['logger'].debug('Loop: %d / %d', idx + 1, loop_times) 410 | opts['logger'].debug('Fetching order data in the default XPath') 411 | elems = i.xpath( 412 | '//*[@id="main"]/div[2]/div[2]/table/tbody/tr[@class="tr-bd"]') 413 | opts['logger'].debug('Count of fetched order data: %d', len(elems)) 414 | Order_data.extend(elems) 415 | #if len(Order_data) != N['待追评']: 416 | # opts['logger'].debug( 417 | # 'Count of fetched order data doesn\'t equal N["待追评"]') 418 | # # NOTE: Need them? 
419 | # # opts['logger'].debug('Clear the list Order_data') 420 | # # Order_data = [] 421 | # opts['logger'].debug('Total loop times: %d', loop_times) 422 | # for idx, i in enumerate(req_et): 423 | # opts['logger'].debug('Loop: %d / %d', idx + 1, loop_times) 424 | # opts['logger'].debug('Fetching order data in another XPath') 425 | # elems = i.xpath( 426 | # '//*[@id="main"]/div[2]/div[2]/table/tbody/tr[@class="tr-bd"]') 427 | # opts['logger'].debug('Count of fetched order data: %d', len(elems)) 428 | # Order_data.extend(elems) 429 | opts['logger'].info(f"当前共有{N['待追评']}个需要追评。") 430 | opts['logger'].debug('Commenting on items') 431 | for i, Order in enumerate(reversed(Order_data)): 432 | if i + 1 > 10: 433 | opts['logger'].info(f'\n已评价10个订单,跳出') 434 | break 435 | oname = Order.xpath('td[1]/div/div[2]/div/a/text()')[0] 436 | _id = Order.xpath('td[3]/div/a/@href')[0] 437 | opts['logger'].debug('_id: %s', _id) 438 | url1 = ("https://club.jd.com/afterComments/" 439 | "saveAfterCommentAndShowOrder.action") 440 | opts['logger'].debug('URL: %s', url1) 441 | pid, oid = _id.replace( 442 | 'http://club.jd.com/afterComments/productPublish.action?sku=', 443 | "").split('&orderId=') 444 | opts['logger'].debug('pid: %s', pid) 445 | opts['logger'].debug('oid: %s', oid) 446 | opts['logger'].info(f'\n开始第{i+1}个订单: {oid}') 447 | _, context = generation(oname, _type=0, opts=opts) 448 | opts['logger'].info(f'追评内容:{context}') 449 | data1 = { 450 | 'orderId': oid, 451 | 'productId': pid, 452 | 'content': urllib.parse.quote(context), 453 | 'anonymousFlag': 1, 454 | 'score': 5 455 | } 456 | opts['logger'].debug('Data: %s', data1) 457 | if not opts.get('dry_run'): 458 | opts['logger'].debug('Sending comment request') 459 | req_url1 = requests.post(url1, headers=headers, data=data1) 460 | if req_url1.ok: 461 | opts['logger'].info(f'提交成功!') 462 | else: 463 | opts['logger'].debug('Skipped sending comment request in dry run') 464 | opts['logger'].debug('Sleep time (s): %.1f', REVIEW_SLEEP_SEC) 465 | time.sleep(REVIEW_SLEEP_SEC) 466 | N['待追评'] -= 1 467 | return N 468 | except Exception as e: 469 | print (e) 470 | opts['logger'].info(e) 471 | 472 | # 服务评价 473 | def Service_rating(N, opts=None): 474 | try: 475 | opts = opts or {} 476 | Order_data = [] 477 | req_et = [] 478 | loop_times = 2 479 | opts['logger'].debug('Fetching website data') 480 | opts['logger'].debug('Total loop times: %d', loop_times) 481 | for i in range(loop_times): 482 | opts['logger'].debug('Loop: %d / %d', i + 1, loop_times) 483 | url = (f"https://club.jd.com/myJdcomments/myJdcomment.action?sort=4" 484 | f"&page={i + 1}") 485 | opts['logger'].debug('URL: %s', url) 486 | req = requests.get(url, headers=headers) 487 | opts['logger'].debug( 488 | 'Successfully accepted the response with status code %d', 489 | req.status_code) 490 | if not req.ok: 491 | opts['logger'].warning( 492 | 'Status code of the response is %d, not 200', req.status_code) 493 | req_et.append(etree.HTML(req.text)) 494 | opts['logger'].debug('Successfully parsed an XML tree') 495 | opts['logger'].debug('Fetching data from XML trees') 496 | opts['logger'].debug('Total loop times: %d', loop_times) 497 | for idx, i in enumerate(req_et): 498 | opts['logger'].debug('Loop: %d / %d', idx + 1, loop_times) 499 | opts['logger'].debug('Fetching order data in the default XPath') 500 | elems = i.xpath( 501 | '//*[@id="main"]/div[2]/div[2]/table/tbody/tr[@class="tr-th"]') 502 | opts['logger'].debug('Count of fetched order data: %d', len(elems)) 503 | Order_data.extend(elems) 504 | # if 
len(Order_data) != N['服务评价']: 505 | # opts['logger'].debug( 506 | # 'Count of fetched order data doesn\'t equal N["服务评价"]') 507 | # opts['logger'].debug('Clear the list Order_data') 508 | # Order_data = [] 509 | # opts['logger'].debug('Total loop times: %d', loop_times) 510 | # for idx, i in enumerate(req_et): 511 | # opts['logger'].debug('Loop: %d / %d', idx + 1, loop_times) 512 | # opts['logger'].debug('Fetching order data in another XPath') 513 | # elems = i.xpath( 514 | # '//*[@id="main"]/div[2]/div[2]/table/tr[@class="tr-bd"]') 515 | # opts['logger'].debug('Count of fetched order data: %d', len(elems)) 516 | # Order_data.extend(elems) 517 | opts['logger'].info(f"当前共有{N['服务评价']}个需要服务评价。") 518 | opts['logger'].debug('Commenting on items') 519 | for i, Order in enumerate(reversed(Order_data)): 520 | if i + 1 > 10: 521 | opts['logger'].info(f'\n已评价10个订单,跳出') 522 | break 523 | #oname = Order.xpath('td[1]/div[1]/div[2]/div/a/text()')[0] 524 | oid = Order.xpath('td[1]/span[3]/a/text()')[0] 525 | opts['logger'].info(f'\n开始第{i+1}个订单: {oid}') 526 | opts['logger'].debug('oid: %s', oid) 527 | url1 = (f'https://club.jd.com/myJdcomments/insertRestSurvey.action' 528 | f'?voteid=145&ruleid={oid}') 529 | opts['logger'].debug('URL: %s', url1) 530 | data1 = { 531 | 'oid': oid, 532 | 'gid': '32', 533 | 'sid': '186194', 534 | 'stid': '0', 535 | 'tags': '', 536 | 'ro591': f'591A{random.randint(4, 5)}', # 商品符合度 537 | 'ro592': f'592A{random.randint(4, 5)}', # 店家服务态度 538 | 'ro593': f'593A{random.randint(4, 5)}', # 快递配送速度 539 | 'ro899': f'899A{random.randint(4, 5)}', # 快递员服务 540 | 'ro900': f'900A{random.randint(4, 5)}' # 快递员服务 541 | } 542 | opts['logger'].debug('Data: %s', data1) 543 | if not opts.get('dry_run'): 544 | opts['logger'].debug('Sending comment request') 545 | pj1 = requests.post(url1, headers=headers, data=data1) 546 | if pj1.ok: 547 | opts['logger'].info(f'提交成功!') 548 | else: 549 | opts['logger'].debug('Skipped sending comment request in dry run') 550 | #opts['logger'].info("\n " + pj1.text) 551 | opts['logger'].debug('Sleep time (s): %.1f', SERVICE_RATING_SLEEP_SEC) 552 | time.sleep(SERVICE_RATING_SLEEP_SEC) 553 | N['服务评价'] -= 1 554 | return N 555 | except Exception as e: 556 | print (e) 557 | 558 | def No(opts=None): 559 | opts = opts or {} 560 | opts['logger'].info('') 561 | N = all_evaluate(opts) 562 | s = '----'.join(['{} {}'.format(i, N[i]) for i in N]) 563 | opts['logger'].info(s) 564 | opts['logger'].info('') 565 | return N 566 | 567 | 568 | def main(opts=None): 569 | opts = opts or {} 570 | #opts['logger'].info("开始京东自动评价!") 571 | N = No(opts) 572 | opts['logger'].debug('N value after executing No(): %s', N) 573 | if not N: 574 | opts['logger'].error('CK错误,请确认是否电脑版CK!') 575 | #notify.send('京东自动评价', 'CK错误,请确认是否电脑版CK!') 576 | return 577 | if N['待评价订单'] != 0: 578 | opts['logger'].info("1.开始评价晒单") 579 | N = sunbw(N, opts) 580 | opts['logger'].debug('N value after executing sunbw(): %s', N) 581 | N = No(opts) 582 | opts['logger'].debug('N value after executing No(): %s', N) 583 | if N['待追评'] != 0: 584 | opts['logger'].info("2.开始追评!") 585 | N = review(N, opts) 586 | opts['logger'].debug('N value after executing review(): %s', N) 587 | N = No(opts) 588 | opts['logger'].debug('N value after executing No(): %s', N) 589 | if N['服务评价'] != 0: 590 | opts['logger'].info('3.开始服务评价') 591 | N = Service_rating(N, opts) 592 | opts['logger'].debug('N value after executing Service_rating(): %s', N) 593 | N = No(opts) 594 | opts['logger'].debug('N value after executing No(): %s', N) 595 | 
opts['logger'].info("该账号运行完成!") 596 | 597 | 598 | if __name__ == '__main__': 599 | # parse arguments 600 | parser = argparse.ArgumentParser() 601 | parser.add_argument('--dry-run', 602 | help='have a full run without comment submission', 603 | action='store_true') 604 | parser.add_argument('--log-level', 605 | help='specify logging level (default: info)', 606 | default='INFO') 607 | parser.add_argument('-o', '--log-file', help='specify logging file') 608 | args = parser.parse_args() 609 | if args.log_level.upper() not in [ 610 | 'DEBUG', 'WARN', 'INFO', 'ERROR', 'FATAL' 611 | # NOTE: `WARN` is an alias of `WARNING`. `FATAL` is an alias of 612 | # `CRITICAL`. Using these aliases is for developers' and users' 613 | # convenience. 614 | # NOTE: Now there is no logging on `CRITICAL` level. 615 | ]: 616 | args.log_level = 'INFO' 617 | else: 618 | args.log_level = args.log_level.upper() 619 | opts = { 620 | 'dry_run': args.dry_run, 621 | 'log_level': args.log_level 622 | } 623 | if "DEBUG" in os.environ and os.environ["DEBUG"] == 'true': 624 | opts = { 625 | 'dry_run': args.dry_run, 626 | 'log_level': 'DEBUG' 627 | } 628 | if hasattr(args, 'log_file'): 629 | opts['log_file'] = args.log_file 630 | else: 631 | opts['log_file'] = None 632 | 633 | # logging on console 634 | _logging_level = getattr(logging, opts['log_level']) 635 | logger = logging.getLogger('comment') 636 | logger.setLevel(level=_logging_level) 637 | # NOTE: `%(levelname)s` will be parsed as the original name (`FATAL` -> 638 | # `CRITICAL`, `WARN` -> `WARNING`). 639 | # NOTE: The alignment number should set to 19 considering the style 640 | # controling characters. When it comes to file logger, the number should 641 | # set to 8. 642 | formatter = StyleFormatter('%(asctime)s %(levelname)-19s %(message)s',"%F %T") 643 | rawformatter = StyleFormatter('%(asctime)s %(levelname)-8s %(message)s', use_style=False) 644 | console = logging.StreamHandler() 645 | console.setLevel(_logging_level) 646 | console.setFormatter(logging.Formatter('%(message)s')) 647 | logger.addHandler(console) 648 | opts['logger'] = logger 649 | # It's a hack!!! 650 | jieba.default_logger = logging.getLogger('jieba') 651 | jieba.default_logger.setLevel(level=_logging_level) 652 | jieba.default_logger.addHandler(console) 653 | # It's another hack!!! 
654 | jdspider.default_logger = logging.getLogger('spider') 655 | jdspider.default_logger.setLevel(level=_logging_level) 656 | jdspider.default_logger.addHandler(console) 657 | 658 | logger.debug('Successfully set up console logger') 659 | logger.debug('CLI arguments: %s', args) 660 | logger.debug('Opening the log file') 661 | if opts['log_file']: 662 | try: 663 | handler = logging.FileHandler(opts['log_file']) 664 | except Exception as e: 665 | logger.error('Failed to open the file handler') 666 | logger.error('Error message: %s', e) 667 | sys.exit(1) 668 | handler.setLevel(_logging_level) 669 | handler.setFormatter(rawformatter) 670 | logger.addHandler(handler) 671 | jieba.default_logger.addHandler(handler) 672 | jdspider.default_logger.addHandler(handler) 673 | logger.debug('Successfully set up file logger') 674 | logger.debug('Options passed to functions: %s', opts) 675 | logger.debug('Builtin constants:') 676 | logger.debug(' CONFIG_PATH: %s', CONFIG_PATH) 677 | logger.debug(' USER_CONFIG_PATH: %s', USER_CONFIG_PATH) 678 | logger.debug(' ORDINARY_SLEEP_SEC: %s', ORDINARY_SLEEP_SEC) 679 | logger.debug(' SUNBW_SLEEP_SEC: %s', SUNBW_SLEEP_SEC) 680 | logger.debug(' REVIEW_SLEEP_SEC: %s', REVIEW_SLEEP_SEC) 681 | logger.debug(' SERVICE_RATING_SLEEP_SEC: %s', SERVICE_RATING_SLEEP_SEC) 682 | 683 | # parse configurations 684 | #logger.debug('Reading the configuration file') 685 | #if os.path.exists(USER_CONFIG_PATH): 686 | #logger.debug('User configuration file exists') 687 | #_cfg_path = USER_CONFIG_PATH 688 | #else: 689 | #logger.debug('User configuration file doesn\'t exist, fallback to the default one') 690 | #_cfg_path = CONFIG_PATH 691 | # with open(_cfg_path, 'r', encoding='utf-8') as f: 692 | #cfg = yaml.safe_load(f) 693 | #print() 694 | #logger.debug('Closed the configuration file') 695 | #logger.debug('Configurations in Python-dict format: %s', cfg) 696 | cks = [] 697 | if "PC_COOKIE" in os.environ: 698 | if len(os.environ["PC_COOKIE"]) > 200: 699 | if '&' in os.environ["PC_COOKIE"]: 700 | cks = os.environ["PC_COOKIE"].split('&') 701 | else: 702 | cks.append(os.environ["PC_COOKIE"]) 703 | else: 704 | logger.info ("CK错误,请确认是否电脑版CK!") 705 | sys.exit(1) 706 | logger.info ("已获取环境变量 CK") 707 | else: 708 | logger.info("没有设置变量PC_COOKIE,请添加电脑端CK到环境变量") 709 | sys.exit(1) 710 | if "OPENAI_API_KEY" in os.environ: 711 | logger.info('已启用AI评价') 712 | if "OPENAI_API_BASE_URL" in os.environ: 713 | logger.info(' - 使用 OpenAI API 代理:' + os.environ["OPENAI_API_BASE_URL"]) 714 | elif os.environ.get("ProxyUrl") and os.environ.get("ProxyUrl").startswith("http"): 715 | os.environ['http_proxy'] = os.getenv("ProxyUrl") 716 | os.environ['https_proxy'] = os.getenv("ProxyUrl") 717 | logger.info(' - 使用QL配置文件ProxyUrl代理:' + os.environ["ProxyUrl"]) 718 | else: 719 | logger.info(' - 未使用代理,请确认当前网络环境可直连:api.openai.com') 720 | try: 721 | i = 1 722 | for ck in cks: 723 | headers = { 724 | 'cookie': ck.encode("utf-8"), 725 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36', 726 | 'Connection': 'keep-alive', 727 | 'Cache-Control': 'max-age=0', 728 | 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"', 729 | 'sec-ch-ua-mobile': '?0', 730 | 'sec-ch-ua-platform': '"Windows"', 731 | 'DNT': '1', 732 | 'Upgrade-Insecure-Requests': '1', 733 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 734 | 'Sec-Fetch-Site': 
'same-site', 735 | 'Sec-Fetch-Mode': 'navigate', 736 | 'Sec-Fetch-User': '?1', 737 | 'Sec-Fetch-Dest': 'document', 738 | 'Referer': 'https://order.jd.com/', 739 | 'Accept-Encoding': 'gzip, deflate, br', 740 | 'Accept-Language': 'zh-CN,zh;q=0.9', 741 | } 742 | logger.debug('Builtin HTTP request header: %s', headers) 743 | logger.debug('Starting main processes') 744 | logger.info('\n开始第 '+ str(i) +' 个账号评价...\n') 745 | main(opts) 746 | i += 1 747 | # NOTE: It needs 3,000 times to raise this exception. Do you really want to 748 | # do like this? 749 | except RecursionError: 750 | logger.error("多次出现未完成情况,程序自动退出") 751 | -------------------------------------------------------------------------------- /jd_pjdeps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | new Env('评价依赖安装'); 5 | 8 8 2 10 * https://raw.githubusercontent.com/6dylan6/auto_comment/main/jd_pjdeps.py 6 | ''' 7 | 8 | import os 9 | from time import sleep 10 | print('第一次运行评价出错才运行此程序,如果没有问题请勿运行,以免弄出问题!!!') 11 | sleep(2) 12 | print('10s后开始安装依赖......') 13 | sleep(10) 14 | os.system('apk add --no-cache libxml2-dev libxslt-dev') 15 | os.system('pip install -U --force-reinstall pip') 16 | os.system('pip3 install lxml') -------------------------------------------------------------------------------- /jdspider.py: -------------------------------------------------------------------------------- 1 | # @Time : 2022/10/2 2 | # @Author :@Zhang Jiale @Dimlitter @6dylan6 3 | # @File : jdspider.py 4 | 5 | import json 6 | import logging 7 | import random 8 | import re 9 | import sys 10 | import time 11 | from urllib.parse import quote, urlencode 12 | 13 | import requests 14 | import zhon.hanzi 15 | from lxml import etree 16 | 17 | 18 | # Reference: https://github.com/fxsjy/jieba/blob/1e20c89b66f56c9301b0feed211733ffaa1bd72a/jieba/__init__.py#L27 19 | log_console = logging.StreamHandler(sys.stderr) 20 | default_logger = logging.getLogger('jdspider') 21 | default_logger.setLevel(logging.DEBUG) 22 | default_logger.addHandler(log_console) 23 | 24 | 25 | class JDSpider: 26 | # 爬虫实现类:传入商品类别(如手机、电脑),构造实例。然后调用getData搜集数据。 27 | def __init__(self, categlory, ck): 28 | # jD起始搜索页面 29 | self.startUrl = "https://search.jd.com/Search?keyword=%s&enc=utf-8&wq=%s" % ( 30 | quote(categlory),quote(categlory)) 31 | self.commentBaseUrl = "https://sclub.jd.com/comment/productPageComments.action?" 
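        # commentBaseUrl is later combined with the urlencode()'d parameters built in
        # getParamUrl(); for a hypothetical productId 100012345678, page 1 and score 3
        # (good reviews), the request URL would look roughly like:
        #   https://sclub.jd.com/comment/productPageComments.action?productId=100012345678&score=3&sortType=5&page=1&pageSize=10&isShadowSku=0&rid=0&fold=1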
32 | self.headers = { 33 | 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 34 | 'accept-encoding': 'gzip, deflate, br', 35 | 'accept-language': 'zh-CN,zh;q=0.9', 36 | 'cache-control': 'max-age=0', 37 | 'dnt': '1', 38 | 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"', 39 | 'sec-ch-ua-mobile': '?0', 40 | 'sec-ch-ua-platform': '"Windows"', 41 | 'sec-fetch-dest': 'document', 42 | 'sec-fetch-site': 'none', 43 | 'sec-fetch-user': '?1', 44 | 'upgrade-insecure-requests': '1', 45 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36', 46 | 'cookie':ck.encode("utf-8") 47 | } 48 | self.productsId = self.getId() 49 | self.comtype = {1: "差评", 2: "中评", 3: "好评"} 50 | self.categlory = categlory 51 | self.ck = ck 52 | self.iplist = { 53 | 'http': [], 54 | 'https': [] 55 | } 56 | 57 | def getParamUrl(self, productid, page, score): 58 | params = { # 用于控制页数,页面信息数的数据,非常重要,必不可少,要不然会被JD识别出来,爬不出相应的数据。 59 | "productId": "%s" % (productid), 60 | "score": "%s" % (score), # 1表示差评,2表示中评,3表示好评 61 | "sortType": "5", 62 | "page": "%s" % (page), 63 | "pageSize": "10", 64 | "isShadowSku": "0", 65 | "rid": "0", 66 | "fold": "1" 67 | } 68 | url = self.commentBaseUrl + urlencode(params) 69 | return params, url 70 | 71 | def getHeaders(self, productid): # 和初始的self.header不同,这是搜集某个商品的header,加入了商品id,我也不知道去掉了会怎样。 72 | header = { 73 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36", 74 | "Cookie": self.ck.encode("utf-8") 75 | } 76 | return header 77 | 78 | def getId(self): # 获取商品id,为了得到具体商品页面的网址。结果保存在self.productId的数组里 79 | response = requests.get(self.startUrl, headers=self.headers) 80 | if response.status_code != 200: 81 | default_logger.warning("状态码错误,连接异常!") 82 | html = etree.HTML(response.text) 83 | return html.xpath('//li[@class="gl-item"]/@data-sku') 84 | 85 | def getData(self, maxPage, score,): # maxPage是搜集评论的最大页数,每页10条数据。差评和好评的最大一般页码不相同,一般情况下:好评>>差评>中评 86 | # maxPage遇到超出的页码会自动跳出,所以设大点也没有关系。 87 | # score是指那种评价类型,好评3、中评2、差评1。 88 | 89 | comments = [] 90 | scores = [] 91 | if len(self.productsId) < 4: # limit the sum of products 92 | sum = len(self.productsId) 93 | else: 94 | sum = 3 95 | for j in range(sum): 96 | id = self.productsId[j] 97 | header = self.getHeaders(id) 98 | for i in range(1, maxPage): 99 | param, url = self.getParamUrl(id, i, score) 100 | default_logger.info("正在搜集第%d个商品第%d页的评论信息" % (j+1, i)) 101 | try: 102 | response = requests.get(url, headers=header, params=param) 103 | except Exception as e: 104 | default_logger.warning(e) 105 | break 106 | if response.status_code != 200: 107 | default_logger.warning("状态码错误,连接异常") 108 | continue 109 | time.sleep(random.randint(5, 10)) # 设置时延,防止被封IP 110 | if response.text == '': 111 | default_logger.warning("未搜集到信息") 112 | continue 113 | try: 114 | res_json = json.loads(response.text) 115 | except Exception as e: 116 | default_logger.warning(e) 117 | continue 118 | if len((res_json['comments'])) == 0: 119 | default_logger.warning("本页无评价数据,跳过") 120 | break 121 | default_logger.info("正在搜集 %s 的%s信息" % 122 | (self.categlory, self.comtype[score])) 123 | for cdit in res_json['comments']: 124 | comment = cdit['content'].replace( 125 | "\n", ' ').replace('\r', ' ') 126 | comments.append(comment) 127 | scores.append(cdit['score']) 128 | # savepath = './'+self.categlory+'_'+self.comtype[score]+'.csv' 
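        # The post-processing below splits every collected comment into Chinese
        # sentences and drops fragments that are pure punctuation. The split relies
        # on the zhon.hanzi.sentence regex; roughly (example string made up):
        #
        #     re.findall(zhon.hanzi.sentence, '物流很快!包装完好。')
        #     # -> ['物流很快!', '包装完好。']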
129 | default_logger.warning("已搜集%d条%s信息" % 130 | (len(comments), self.comtype[score])) 131 | # 存入列表,简单处理评价 132 | remarks = [] 133 | for i in range(len(comments)): 134 | rst = comments[i] 135 | rst = re.findall(zhon.hanzi.sentence, comments[i]) 136 | if len(rst) == 0 or rst == ['。'] or rst == ['?'] or rst == ['!'] or rst == ['.'] or rst == [','] or rst == ['?'] or rst == ['!']: 137 | #default_logger.warning("拆分失败或结果不符(去除空格和标点符号):%s" % (rst)) 138 | continue 139 | else: 140 | remarks.append(rst) 141 | result = self.solvedata(remarks=remarks) 142 | if len(result) == 0: 143 | default_logger.warning("当前商品没有评价,使用默认评价") 144 | result = ["考虑买这个$之前我是有担心过的,因为我不知道$的质量和品质怎么样,但是看了评论后我就放心了。", 145 | "买这个$之前我是有看过好几家店,最后看到这家店的评价不错就决定在这家店买 ", 146 | "看了好几家店,也对比了好几家店,最后发现还是这一家的$评价最好。", 147 | "看来看去最后还是选择了这家。", 148 | "之前在这家店也买过其他东西,感觉不错,这次又来啦。", 149 | "这家的$的真是太好用了,用了第一次就还想再用一次。", 150 | "收到货后我非常的开心,因为$的质量和品质真的非常的好!", 151 | "拆开包装后惊艳到我了,这就是我想要的$!", 152 | "快递超快!包装的很好!!很喜欢!!!", 153 | "包装的很精美!$的质量和品质非常不错!", 154 | "收到快递后迫不及待的拆了包装。$我真的是非常喜欢", 155 | "真是一次难忘的购物,这辈子没见过这么好用的东西!!", 156 | "经过了这次愉快的购物,我决定如果下次我还要买$的话,我一定会再来这家店买的。", 157 | "不错不错!", 158 | "我会推荐想买$的朋友也来这家店里买", 159 | "真是一次愉快的购物!", 160 | "大大的好评!以后买$再来你们店!( ̄▽ ̄)", 161 | "真是一次愉快的购物!" 162 | ] 163 | return result 164 | 165 | def solvedata(self, remarks): 166 | # 将数据拆分成句子 167 | sentences = [] 168 | for i in range(len(remarks)): 169 | for j in range(len(remarks[i])): 170 | sentences.append(remarks[i][j]) 171 | #default_logger.info("搜集的评价结果:" + str(sentences)) 172 | return sentences 173 | 174 | # 存入mysql数据库 175 | ''' 176 | db = pymysql.connect(host='主机名',user='用户名',password='密码',db='数据库名',charset='utf8mb4') 177 | mycursor = db.cursor() 178 | mycursor.execute("use jd") # 根据自己的数据库名称更改 179 | mycursor.execute("TRUNCATE table jd") 180 | for i in range(len(comments)): 181 | sql = "insert into jd(i,scores,comments) values('%s','%s','%s')"%(id,scores[i],comments[i]) # 根据自己的表结构更改 182 | try: 183 | mycursor.execute(sql) 184 | db.commit() 185 | except Exception as e: 186 | logging.warning(e) 187 | db.rollback() 188 | mycursor.close() 189 | db.close() 190 | logging.warning("已存入数据库") 191 | ''' 192 | 193 | # 存入csv文件 194 | ''' 195 | with open(savepath,'a+',encoding ='utf8') as f: 196 | for i in range(len(comments)): 197 | f.write("%d\t%s\t%s\n"%(i,scores[i],comments[i])) 198 | logging.warning("数据已保存在 %s"%(savepath)) 199 | ''' 200 | 201 | 202 | # 测试用例 203 | if __name__ == "__main__": 204 | jdlist = ['笔筒台灯插座 手机支架多功能USB充电LED护眼灯遥控定时学生学习阅 读灯宿舍寝室卧室床头书桌台灯插排 笔筒台灯 4插位+2USB 1.8米(不带遥控)'] 205 | for item in jdlist: 206 | spider = JDSpider(item) 207 | spider.getData(4, 3) 208 | --------------------------------------------------------------------------------