├── config.yml ├── requirements.txt ├── .github ├── ISSUE_TEMPLATE │ ├── 自定义问题模板.md │ ├── 功能需求.md │ └── bug报告.md └── workflows │ └── release.yml ├── LICENSE ├── .gitignore ├── README.md ├── jdspider.py └── auto_comment_plus.py /config.yml: -------------------------------------------------------------------------------- 1 | user: 2 | cookie: ''' ''' 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jieba 2 | requests 3 | lxml 4 | zhon 5 | pyyaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/自定义问题模板.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 自定义问题模板 3 | about: 非 bug、需求类问题可以使用此模板。 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/功能需求.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 功能需求 3 | about: 为这个项目提出一个想法(需求)。 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | **您的功能请求是否与问题有关?请描述** 10 | 对问题所在的清晰简洁的描述。Ex.当〔…〕 11 | 12 | **描述您想要的解决方案** 13 | 对你想要发生的事情的清晰简洁的描述。 14 | 15 | **描述你考虑过的替代方案** 16 | 对您考虑过的任何替代解决方案或功能的清晰简洁的描述。 17 | 18 | **附加上下文** 19 | 在此处添加有关功能请求的任何其他上下文或屏幕截图。 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug报告.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug报告 3 | about: 创建Bug报告以帮助我们改进 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | **描述错误** 10 | 11 | 清晰简洁地描述错误是什么! 12 | 将当前程序运行目录下的 log.txt 内容复制出来或者整个上传好让开发者排查问题。 13 | 14 | **重现方式** 15 | 16 | 1. 17 | 18 | 2. 19 | 20 | 3. 21 | 22 | **预期行为** 23 | 24 | 对您期望发生的事情进行清晰简洁的描述。 25 | 26 | **截图** 27 | 28 | 如果适用,请添加屏幕截图以帮助解释您的问题。 29 | 30 | **桌面(请填写以下信息)** 31 | 32 | - OS: [e.g. iOS] 33 | - Version [e.g. 22] 34 | 35 | **日志 log** 36 | 在此处以文本形式提供对应的 log 日志来让开发人员排查代码问题。 37 | 38 | **其他** 39 | 在此处添加有关该问题的任何其他上下文。 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Dimlitter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
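
下面补充一个最小示例(假设性脚本,并非仓库自带文件),演示上面 `config.yml` 中 `cookie` 字段的读取方式:与 `auto_comment_plus.py` 的逻辑一致,优先读取 `config.user.yml`,不存在时回退到 `config.yml`,然后把 cookie 放进请求头访问评价中心页面,粗略检查 cookie 是否仍然可用。其中“跳转到 passport.jd.com 即视为失效”只是一个经验性假设。

```python
# 假设性示例:读取 config.yml / config.user.yml 中的 cookie,并做一次简单的有效性检查
import os

import requests
import yaml

CONFIG_PATH = "./config.yml"
USER_CONFIG_PATH = "./config.user.yml"


def load_cookie() -> str:
    # 与 auto_comment_plus.py 相同:优先使用用户配置文件
    path = USER_CONFIG_PATH if os.path.exists(USER_CONFIG_PATH) else CONFIG_PATH
    with open(path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    return cfg["user"]["cookie"]


if __name__ == "__main__":
    headers = {
        "Cookie": load_cookie(),
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    }
    # 访问“我的评价”页面;cookie 失效时通常会被重定向到登录页(经验性假设)
    resp = requests.get(
        "https://club.jd.com/myJdcomments/myJdcomment.action", headers=headers
    )
    print("状态码:", resp.status_code)
    print("疑似跳转到登录页:", "passport.jd.com" in resp.url)
```
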
22 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: push_release 2 | 3 | permissions: 4 | contents: write 5 | 6 | on: 7 | push: 8 | tags: 9 | - "v*" 10 | 11 | jobs: 12 | tagged-release: 13 | name: "Tagged Release" 14 | runs-on: "ubuntu-latest" 15 | steps: 16 | # ... 17 | - name: 🛎️ 检出代码 18 | uses: actions/checkout@v4 19 | 20 | - name: 📝 输出提交信息 21 | run: | 22 | echo "======================================" 23 | echo "🚀 触发 Release 工作流" 24 | echo "分支: ${{ github.ref }}" 25 | echo "Tag: ${{ github.ref_name }}" 26 | echo "提交者: ${{ github.actor }}" 27 | echo "提交信息: ${{ github.event.head_commit.message }}" 28 | echo "提交时间: ${{ github.event.head_commit.timestamp }}" 29 | echo "======================================" 30 | 31 | - name: 🎉 创建 Release 32 | uses: softprops/action-gh-release@v2 33 | with: 34 | tag_name: ${{ github.ref_name }} 35 | name: "🎉 Release ${{ github.ref_name }}" 36 | body: | 37 | ## 🚀 发布说明 38 | 39 | **发布人**: `${{ github.actor }}` 40 | **分支**: `${{ github.ref }}` 41 | **Tag**: `${{ github.ref_name }}` 42 | **提交信息**: 43 | > ${{ github.event.head_commit.message }} 44 | 45 | **提交时间**: `${{ github.event.head_commit.timestamp }}` 46 | 47 | --- 48 | 49 | 自动化发布,感谢您的关注与支持! 50 | 51 | draft: false 52 | prerelease: false 53 | # files: | 54 | # dist/** 55 | # README.md 56 | env: 57 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | log.txt 55 | test.py 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
98 | __pypackages__/
99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 
134 | # user config
135 | config.user.yml
136 | .idea/
137 | .vscode/
138 | 
139 | # images
140 | *.jpg
141 | *.txt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # jd_AutoComment
2 | 
3 | ## 鸣谢
4 | 
5 | 感谢 [qiu-lzsnmb](https://github.com/qiu-lzsnmb) 大佬的脚本和 [Zhang Jiale](https://github.com/2274900) 大佬的评论爬虫。
6 | 
7 | 源库链接:[自动评价](https://github.com/qiu-lzsnmb/jd_lzsnmb)
8 | [评论爬虫](https://github.com/2274900/JD_comment_spider)
9 | 
10 | ### 本脚本是对以上两个项目的结合与魔改,用于解决评论文不对题的问题。经测试,本脚本能初步解决这一问题。
11 | 
12 | ## 思路
13 | 
14 | 由爬虫先对同类商品的既有评价进行爬取,再在此基础上生成自己的评价。
15 | 
16 | ## 用法
17 | 
18 | > 请先确保 Python 版本为 3.8+,最好是 3.10+。
19 | 
20 | ### 分支说明
21 | 
22 | main 分支为开发版,更新较快,但由于开发者的 cookie 数量远远不足以满足开发需求,测试不够完备,可能存在 bug。
23 | 
24 | stable 分支为稳定版,更新较慢,基本可以稳定使用,但功能可能存在欠缺。
25 | 
26 | more_cookie 分支是为需要多账号批量评论的用户创建的分支。
27 | > 由于作者只有一个京东账号,该分支需要有多账号的朋友帮忙测试。
28 | > 目前的代码逻辑是:先普通评价,再追评,然后换第二个账号继续执行同样的流程。因此使用多账号时,后面的账号可能要等前面的历史追评处理完才会执行;账号较多时,cookie 可能在等待期间失效,实际效果未必理想。
29 | 
30 | 请用户自行判断使用哪个分支。
31 | 
32 | ### 安装依赖库
33 | 
34 | ```bash
35 | pip install -r requirements.txt
36 | ```
37 | ### 快速使用
38 | 
39 | 在终端中执行:
40 | 
41 | ```bash
42 | git clone https://github.com/Dimlitter/jd_AutoComment.git
43 | cd jd_AutoComment
44 | pip install -r requirements.txt
45 | ```
46 | 
47 | 打开链接 `https://club.jd.com/myJdcomments/myJdcomment.action`,登录账号后,从 xhr 请求中获取完整的 `cookie`,全部填入配置文件。可以选择填入默认配置文件 `config.yml`;也可以填入用户配置文件 `config.user.yml`(需要新建该文件,并将 `config.yml` 中的内容复制进去),以避免后续更新覆盖 `config.yml` 中的内容。
48 | 
49 | 需要填入如下内容:
50 | 
51 | ```yml
52 | user:
53 |   cookie: ''
54 | ```
55 | 
56 | 例如,若获取到的 cookie 为 `a=1; b=2; c=3`,则配置文件中填入:
57 | 
58 | ```yml
59 | user:
60 |   cookie: 'a=1; b=2; c=3'
61 | ```
62 | 
63 | 最后运行 `auto_comment_plus.py`:
64 | 
65 | ```bash
66 | python3 auto_comment_plus.py
67 | ```
68 | 
69 | **注意:** 请根据设备环境换用不同的解释器路径,如 `python`、`py`。
70 | 
71 | ### 命令行参数
72 | 
73 | 本程序支持命令行参数:
74 | 
75 | ```text
76 | usage: auto_comment_plus.py [-h] [--dry-run] [-lv LOG_LEVEL] [-o LOG_FILE]
77 | 
78 | optional arguments:
79 |   -h, --help            show this help message and exit
80 |   --dry-run             have a full run without comment submission
81 |   -lv LOG_LEVEL, --log-level LOG_LEVEL
82 |                         specify logging level (default: info)
83 |   -o LOG_FILE, --log-file LOG_FILE
84 |                         specify logging file
85 | ```
86 | 
87 | **`-h`, `--help`:**
88 | 
89 | 显示帮助文本。
90 | 
91 | **`--dry-run`:**
92 | 
93 | 完整地运行程序,但不实际提交评论。
94 | 
95 | **`-lv LOG_LEVEL`, `--log-level LOG_LEVEL`:**
96 | 
97 | 设置输出日志的等级。默认为 `INFO`。可选等级为 `DEBUG`、`INFO`、`WARNING`、`ERROR`,输出内容量依次递减。
98 | 
99 | **注意:** 若需要提交 issue 报告 bug,请将该选项设置为 `DEBUG`。
100 | 
101 | **`-o LOG_FILE`, `--log-file LOG_FILE`:**
102 | 
103 | 设置输出日志文件的路径。默认输出到当前目录下的 `log.txt`。
104 | 
105 | ## 声明
106 | 
107 | 本项目为 Python 学习交流的开源非营利项目,仅作为程序员之间相互学习交流之用。
108 | 
109 | 严禁用于商业用途,禁止使用本项目进行任何盈利活动。
110 | 
111 | 使用者请遵守相关政策。对一切非法使用所产生的后果,我们概不负责。
112 | 
113 | 如本项目对您造成困扰,请联系我们删除。
114 | 
115 | ## 许可证
116 | 
117 | 
![AUR](https://img.shields.io/badge/license-MIT%20License%202.0-green.svg) 118 | -------------------------------------------------------------------------------- /jdspider.py: -------------------------------------------------------------------------------- 1 | # @Time : 2022/2/8 20:50 2 | # @Author :@Zhang Jiale and @Dimlitter 3 | # @File : jdspider.py 4 | 5 | import json 6 | import logging 7 | import random 8 | import re 9 | import sys 10 | import time 11 | from urllib.parse import quote, urlencode 12 | 13 | import requests 14 | import yaml 15 | import zhon.hanzi 16 | from lxml import etree 17 | 18 | # 加载配置文件 19 | with open("./config.yml", "r", encoding="utf-8") as f: 20 | cfg = yaml.safe_load(f) 21 | 22 | # 获取用户的 cookie 23 | cookie = cfg["user"]["cookie"] 24 | 25 | # 配置日志输出到标准错误流 26 | log_console = logging.StreamHandler(sys.stderr) 27 | default_logger = logging.getLogger("jdspider") 28 | default_logger.setLevel(logging.DEBUG) 29 | default_logger.addHandler(log_console) 30 | 31 | # 定义基础请求头,避免重复代码 32 | BASE_HEADERS = { 33 | "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng," 34 | "*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", 35 | "accept-encoding": "gzip, deflate, br", 36 | "accept-language": "zh-CN,zh;q=0.9", 37 | "cache-control": "max-age=0", 38 | "dnt": "1", 39 | "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"', 40 | "sec-ch-ua-mobile": "?0", 41 | "sec-ch-ua-platform": '"Windows"', 42 | "sec-fetch-dest": "document", 43 | "sec-fetch-site": "none", 44 | "sec-fetch-user": "?1", 45 | "upgrade-insecure-requests": "1", 46 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " 47 | "Chrome/98.0.4758.82 Safari/537.36", 48 | } 49 | 50 | 51 | class JDSpider: 52 | """ 53 | 京东爬虫类,用于爬取指定商品类别的评论信息。 54 | 传入商品类别(如手机、电脑)构造实例,然后调用 getData 方法爬取数据。 55 | """ 56 | 57 | def __init__(self, categlory): 58 | # 京东搜索商品的起始页面 URL 59 | self.startUrl = "https://search.jd.com/Search?keyword=%s&enc=utf-8" % ( 60 | quote(categlory) 61 | ) 62 | # 评论接口的基础 URL 63 | self.commentBaseUrl = "https://club.jd.com" 64 | # 基础请求头 65 | self.headers = BASE_HEADERS.copy() 66 | # 带 cookie 的请求头 67 | self.headers2 = { 68 | **BASE_HEADERS, 69 | "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", 70 | "accept-language": "en,zh-CN;q=0.9,zh;q=0.8", 71 | "Cookie": cookie, 72 | "priority": "u=0, i", 73 | "sec-ch-ua": '"Microsoft Edge";v="129", "Not=A?Brand";v="8", "Chromium";v="129"', 74 | "sec-ch-ua-mobile": "?0", 75 | "sec-ch-ua-platform": '"macOS"', 76 | "sec-fetch-mode": "navigate", 77 | "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0", 78 | } 79 | # 获取商品 ID 列表 80 | self.productsId = self.getId() 81 | # 评论类型映射,1 差评,2 中评,3 好评 82 | self.comtype = {1: "negative", 2: "medium", 3: "positive"} # 修正拼写错误 83 | # 商品类别 84 | self.categlory = categlory 85 | # IP 列表,用于代理(当前为空) 86 | self.iplist = {"http": [], "https": []} 87 | 88 | def getParamUrl(self, productid: str, page: str, score: str): 89 | """ 90 | 生成评论接口的请求参数和完整 URL。 91 | :param productid: 商品 ID 92 | :param page: 评论页码 93 | :param score: 评论类型(1 差评,2 中评,3 好评) 94 | :return: 请求参数和完整 URL 95 | """ 96 | path = ( 97 | "/discussion/getProductPageImageCommentList.action?productId=" + productid 98 | ) 99 | params = {} 100 | # params = { 101 | # "appid": "item-v3", 102 | # "functionId": 
"pc_club_productPageComments", 103 | # "client": "pc", 104 | # "body": { 105 | # "productId": productid, 106 | # "score": score, 107 | # "sortType": "5", 108 | # "page": page, 109 | # "pageSize": "10", 110 | # "isShadowSku": "0", 111 | # "rid": "0", 112 | # "fold": "1", 113 | # }, 114 | # } 115 | # default_logger.info("请求参数: " + str(params)) 116 | url = self.commentBaseUrl + path 117 | default_logger.info("请求 URL: " + str(url)) 118 | return params, url 119 | 120 | def getHeaders(self, productid: str) -> dict: 121 | """ 122 | 生成爬取指定商品评论时所需的请求头。 123 | :param productid: 商品 ID 124 | :return: 请求头字典 125 | """ 126 | return { 127 | "Referer": f"https://item.jd.com/{productid}.html", 128 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " 129 | "Chrome/75.0.3770.142 Safari/537.36", 130 | # "cookie": cookie, 131 | } 132 | 133 | def getId(self) -> list: 134 | """ 135 | 从京东搜索页面获取商品 ID 列表。 136 | :return: 商品 ID 列表 137 | """ 138 | try: 139 | response = requests.get(self.startUrl, headers=self.headers2) 140 | response.raise_for_status() # 检查响应状态码 141 | default_logger.info("获取同类产品的搜索 URL 结果:" + self.startUrl) 142 | except requests.RequestException as e: 143 | default_logger.warning(f"请求异常,状态码错误,爬虫连接异常!错误信息: {e}") 144 | return [] 145 | 146 | html = etree.HTML(response.text) 147 | return html.xpath('//li[@class="gl-item"]/@data-sku') 148 | 149 | def getData(self, maxPage: int, score: int): 150 | """ 151 | 爬取指定商品类别的评论信息。 152 | :param maxPage: 最大爬取页数,每页 10 条评论 153 | :param score: 评论类型(1 差评,2 中评,3 好评) 154 | :return: 处理后的评论列表 155 | """ 156 | comments = [] 157 | scores = [] 158 | default_logger.info( 159 | "爬取商品数量最多为 8 个,请耐心等待,也可以自行修改 jdspider 文件" 160 | ) 161 | 162 | # 确定要爬取的商品数量 163 | product_count = min(len(self.productsId), 8) if self.productsId else 0 164 | if product_count == 0: 165 | default_logger.warning("self.productsId 为空,将使用默认评价") 166 | default_logger.info("要爬取的商品数量: " + str(product_count)) 167 | 168 | for j in range(product_count): 169 | product_id = self.productsId[j] 170 | for i in range(1, maxPage): 171 | params, url = self.getParamUrl(product_id, str(i), str(score)) 172 | default_logger.info(f"正在爬取第 {j + 1} 个商品的第 {i} 页评论信息") 173 | 174 | try: 175 | default_logger.info( 176 | f"爬取商品评价的 URL 链接是 {url},商品的 ID 是:{product_id}" 177 | ) 178 | response = requests.get(url, headers=self.getHeaders(product_id)) 179 | response.raise_for_status() # 检查响应状态码 180 | except requests.RequestException as e: 181 | default_logger.warning(f"请求异常: {e}") 182 | continue 183 | 184 | time.sleep(random.randint(5, 10)) # 设置时延,防止被封 IP 185 | 186 | if not response.text: 187 | default_logger.warning("未爬取到信息") 188 | continue 189 | 190 | try: 191 | res_json = json.loads(response.text) 192 | except json.JSONDecodeError as e: 193 | default_logger.warning(f"JSON 解析异常: {e}") 194 | continue 195 | 196 | if res_json["imgComments"]["imgCommentCount"] == 0: 197 | default_logger.warning( 198 | f"爬取到的商品评价数量为 0,可能是最后一页或请求失败" 199 | ) 200 | break 201 | 202 | for comment_data in res_json["imgComments"]["imgList"]: 203 | comment = ( 204 | comment_data["commentVo"]["content"] 205 | .replace("\n", " ") 206 | .replace("\r", " ") 207 | ) 208 | comments.append(comment) 209 | scores.append(comment_data["commentVo"]["score"]) 210 | 211 | default_logger.info(f"已爬取 {len(comments)} 条 {self.comtype[score]} 评价信息") 212 | 213 | # 处理评论,拆分成句子 214 | remarks = [] 215 | for comment in comments: 216 | sentences = re.findall(zhon.hanzi.sentence, comment) 217 | if not sentences or sentences in [ 218 | ["。"], 219 | ["?"], 220 
| ["!"], 221 | ["."], 222 | [","], 223 | ["?"], 224 | ["!"], 225 | ]: 226 | default_logger.warning( 227 | f"拆分失败或结果不符(去除空格和标点符号):{sentences}" 228 | ) 229 | else: 230 | remarks.append(sentences) 231 | 232 | result = self.solvedata(remarks=remarks) 233 | 234 | if not result: 235 | default_logger.warning("当前商品没有评价,使用默认评价") 236 | result = [ 237 | "考虑买这个$之前我是有担心过的,因为我不知道$的质量和品质怎么样,但是看了评论后我就放心了。", 238 | "买这个$之前我是有看过好几家店,最后看到这家店的评价不错就决定在这家店买 ", 239 | "看了好几家店,也对比了好几家店,最后发现还是这一家的$评价最好。", 240 | "看来看去最后还是选择了这家。", 241 | "之前在这家店也买过其他东西,感觉不错,这次又来啦。", 242 | "这家的$的真是太好用了,用了第一次就还想再用一次。", 243 | "收到货后我非常的开心,因为$的质量和品质真的非常的好!", 244 | "拆开包装后惊艳到我了,这就是我想要的$!", 245 | "快递超快!包装的很好!!很喜欢!!!", 246 | "包装的很精美!$的质量和品质非常不错!", 247 | "收到快递后迫不及待的拆了包装。$我真的是非常喜欢", 248 | "真是一次难忘的购物,这辈子没见过这么好用的东西!!", 249 | "经过了这次愉快的购物,我决定如果下次我还要买$的话,我一定会再来这家店买的。", 250 | "不错不错!", 251 | "我会推荐想买$的朋友也来这家店里买", 252 | "真是一次愉快的购物!", 253 | "大大的好评!以后买$再来你们店!( ̄▽ ̄)", 254 | "真是一次愉快的购物!", 255 | ] 256 | 257 | return result 258 | 259 | def solvedata(self, remarks) -> list: 260 | """ 261 | 将评论拆分成句子列表。 262 | :param remarks: 包含评论句子列表的列表 263 | :return: 所有评论句子组成的列表 264 | """ 265 | sentences = [] 266 | for item in remarks: 267 | for sentence in item: 268 | sentences.append(sentence) 269 | default_logger.info("爬取的评价结果:" + str(sentences)) 270 | return sentences 271 | 272 | 273 | # 测试用例 274 | if __name__ == "__main__": 275 | jdlist = ["商品名"] 276 | for item in jdlist: 277 | spider = JDSpider(item) 278 | spider.getData(2, 3) 279 | -------------------------------------------------------------------------------- /auto_comment_plus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022/2/8 20:50 3 | # @Author : @qiu-lzsnmb and @Dimlitter 4 | # @File : auto_comment_plus.py 5 | 6 | import argparse, uuid 7 | import copy 8 | import logging 9 | import os 10 | import random 11 | import sys 12 | import time 13 | import urllib 14 | 15 | import jieba # just for linting 16 | import jieba.analyse 17 | import requests 18 | import yaml 19 | from lxml import etree 20 | 21 | import jdspider 22 | 23 | # from http2_adapter import Http2Adapter 24 | 25 | # constants 26 | CONFIG_PATH = "./config.yml" 27 | USER_CONFIG_PATH = "./config.user.yml" 28 | ORDINARY_SLEEP_SEC = 10 29 | SUNBW_SLEEP_SEC = 5 30 | REVIEW_SLEEP_SEC = 10 31 | SERVICE_RATING_SLEEP_SEC = 15 32 | 33 | # logging with styles 34 | # Reference: https://stackoverflow.com/a/384125/12002560 35 | _COLORS = { 36 | "black": 0, 37 | "red": 1, 38 | "green": 2, 39 | "yellow": 3, 40 | "blue": 4, 41 | "magenta": 5, 42 | "cyan": 6, 43 | "white": 7, 44 | } 45 | 46 | _RESET_SEQ = "\033[0m" 47 | _COLOR_SEQ = "\033[1;%dm" 48 | _BOLD_SEQ = "\033[1m" 49 | _ITALIC_SEQ = "\033[3m" 50 | _UNDERLINED_SEQ = "\033[4m" 51 | 52 | _FORMATTER_COLORS = { 53 | "DEBUG": _COLORS["blue"], 54 | "INFO": _COLORS["green"], 55 | "WARNING": _COLORS["yellow"], 56 | "ERROR": _COLORS["red"], 57 | "CRITICAL": _COLORS["red"], 58 | } 59 | 60 | 61 | def format_style_seqs(msg: str, use_style: bool = True): 62 | if use_style: 63 | msg = msg.replace("$RESET", _RESET_SEQ) 64 | msg = msg.replace("$BOLD", _BOLD_SEQ) 65 | msg = msg.replace("$ITALIC", _ITALIC_SEQ) 66 | msg = msg.replace("$UNDERLINED", _UNDERLINED_SEQ) 67 | else: 68 | msg = msg.replace("$RESET", "") 69 | msg = msg.replace("$BOLD", "") 70 | msg = msg.replace("$ITALIC", "") 71 | msg = msg.replace("$UNDERLINED", "") 72 | 73 | 74 | class StyleFormatter(logging.Formatter): 75 | def __init__(self, fmt=None, datefmt=None, use_style=True): 76 | 
logging.Formatter.__init__(self, fmt, datefmt) 77 | self.use_style = use_style 78 | 79 | def format(self, record): 80 | rcd = copy.copy(record) 81 | levelname = rcd.levelname 82 | if self.use_style and levelname in _FORMATTER_COLORS: 83 | levelname_with_color = "%s%s%s" % ( 84 | _COLOR_SEQ % (30 + _FORMATTER_COLORS[levelname]), 85 | levelname, 86 | _RESET_SEQ, 87 | ) 88 | rcd.levelname = levelname_with_color 89 | return logging.Formatter.format(self, rcd) 90 | 91 | 92 | # 生成随机文件名 93 | def generate_unique_filename(): 94 | # 获取当前时间戳的最后5位 95 | timestamp = str(int(time.time()))[-5:] 96 | 97 | # 生成 UUID 的前5位 98 | unique_id = str(uuid.uuid4().int)[:5] 99 | 100 | # 组合生成10位的唯一文件名 101 | unique_filename = f"{timestamp}{unique_id}.jpg" 102 | 103 | return unique_filename 104 | 105 | 106 | # 下载图片 107 | def download_image(img_url, file_name): 108 | fullUrl = f"https:{img_url}" 109 | response = requests.get(fullUrl) 110 | if response.status_code == 200: 111 | directory = "img" 112 | if not os.path.exists(directory): 113 | # 如果目录不存在,创建目录 114 | os.makedirs(directory) 115 | file_path = os.path.join(directory, file_name) 116 | with open(file_path, "wb") as file: 117 | file.write(response.content) 118 | return file_path 119 | else: 120 | print("Failed to download image") 121 | return None 122 | 123 | 124 | # 上传图片到JD接口 125 | def upload_image(filename, file_path, session, headers): 126 | # session.mount( 127 | # "https://club.jd.com/myJdcomments/ajaxUploadImage.action", Http2Adapter() 128 | # ) 129 | 130 | files = { 131 | "name": (None, filename), 132 | # 不需要 PHPSESSID 时可以忽略 133 | # 如果需要的话,可以从初次登录响应中获取 134 | "Filedata": (file_path, open(file_path, "rb"), "image/jpeg"), 135 | } 136 | 137 | # 发起 POST 请求 138 | response = session.post( 139 | "https://club.jd.com/myJdcomments/ajaxUploadImage.action", 140 | headers=headers, 141 | files=files, 142 | ) 143 | 144 | return response 145 | 146 | 147 | # 评价生成 148 | def generation(pname:str, _class: int = 0, _type: int = 1, opts: object = None): 149 | result = [] 150 | opts = opts or {} 151 | items = ["商品名"] 152 | items.clear() 153 | items.append(pname) 154 | opts["logger"].debug("Items: %s", items) 155 | loop_times = len(items) 156 | opts["logger"].debug("Total loop times: %d", loop_times) 157 | for i, item in enumerate(items): 158 | opts["logger"].debug("Loop: %d / %d", i + 1, loop_times) 159 | opts["logger"].debug("Current item: %s", item) 160 | spider = jdspider.JDSpider(item) 161 | opts["logger"].debug("Successfully created a JDSpider instance") 162 | # 增加对增值服务的评价鉴别 163 | if "赠品" in pname or "非实物" in pname or "增值服务" in pname: 164 | result = [ 165 | "赠品挺好的。", 166 | "很贴心,能有这样免费赠送的赠品!", 167 | "正好想着要不要多买一份增值服务,没想到还有这样的赠品。", 168 | "赠品正合我意。", 169 | "赠品很好,挺不错的。", 170 | "本来买了产品以后还有些担心。但是看到赠品以后就放心了。", 171 | "不论品质如何,至少说明店家对客的态度很好!", 172 | "我很喜欢这些商品!", 173 | "我对于商品的附加值很在乎,恰好这些赠品为这件商品提供了这样的的附加值,这令我很满意。" 174 | "感觉现在的网购环境环境越来越好了,以前网购的时候还没有过么多贴心的赠品和增值服务", 175 | "第一次用京东,被这种赠品和增值服物的良好态度感动到了。", 176 | "赠品还行。", 177 | ] 178 | else: 179 | result = spider.getData(2, 3) # 这里可以自己改 180 | opts["logger"].debug("Result: %s", result) 181 | 182 | # class 0是评价 1是提取id 183 | try: 184 | name = jieba.analyse.textrank(pname, topK=5, allowPOS="n")[0] 185 | opts["logger"].debug("Name: %s", name) 186 | except Exception as e: 187 | opts["logger"].warning( 188 | 'jieba textrank analysis error: %s, name fallback to "宝贝"', e 189 | ) 190 | name = "宝贝" 191 | if _class == 1: 192 | opts["logger"].debug("_class is 1. 
Directly return name") 193 | return name 194 | else: 195 | num = 0 196 | if _type == 1: 197 | num = 6 198 | elif _type == 0: 199 | num = 4 200 | num = min(num, len(result)) 201 | # use `.join()` to improve efficiency 202 | comments = "".join(random.sample(result, num)) 203 | opts["logger"].debug("_type: %d", _type) 204 | opts["logger"].debug("num: %d", num) 205 | opts["logger"].debug("Raw comments: %s", comments) 206 | 207 | return 5, comments.replace("$", name) 208 | 209 | 210 | # 查询全部评价 211 | def all_evaluate(opts=None): 212 | opts = opts or {} 213 | N = {} 214 | url = "https://club.jd.com/myJdcomments/myJdcomment.action?" 215 | opts["logger"].info("URL: %s", url) 216 | opts["logger"].debug("Fetching website data") 217 | req = requests.get(url, headers=headers) 218 | opts["logger"].debug( 219 | "Successfully accepted the response with status code %d", req.status_code 220 | ) 221 | if not req.ok: 222 | opts["logger"].debug( 223 | "Status code of the response is %d, not 200", req.status_code 224 | ) 225 | req_et = etree.HTML(req.text) 226 | opts["logger"].debug("Successfully parsed an XML tree") 227 | evaluate_data = req_et.xpath('//*[@id="main"]/div[2]/div[1]/div/ul/li') 228 | # print(evaluate) 229 | loop_times = len(evaluate_data) 230 | opts["logger"].debug("Total loop times: %d", loop_times) 231 | for i, ev in enumerate(evaluate_data): 232 | opts["logger"].debug("Loop: %d / %d", i + 1, loop_times) 233 | na = ev.xpath("a/text()")[0] 234 | opts["logger"].debug("na: %s", na) 235 | try: 236 | num = ev.xpath("b/text()")[0] 237 | opts["logger"].debug("num: %s", num) 238 | except IndexError: 239 | opts["logger"].info("Can't find num content in XPath, fallback to 0") 240 | num = 0 241 | N[na] = int(num) 242 | return N 243 | 244 | 245 | def delete_jpg(): 246 | current_directory = os.getcwd() 247 | files = os.listdir(current_directory) 248 | for file in files: 249 | if file.lower().endswith(".jpg"): 250 | # 构建完整的文件路径 251 | file_path = os.path.join(current_directory, file) 252 | # 删除文件 253 | os.remove(file_path) 254 | 255 | 256 | # 普通评价 257 | def ordinary(N, opts=None): 258 | time.sleep(3) 259 | opts = opts or {} 260 | Order_data = [] 261 | req_et = [] 262 | imgCommentCount_bool = True 263 | loop_times = N["待评价订单"] // 20 264 | opts["logger"].debug("Fetching website data") 265 | opts["logger"].debug("Total loop times: %d", loop_times) 266 | for i in range(loop_times + 1): 267 | url = ( 268 | f"https://club.jd.com/myJdcomments/myJdcomment.action?sort=0&" 269 | f"page={i + 1}" 270 | ) 271 | opts["logger"].debug("URL: %s", url) 272 | req = requests.get(url, headers=headers) 273 | opts["logger"].debug( 274 | "Successfully accepted the response with status code %d", req.status_code 275 | ) 276 | if not req.ok: 277 | opts["logger"].warning( 278 | "Status code of the response is %d, not 200", req.status_code 279 | ) 280 | req_et.append(etree.HTML(req.text)) 281 | opts["logger"].debug("Successfully parsed an XML tree") 282 | opts["logger"].debug("Fetching data from XML trees") 283 | opts["logger"].debug("Total loop times: %d", loop_times) 284 | for idx, i in enumerate(req_et): 285 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 286 | opts["logger"].debug("Fetching order data in the default XPath") 287 | elems = i.xpath('//*[@id="main"]/div[2]/div[2]/table/tbody') 288 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 289 | Order_data.extend(elems) 290 | if len(Order_data) != N["待评价订单"]: 291 | opts["logger"].debug( 292 | 'Count of fetched order data doesn\'t equal N["待评价订单"]' 
293 | ) 294 | opts["logger"].debug("Clear the list Order_data") 295 | Order_data = [] 296 | opts["logger"].debug("Total loop times: %d", loop_times) 297 | for idx, i in enumerate(req_et): 298 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 299 | opts["logger"].debug("Fetching order data in another XPath") 300 | elems = i.xpath('//*[@id="main"]/div[2]/div[2]/table') 301 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 302 | Order_data.extend(elems) 303 | 304 | opts["logger"].info(f"当前共有{N['待评价订单']}个评价。") 305 | opts["logger"].debug("Commenting on items") 306 | for i, Order in enumerate(Order_data): 307 | try: 308 | oid = Order.xpath('tr[@class="tr-th"]/td/span[3]/a/text()')[0] 309 | opts["logger"].debug("oid: %s", oid) 310 | oname_data = Order.xpath( 311 | 'tr[@class="tr-bd"]/td[1]/div[1]/div[2]/div/a/text()' 312 | ) 313 | opts["logger"].debug("oname_data: %s", oname_data) 314 | pid_data = Order.xpath('tr[@class="tr-bd"]/td[1]/div[1]/div[2]/div/a/@href') 315 | opts["logger"].debug("pid_data: %s", pid_data) 316 | except IndexError: 317 | opts["logger"].warning(f"第{i + 1}个订单未查找到商品,跳过。") 318 | continue 319 | loop_times1 = min(len(oname_data), len(pid_data)) 320 | opts["logger"].debug("Commenting on orders") 321 | opts["logger"].debug("Total loop times: %d", loop_times1) 322 | idx = 0 323 | for oname, pid in zip(oname_data, pid_data): 324 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times1) 325 | pid = pid.replace("//item.jd.com/", "").replace(".html", "") 326 | opts["logger"].debug("pid: %s", pid) 327 | if "javascript" in pid: 328 | opts["logger"].error( 329 | "pid_data: %s,这个订单估计是京东外卖的,会导致此次评价失败,请把该 %s 商品手工评价后再运行程序。" 330 | % (pid, oname), 331 | ) 332 | continue 333 | opts["logger"].info(f"\t{i}.开始评价订单\t{oname}[{oid}]并晒图") 334 | url2 = "https://club.jd.com/myJdcomments/saveProductComment.action" 335 | opts["logger"].debug("URL: %s", url2) 336 | xing, Str = generation(oname, opts=opts) 337 | opts["logger"].info(f"\t\t评价内容,星级{xing}:" + Str) 338 | # 获取图片 339 | opts["logger"].info(f"\t\t开始获取图片") 340 | img_url = ( 341 | f"https://club.jd.com/discussion/getProductPageImageCommentList" 342 | f".action?productId={pid}" 343 | ) 344 | opts["logger"].debug("Fetching images using the default URL") 345 | opts["logger"].debug("URL: %s", img_url) 346 | img_resp = requests.get(img_url, headers=headers) 347 | opts["logger"].debug( 348 | "Successfully accepted the response with status code %d", 349 | img_resp.status_code, 350 | ) 351 | if not req.ok: 352 | opts["logger"].warning( 353 | "Status code of the response is %d, not 200", img_resp.status_code 354 | ) 355 | opts["logger"].info("imgdata_url:" + img_url) 356 | imgdata = img_resp.json() 357 | opts["logger"].debug("Image data: %s", imgdata) 358 | if imgdata["imgComments"]["imgCommentCount"] == 0: 359 | opts["logger"].warning("这单没有图片数据,所以直接默认五星好评!!") 360 | imgCommentCount_bool = False 361 | elif imgdata["imgComments"]["imgCommentCount"] > 0: 362 | imgurl1 = imgdata["imgComments"]["imgList"][0]["imageUrl"] 363 | opts["logger"].info("imgurl1 url: %s", imgurl1) 364 | imgurl2 = imgdata["imgComments"]["imgList"][1]["imageUrl"] 365 | opts["logger"].info("imgurl2 url: %s", imgurl2) 366 | session = requests.Session() 367 | imgBasic = "//img20.360buyimg.com/shaidan/s645x515_" 368 | imgName1 = generate_unique_filename() 369 | opts["logger"].debug(f"Image :{imgName1}") 370 | downloaded_file1 = download_image(imgurl1, imgName1) 371 | # 上传图片 372 | if downloaded_file1: 373 | imgPart1 = upload_image( 374 | imgName1, 
downloaded_file1, session, headers
375 |                     )
376 |                     # print(imgPart1) # 和上传图片操作
377 |                     if imgPart1.status_code == 200 and ".jpg" in imgPart1.text:
378 |                         imgurl1t = f"{imgBasic}{imgPart1.text}"
379 |                     else:
380 |                         imgurl1 = ""
381 |                         opts["logger"].info("上传图片失败")
382 |                         exit(0)
383 |                 imgName2 = generate_unique_filename()
384 |                 opts["logger"].debug(f"Image :{imgName2}")
385 |                 downloaded_file2 = download_image(imgurl2, imgName2)
386 |                 # 上传图片
387 |                 if downloaded_file2:
388 |                     imgPart2 = upload_image(
389 |                         imgName2, downloaded_file2, session, headers
390 |                     )
391 |                     # print(imgPart2) # 和上传图片操作
392 |                     if imgPart2.status_code == 200 and ".jpg" in imgPart2.text:
393 |                         imgurl2t = f"{imgBasic}{imgPart2.text}"
394 |                     else:
395 |                         imgurl2 = ""
396 |                         opts["logger"].info("上传图片失败")
397 |                         exit(0)
398 |                 imgurl = imgurl1 + "," + imgurl2
399 |                 opts["logger"].debug("Image URL: %s", imgurl)
400 |                 opts["logger"].info(f"\t\t图片url={imgurl}")
401 |             Str: str = urllib.parse.quote(Str, safe="/", encoding=None, errors=None)
402 |             Comment_data = {
403 |                 "orderId": oid,
404 |                 "productId": pid,  # 商品id
405 |                 "score": str(xing),  # 商品几星
406 |                 "content": Str,  # 评价内容
407 |                 "saveStatus": "1",
408 |                 "anonymousFlag": "1",  # 是否匿名
409 |             }
410 |             if imgCommentCount_bool:
411 |                 Comment_data["imgs"] = imgurl  # 图片url
412 |             opts["logger"].debug("Data: %s", Comment_data)
413 |             if not opts.get("dry_run"):
414 |                 opts["logger"].debug("Sending comment request")
415 |                 Comment_resp = requests.post(url2, headers=headers2, data=Comment_data)
416 |                 opts["logger"].info(
417 |                     "发送请求后的状态码:{},text:{}".format(
418 |                         Comment_resp.status_code, Comment_resp.text
419 |                     )
420 |                 )
421 |                 # 当发送后的状态码为 200,并且返回值里的 success 为 true 时才算评论(晒图)成功,其余状态均视为失败
422 |                 if Comment_resp.status_code == 200 and Comment_resp.json()["success"]:
423 |                     opts["logger"].info(f"\t{i}.评价订单\t{oname}[{oid}]评论成功")
424 |                 else:
425 |                     opts["logger"].warning(f"\t{i}.评价订单\t{oname}[{oid}]评论失败")
426 |             else:
427 |                 opts["logger"].debug("Skipped sending comment request in dry run")
428 |             opts["logger"].debug("Sleep time (s): %.1f", ORDINARY_SLEEP_SEC)
429 |             time.sleep(ORDINARY_SLEEP_SEC)
430 |             idx += 1
431 |         N["待评价订单"] -= 1
432 |     # 删除当前目录下的所有 jpg 图片
433 |     # delete_jpg()
434 |     return N
435 | 
436 | 
437 | """
438 | # 晒单评价
439 | def sunbw(N, opts=None):
440 |     opts = opts or {}
441 |     Order_data = []
442 |     loop_times = N['待晒单'] // 20
443 |     opts['logger'].debug('Fetching website data')
444 |     opts['logger'].debug('Total loop times: %d', loop_times)
445 |     for i in range(loop_times + 1):
446 |         opts['logger'].debug('Loop: %d / %d', i + 1, loop_times)
447 |         url = (f'https://club.jd.com/myJdcomments/myJdcomment.action?sort=1'
448 |                f'&page={i + 1}')
449 |         opts['logger'].debug('URL: %s', url)
450 |         req = requests.get(url, headers=headers)
451 |         opts['logger'].debug(
452 |             'Successfully accepted the response with status code %d',
453 |             req.status_code)
454 |         if not req.ok:
455 |             opts['logger'].warning(
456 |                 'Status code of the response is %d, not 200', req.status_code)
457 |         req_et = etree.HTML(req.text)
458 |         opts['logger'].debug('Successfully parsed an XML tree')
459 |         opts['logger'].debug('Fetching data from XML trees')
460 |         elems = req_et.xpath(
461 |             '//*[@id="evalu01"]/div[2]/div[1]/div[@class="comt-plist"]/div[1]')
462 |         opts['logger'].debug('Count of fetched order data: %d', len(elems))
463 |         Order_data.extend(elems)
464 |     opts['logger'].info(f"当前共有{N['待晒单']}个需要晒单。")
465 |     opts['logger'].debug('Commenting on items')
466 |     for i, Order in enumerate(Order_data):
467 |         oname = 
Order.xpath('ul/li[1]/div/div[2]/div[1]/a/text()')[0] 468 | pid = Order.xpath('@pid')[0] 469 | oid = Order.xpath('@oid')[0] 470 | opts['logger'].info(f'\t开始第{i+1},{oname}') 471 | opts['logger'].debug('pid: %s', pid) 472 | opts['logger'].debug('oid: %s', oid) 473 | # 获取图片 474 | url1 = (f'https://club.jd.com/discussion/getProductPageImageCommentList' 475 | f'.action?productId={pid}') 476 | opts['logger'].debug('Fetching images using the default URL') 477 | opts['logger'].debug('URL: %s', url1) 478 | req1 = requests.get(url1, headers=headers) 479 | opts['logger'].debug( 480 | 'Successfully accepted the response with status code %d', 481 | req1.status_code) 482 | if not req.ok: 483 | opts['logger'].warning( 484 | 'Status code of the response is %d, not 200', req1.status_code) 485 | imgdata = req1.json() 486 | opts['logger'].debug('Image data: %s', imgdata) 487 | if imgdata["imgComments"]["imgCommentCount"] == 0: 488 | opts['logger'].debug('Count of fetched image comments is 0') 489 | opts['logger'].debug('Fetching images using another URL') 490 | url1 = ('https://club.jd.com/discussion/getProductPageImage' 491 | 'CommentList.action?productId=1190881') 492 | opts['logger'].debug('URL: %s', url1) 493 | req1 = requests.get(url1, headers=headers) 494 | opts['logger'].debug( 495 | 'Successfully accepted the response with status code %d', 496 | req1.status_code) 497 | if not req.ok: 498 | opts['logger'].warning( 499 | 'Status code of the response is %d, not 200', 500 | req1.status_code) 501 | imgdata = req1.json() 502 | opts['logger'].debug('Image data: %s', imgdata) 503 | imgurl = imgdata["imgComments"]["imgList"][0]["imageUrl"] 504 | opts['logger'].debug('Image URL: %s', imgurl) 505 | 506 | opts['logger'].info(f'\t\t图片url={imgurl}') 507 | # 提交晒单 508 | opts['logger'].debug('Preparing for commenting') 509 | url2 = "https://club.jd.com/myJdcomments/saveShowOrder.action" 510 | opts['logger'].debug('URL: %s', url2) 511 | headers['Referer'] = ('https://club.jd.com/myJdcomments/myJdcomment.' 
512 | 'action?sort=1') 513 | headers['Origin'] = 'https://club.jd.com' 514 | headers['Content-Type'] = 'application/x-www-form-urlencoded' 515 | opts['logger'].debug('New header for this request: %s', headers) 516 | data = { 517 | 'orderId': oid, 518 | 'productId': pid, 519 | 'imgs': imgurl, 520 | 'saveStatus': 3 521 | } 522 | opts['logger'].debug('Data: %s', data) 523 | if not opts.get('dry_run'): 524 | opts['logger'].debug('Sending comment request') 525 | req_url2 = requests.post(url2, data=data, headers=headers) 526 | else: 527 | opts['logger'].debug('Skipped sending comment request in dry run') 528 | opts['logger'].info('完成') 529 | opts['logger'].debug('Sleep time (s): %.1f', SUNBW_SLEEP_SEC) 530 | time.sleep(SUNBW_SLEEP_SEC) 531 | N['待晒单'] -= 1 532 | return N 533 | """ 534 | 535 | # 追评 536 | 537 | 538 | def review(N, opts=None): 539 | opts = opts or {} 540 | req_et = [] 541 | Order_data = [] 542 | loop_times = N["待追评"] // 20 543 | opts["logger"].debug("Fetching website data") 544 | opts["logger"].debug("Total loop times: %d", loop_times) 545 | for i in range(loop_times + 1): 546 | opts["logger"].debug("Loop: %d / %d", i + 1, loop_times) 547 | url = ( 548 | f"https://club.jd.com/myJdcomments/myJdcomment.action?sort=3" 549 | f"&page={i + 1}" 550 | ) 551 | opts["logger"].debug("URL: %s", url) 552 | req = requests.get(url, headers=headers) 553 | opts["logger"].debug( 554 | "Successfully accepted the response with status code %d", req.status_code 555 | ) 556 | if not req.ok: 557 | opts["logger"].warning( 558 | "Status code of the response is %d, not 200", req.status_code 559 | ) 560 | req_et.append(etree.HTML(req.text)) 561 | opts["logger"].debug("Successfully parsed an XML tree") 562 | opts["logger"].debug("Fetching data from XML trees") 563 | opts["logger"].debug("Total loop times: %d", loop_times) 564 | for idx, i in enumerate(req_et): 565 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 566 | opts["logger"].debug("Fetching order data in the default XPath") 567 | elems = i.xpath('//*[@id="main"]/div[2]/div[2]/table/tr[@class="tr-bd"]') 568 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 569 | Order_data.extend(elems) 570 | if len(Order_data) != N["待追评"]: 571 | opts["logger"].debug('Count of fetched order data doesn\'t equal N["待追评"]') 572 | # NOTE: Need them? 
573 | # opts['logger'].debug('Clear the list Order_data') 574 | # Order_data = [] 575 | opts["logger"].debug("Total loop times: %d", loop_times) 576 | for idx, i in enumerate(req_et): 577 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 578 | opts["logger"].debug("Fetching order data in another XPath") 579 | elems = i.xpath( 580 | '//*[@id="main"]/div[2]/div[2]/table/tbody/tr[@class="tr-bd"]' 581 | ) 582 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 583 | Order_data.extend(elems) 584 | opts["logger"].info(f"当前共有 {N['待追评']} 个需要追评。") 585 | opts["logger"].debug("Commenting on items") 586 | for i, Order in enumerate(Order_data): 587 | oname = Order.xpath("td[1]/div/div[2]/div/a/text()")[0] 588 | _id = Order.xpath("td[3]/div/a/@href")[0] 589 | opts["logger"].info(f"\t开始追评第{i+1},{oname}") 590 | opts["logger"].debug("_id: %s", _id) 591 | url1 = ( 592 | "https://club.jd.com/afterComments/" "saveAfterCommentAndShowOrder.action" 593 | ) 594 | opts["logger"].debug("URL: %s", url1) 595 | pid, oid = _id.replace( 596 | "http://club.jd.com/afterComments/productPublish.action?sku=", "" 597 | ).split("&orderId=") 598 | opts["logger"].debug("pid: %s", pid) 599 | if "javascript" in pid: 600 | opts["logger"].error( 601 | "pid_data: %s,这个订单估计是京东外卖的,会导致此次评价失败,请把该 %s 商品手工评价后再运行程序。" 602 | % (pid, oname), 603 | ) 604 | exit(0) 605 | opts["logger"].debug("oid: %s", oid) 606 | _, context = generation(oname, _type=0, opts=opts) 607 | opts["logger"].info(f"\t\t追评内容:{context}") 608 | context = urllib.parse.quote(context, safe="/", encoding=None, errors=None) 609 | data1 = { 610 | "orderId": oid, 611 | "productId": pid, 612 | "content": context, 613 | "anonymousFlag": 1, 614 | "score": 5, 615 | "imgs": "", 616 | } 617 | opts["logger"].debug("Data: %s", data1) 618 | if not opts.get("dry_run"): 619 | opts["logger"].debug("Sending comment request") 620 | pj1 = requests.post(url1, headers=headers2, data=data1) 621 | opts["logger"].debug( 622 | "发送请求后的状态码:{},text:{}".format(pj1.status_code, pj1.text) 623 | ) 624 | else: 625 | opts["logger"].debug("Skipped sending comment request in dry run") 626 | opts["logger"].info("完成") 627 | opts["logger"].debug("Sleep time (s): %.1f", REVIEW_SLEEP_SEC) 628 | time.sleep(REVIEW_SLEEP_SEC) 629 | N["待追评"] -= 1 630 | return N 631 | 632 | 633 | # 服务评价 634 | def Service_rating(N, opts=None): 635 | opts = opts or {} 636 | Order_data = [] 637 | req_et = [] 638 | loop_times = N["服务评价"] // 20 639 | opts["logger"].debug("Fetching website data") 640 | opts["logger"].debug("Total loop times: %d", loop_times) 641 | for i in range(loop_times + 1): 642 | opts["logger"].debug("Loop: %d / %d", i + 1, loop_times) 643 | url = ( 644 | f"https://club.jd.com/myJdcomments/myJdcomment.action?sort=4" 645 | f"&page={i + 1}" 646 | ) 647 | opts["logger"].debug("URL: %s", url) 648 | req = requests.get(url, headers=headers) 649 | opts["logger"].debug( 650 | "Successfully accepted the response with status code %d", req.status_code 651 | ) 652 | if not req.ok: 653 | opts["logger"].warning( 654 | "Status code of the response is %d, not 200", req.status_code 655 | ) 656 | req_et.append(etree.HTML(req.text)) 657 | opts["logger"].debug("Successfully parsed an XML tree") 658 | opts["logger"].debug("Fetching data from XML trees") 659 | opts["logger"].debug("Total loop times: %d", loop_times) 660 | for idx, i in enumerate(req_et): 661 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 662 | opts["logger"].debug("Fetching order data in the default XPath") 663 | elems = 
i.xpath('//*[@id="main"]/div[2]/div[2]/table/tbody/tr[@class="tr-bd"]') 664 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 665 | Order_data.extend(elems) 666 | if len(Order_data) != N["服务评价"]: 667 | opts["logger"].debug('Count of fetched order data doesn\'t equal N["服务评价"]') 668 | opts["logger"].debug("Clear the list Order_data") 669 | Order_data = [] 670 | opts["logger"].debug("Total loop times: %d", loop_times) 671 | for idx, i in enumerate(req_et): 672 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 673 | opts["logger"].debug("Fetching order data in another XPath") 674 | elems = i.xpath('//*[@id="main"]/div[2]/div[2]/table/tr[@class="tr-bd"]') 675 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 676 | Order_data.extend(elems) 677 | opts["logger"].info(f"当前共有{N['服务评价']}个需要第一次服务评价。") 678 | opts["logger"].debug("Commenting on items") 679 | for i, Order in enumerate(Order_data): 680 | oname = Order.xpath("td[1]/div[1]/div[2]/div/a/text()")[0] 681 | try: 682 | oid = Order.xpath("td[4]/div/a[1]/@oid")[0] 683 | except IndexError: 684 | opts["logger"].warning("Failed to fetch oid") 685 | continue 686 | opts["logger"].info(f"\t开始第一次评论,{i+1},{oname}") 687 | opts["logger"].debug("oid: %s", oid) 688 | url1 = ( 689 | f"https://club.jd.com/myJdcomments/insertRestSurvey.action" 690 | f"?voteid=145&ruleid={oid}" 691 | ) 692 | opts["logger"].debug("URL: %s", url1) 693 | data1 = { 694 | "oid": oid, 695 | "gid": "32", 696 | "sid": "186194", 697 | "stid": "0", 698 | "tags": "", 699 | "ro591": f"591A{random.randint(4, 5)}", # 商品符合度 700 | "ro592": f"592A{random.randint(4, 5)}", # 店家服务态度 701 | "ro593": f"593A{random.randint(4, 5)}", # 快递配送速度 702 | "ro899": f"899A{random.randint(4, 5)}", # 快递员服务 703 | "ro900": f"900A{random.randint(4, 5)}", # 快递员服务 704 | } 705 | opts["logger"].debug("Data: %s", data1) 706 | if not opts.get("dry_run"): 707 | opts["logger"].debug("Sending comment request") 708 | pj1 = requests.post(url1, headers=headers, data=data1) 709 | else: 710 | opts["logger"].debug("Skipped sending comment request in dry run") 711 | opts["logger"].info("\t\t " + pj1.text) 712 | opts["logger"].debug("Sleep time (s): %.1f", SERVICE_RATING_SLEEP_SEC) 713 | time.sleep(SERVICE_RATING_SLEEP_SEC) 714 | N["服务评价"] -= 1 715 | return N 716 | 717 | 718 | def No(opts=None): 719 | opts = opts or {} 720 | # opts["logger"].info("") 721 | N = all_evaluate(opts) 722 | s = "----".join(["{} {}".format(i, N[i]) for i in N]) 723 | opts["logger"].info(s) 724 | # opts["logger"].info("") 725 | return N 726 | 727 | 728 | def main(opts=None): 729 | opts = opts or {} 730 | opts["logger"].info("开始京东批量评价!") 731 | N = No(opts) 732 | opts["logger"].debug("N value after executing No(): %s", N) 733 | if not N: 734 | opts["logger"].error("Ck出现错误,请重新抓取!") 735 | exit() 736 | opts["logger"].info(f"已评价:{N['已评价']}个") 737 | if N["待评价订单"] != 0: 738 | opts["logger"].info("1.开始普通评价") 739 | N = ordinary(N, opts) 740 | opts["logger"].debug("N value after executing ordinary(): %s", N) 741 | N = No(opts) 742 | opts["logger"].debug("N value after executing No(): %s", N) 743 | """ "待晒单" is no longer found in N{} instead of "已评价" 744 | if N['待晒单'] != 0: 745 | opts['logger'].info("2.开始晒单评价") 746 | N = sunbw(N, opts) 747 | opts['logger'].debug('N value after executing sunbw(): %s', N) 748 | N = No(opts) 749 | opts['logger'].debug('N value after executing No(): %s', N) 750 | """ 751 | if N["待追评"] != 0: 752 | opts["logger"].info("3.开始批量追评,注意:追评不会自动上传图片") 753 | N = review(N, opts) 754 | 
opts["logger"].debug("N value after executing review(): %s", N) 755 | N = No(opts) 756 | opts["logger"].debug("N value after executing No(): %s", N) 757 | if N["服务评价"] != 0: 758 | opts["logger"].info("4.开始服务评价") 759 | N = Service_rating(N, opts) 760 | opts["logger"].debug("N value after executing Service_rating(): %s", N) 761 | N = No(opts) 762 | opts["logger"].debug("N value after executing No(): %s", N) 763 | opts["logger"].info("全部完成啦!") 764 | for i in N: 765 | if N[i] != 0: 766 | opts["logger"].warning("出现了二次错误,跳过了部分,重新尝试") 767 | main(opts) 768 | 769 | 770 | if __name__ == "__main__": 771 | # parse arguments 772 | parser = argparse.ArgumentParser() 773 | parser.add_argument( 774 | "--dry-run", 775 | help="have a full run without comment submission", 776 | action="store_true", 777 | ) 778 | parser.add_argument( 779 | "-lv", 780 | "--log-level", 781 | help="specify logging level (default: info)", 782 | default="INFO", 783 | ) 784 | parser.add_argument( 785 | "-o", "--log-file", help="specify logging file", default="log.txt" 786 | ) 787 | args = parser.parse_args() 788 | if args.log_level.upper() not in [ 789 | "DEBUG", 790 | "WARN", 791 | "INFO", 792 | "ERROR", 793 | "FATAL", 794 | # NOTE: `WARN` is an alias of `WARNING`. `FATAL` is an alias of 795 | # `CRITICAL`. Using these aliases is for developers' and users' 796 | # convenience. 797 | # NOTE: Now there is no logging on `CRITICAL` level. 798 | ]: 799 | args.log_level = "INFO" 800 | else: 801 | args.log_level = args.log_level.upper() 802 | opts = {"dry_run": args.dry_run, "log_level": args.log_level} 803 | if hasattr(args, "log_file"): 804 | opts["log_file"] = args.log_file 805 | else: 806 | opts["log_file"] = None 807 | 808 | # logging on console 809 | _logging_level = getattr(logging, opts["log_level"]) 810 | logger = logging.getLogger("comment") 811 | logger.setLevel(level=_logging_level) 812 | # NOTE: `%(levelname)s` will be parsed as the original name (`FATAL` -> 813 | # `CRITICAL`, `WARN` -> `WARNING`). 814 | # NOTE: The alignment number should set to 19 considering the style 815 | # controling characters. When it comes to file logger, the number should 816 | # set to 8. 817 | formatter = StyleFormatter("%(asctime)s %(levelname)-19s %(message)s") 818 | rawformatter = StyleFormatter( 819 | "%(asctime)s %(levelname)-8s %(message)s", use_style=False 820 | ) 821 | console = logging.StreamHandler() 822 | console.setLevel(_logging_level) 823 | console.setFormatter(formatter) 824 | logger.addHandler(console) 825 | opts["logger"] = logger 826 | # It's a hack!!! 827 | jieba.default_logger = logging.getLogger("jieba") 828 | jieba.default_logger.setLevel(level=_logging_level) 829 | jieba.default_logger.addHandler(console) 830 | # It's another hack!!! 
831 | jdspider.default_logger = logging.getLogger("spider") 832 | jdspider.default_logger.setLevel(level=_logging_level) 833 | jdspider.default_logger.addHandler(console) 834 | 835 | logger.debug("Successfully set up console logger") 836 | logger.debug("CLI arguments: %s", args) 837 | logger.debug("Opening the log file") 838 | if opts["log_file"]: 839 | try: 840 | handler = logging.FileHandler(opts["log_file"], "w") 841 | except Exception as e: 842 | logger.error("Failed to open the file handler") 843 | logger.error("Error message: %s", e) 844 | sys.exit(1) 845 | handler.setLevel(_logging_level) 846 | handler.setFormatter(rawformatter) 847 | logger.addHandler(handler) 848 | jieba.default_logger.addHandler(handler) 849 | jdspider.default_logger.addHandler(handler) 850 | logger.debug("Successfully set up file logger") 851 | logger.debug("Options passed to functions: %s", opts) 852 | logger.debug("Builtin constants:") 853 | logger.debug(" CONFIG_PATH: %s", CONFIG_PATH) 854 | logger.debug(" USER_CONFIG_PATH: %s", USER_CONFIG_PATH) 855 | logger.debug(" ORDINARY_SLEEP_SEC: %s", ORDINARY_SLEEP_SEC) 856 | logger.debug(" SUNBW_SLEEP_SEC: %s", SUNBW_SLEEP_SEC) 857 | logger.debug(" REVIEW_SLEEP_SEC: %s", REVIEW_SLEEP_SEC) 858 | logger.debug(" SERVICE_RATING_SLEEP_SEC: %s", SERVICE_RATING_SLEEP_SEC) 859 | 860 | # parse configurations 861 | logger.debug("Reading the configuration file") 862 | if os.path.exists(USER_CONFIG_PATH): 863 | logger.debug("User configuration file exists") 864 | _cfg_path = USER_CONFIG_PATH 865 | else: 866 | logger.debug( 867 | "User configuration file doesn't exist, fallback to the default one" 868 | ) 869 | _cfg_path = CONFIG_PATH 870 | with open(_cfg_path, "r", encoding="utf-8") as f: 871 | cfg = yaml.safe_load(f) 872 | logger.debug("Closed the configuration file") 873 | logger.debug("Configurations in Python-dict format: %s", cfg) 874 | ck = cfg["user"]["cookie"] 875 | jdspider.cookie = ck.encode("utf-8") 876 | 877 | headers2 = { 878 | "Cookie": ck.encode("utf-8"), 879 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " 880 | "Chrome/114.0.5735.110 Safari/537.36", 881 | "Connection": "keep-alive", 882 | "Cache-Control": "max-age=0", 883 | "X-Requested-With": "XMLHttpRequest", 884 | "sec-ch-ua": "", 885 | "sec-ch-ua-mobile": "?0", 886 | "sec-ch-ua-platform": "", 887 | "DNT": "1", 888 | "Upgrade-Insecure-Requests": "1", 889 | "Accept": "application/json, text/javascript, */*; q=0.01", 890 | "Sec-Fetch-Site": "same-origin", 891 | "Sec-Fetch-Mode": "cors", 892 | "Sec-Fetch-User": "?1", 893 | "Sec-Fetch-Dest": "empty", 894 | "Referer": "https://club.jd.com/", 895 | "Accept-Encoding": "gzip, deflate", 896 | "Accept-Language": "zh-CN,zh;q=0.9", 897 | # 'Content-Type':'application/x-www-form-urlencoded' 898 | } 899 | headers = { 900 | "Cookie": ck.encode("utf-8"), 901 | "User-Agent": '''Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0 Sec-Ch-Ua: "Chromium";v="136", "Microsoft Edge";v="136", "Not.A/Brand";v="99"''', 902 | "DNT": "1", 903 | # "Connection": "keep-alive", 904 | # "Cache-Control": "max-age=0", 905 | # "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"', 906 | # "sec-ch-ua-mobile": "?0", 907 | # "sec-ch-ua-platform": '"Windows"', 908 | # "Upgrade-Insecure-Requests": "1", 909 | # "Accept": "*/*", 910 | # "Sec-Fetch-Site": "same-site", 911 | # "Sec-Fetch-Mode": "navigate", 912 | # "origin": "https://club.jd.com", 913 
| # "Sec-Fetch-User": "?1", 914 | # "Sec-Fetch-Dest": "document", 915 | # "Referer": "https://order.jd.com/", 916 | # "Accept-Encoding": "gzip, deflate, br, zstd", 917 | # "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6", 918 | } 919 | logger.debug("Builtin HTTP request header: %s", headers) 920 | 921 | logger.debug("Starting main processes") 922 | try: 923 | main(opts) 924 | # NOTE: It needs 3,000 times to raise this exception. Do you really want to 925 | # do like this? 926 | except RecursionError: 927 | logger.error("多次出现未完成情况,程序自动退出") 928 | --------------------------------------------------------------------------------