├── config.yml ├── requirements.txt ├── .github ├── ISSUE_TEMPLATE │ ├── 自定义问题模板.md │ ├── 功能需求.md │ └── bug报告.md └── workflows │ └── release.yml ├── LICENSE ├── .gitignore ├── README.md ├── jdspider.py └── auto_comment_plus.py /config.yml: -------------------------------------------------------------------------------- 1 | user: 2 | cookie: ''' ''' 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jieba 2 | requests 3 | lxml 4 | zhon 5 | pyyaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/自定义问题模板.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 自定义问题模板 3 | about: 非 bug、需求类问题可以使用此模板。 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/功能需求.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 功能需求 3 | about: 为这个项目提出一个想法(需求)。 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | **您的功能请求是否与问题有关?请描述** 10 | 对问题所在的清晰简洁的描述。Ex.当〔…〕 11 | 12 | **描述您想要的解决方案** 13 | 对你想要发生的事情的清晰简洁的描述。 14 | 15 | **描述你考虑过的替代方案** 16 | 对您考虑过的任何替代解决方案或功能的清晰简洁的描述。 17 | 18 | **附加上下文** 19 | 在此处添加有关功能请求的任何其他上下文或屏幕截图。 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug报告.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug报告 3 | about: 创建Bug报告以帮助我们改进 4 | title: "" 5 | labels: "" 6 | assignees: "" 7 | --- 8 | 9 | **描述错误** 10 | 11 | 清晰简洁地描述错误是什么! 12 | 将当前程序运行目录下的 log.txt 内容复制出来或者整个上传好让开发者排查问题。 13 | 14 | **重现方式** 15 | 16 | 1. 17 | 18 | 2. 19 | 20 | 3. 21 | 22 | **预期行为** 23 | 24 | 对您期望发生的事情进行清晰简洁的描述。 25 | 26 | **截图** 27 | 28 | 如果适用,请添加屏幕截图以帮助解释您的问题。 29 | 30 | **桌面(请填写以下信息)** 31 | 32 | - OS: [e.g. iOS] 33 | - Version [e.g. 22] 34 | 35 | **日志 log** 36 | 在此处以文本形式提供对应的 log 日志来让开发人员排查代码问题。 37 | 38 | **其他** 39 | 在此处添加有关该问题的任何其他上下文。 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Dimlitter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
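
下面补充一个最小示例(假设性脚本,并非仓库自带文件),演示上面 `config.yml` 中 `cookie` 字段的读取方式:与 `auto_comment_plus.py` 的逻辑一致,优先读取 `config.user.yml`,不存在时回退到 `config.yml`,然后把 cookie 放进请求头访问评价中心页面,粗略检查 cookie 是否仍然可用。其中“跳转到 passport.jd.com 即视为失效”只是一个经验性假设。

```python
# 假设性示例:读取 config.yml / config.user.yml 中的 cookie,并做一次简单的有效性检查
import os

import requests
import yaml

CONFIG_PATH = "./config.yml"
USER_CONFIG_PATH = "./config.user.yml"


def load_cookie() -> str:
    # 与 auto_comment_plus.py 相同:优先使用用户配置文件
    path = USER_CONFIG_PATH if os.path.exists(USER_CONFIG_PATH) else CONFIG_PATH
    with open(path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
    return cfg["user"]["cookie"]


if __name__ == "__main__":
    headers = {
        "Cookie": load_cookie(),
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    }
    # 访问“我的评价”页面;cookie 失效时通常会被重定向到登录页(经验性假设)
    resp = requests.get(
        "https://club.jd.com/myJdcomments/myJdcomment.action", headers=headers
    )
    print("状态码:", resp.status_code)
    print("疑似跳转到登录页:", "passport.jd.com" in resp.url)
```
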
22 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: push_release 2 | 3 | permissions: 4 | contents: write 5 | 6 | on: 7 | push: 8 | tags: 9 | - "v*" 10 | 11 | jobs: 12 | tagged-release: 13 | name: "Tagged Release" 14 | runs-on: "ubuntu-latest" 15 | steps: 16 | # ... 17 | - name: 🛎️ 检出代码 18 | uses: actions/checkout@v4 19 | 20 | - name: 📝 输出提交信息 21 | run: | 22 | echo "======================================" 23 | echo "🚀 触发 Release 工作流" 24 | echo "分支: ${{ github.ref }}" 25 | echo "Tag: ${{ github.ref_name }}" 26 | echo "提交者: ${{ github.actor }}" 27 | echo "提交信息: ${{ github.event.head_commit.message }}" 28 | echo "提交时间: ${{ github.event.head_commit.timestamp }}" 29 | echo "======================================" 30 | 31 | - name: 🎉 创建 Release 32 | uses: softprops/action-gh-release@v2 33 | with: 34 | tag_name: ${{ github.ref_name }} 35 | name: "🎉 Release ${{ github.ref_name }}" 36 | body: | 37 | ## 🚀 发布说明 38 | 39 | **发布人**: `${{ github.actor }}` 40 | **分支**: `${{ github.ref }}` 41 | **Tag**: `${{ github.ref_name }}` 42 | **提交信息**: 43 | > ${{ github.event.head_commit.message }} 44 | 45 | **提交时间**: `${{ github.event.head_commit.timestamp }}` 46 | 47 | --- 48 | 49 | 自动化发布,感谢您的关注与支持! 50 | 51 | draft: false 52 | prerelease: false 53 | # files: | 54 | # dist/** 55 | # README.md 56 | env: 57 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | log.txt 55 | test.py 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
98 | __pypackages__/
99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 
134 | # user config
135 | config.user.yml
136 | .idea/
137 | .vscode/
138 | 
139 | # images
140 | *.jpg
141 | *.txt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # jd_AutoComment
2 | 
3 | ## 鸣谢
4 | 
5 | 感谢 [qiu-lzsnmb](https://github.com/qiu-lzsnmb) 大佬的脚本和 [Zhang Jiale](https://github.com/2274900) 大佬的评论爬虫。
6 | 
7 | 源库链接:[自动评价](https://github.com/qiu-lzsnmb/jd_lzsnmb)
8 | [评论爬虫](https://github.com/2274900/JD_comment_spider)
9 | 
10 | ### 本脚本是对以上两个项目的结合与魔改,用于解决评论文不对题的问题。经测试,本脚本能初步解决这一问题。
11 | 
12 | ## 思路
13 | 
14 | 由爬虫先对同类商品的既有评价进行爬取,再在此基础上生成自己的评价。
15 | 
16 | ## 用法
17 | 
18 | > 请先确保 Python 版本为 3.8+,最好是 3.10+。
19 | 
20 | ### 分支说明
21 | 
22 | main 分支为开发版,更新较快,但由于开发者的 cookie 数量远远不足以满足开发需求,测试不够完备,可能存在 bug。
23 | 
24 | stable 分支为稳定版,更新较慢,基本可以稳定使用,但功能可能存在欠缺。
25 | 
26 | more_cookie 分支是为需要多账号批量评论的用户创建的分支。
27 | > 由于作者只有一个京东账号,该分支需要有多账号的朋友帮忙测试。
28 | > 目前的代码逻辑是:先普通评价,再追评,然后换第二个账号继续执行同样的流程。因此使用多账号时,后面的账号可能要等前面的历史追评处理完才会执行;账号较多时,cookie 可能在等待期间失效,实际效果未必理想。
29 | 
30 | 请用户自行判断使用哪个分支。
31 | 
32 | ### 安装依赖库
33 | 
34 | ```bash
35 | pip install -r requirements.txt
36 | ```
37 | ### 快速使用
38 | 
39 | 在终端中执行:
40 | 
41 | ```bash
42 | git clone https://github.com/Dimlitter/jd_AutoComment.git
43 | cd jd_AutoComment
44 | pip install -r requirements.txt
45 | ```
46 | 
47 | 打开链接 `https://club.jd.com/myJdcomments/myJdcomment.action`,登录账号后,从 xhr 请求中获取完整的 `cookie`,全部填入配置文件。可以选择填入默认配置文件 `config.yml`;也可以填入用户配置文件 `config.user.yml`(需要新建该文件,并将 `config.yml` 中的内容复制进去),以避免后续更新覆盖 `config.yml` 中的内容。
48 | 
49 | 需要填入如下内容:
50 | 
51 | ```yml
52 | user:
53 |   cookie: ''
54 | ```
55 | 
56 | 例如,若获取到的 cookie 为 `a=1; b=2; c=3`,则配置文件中填入:
57 | 
58 | ```yml
59 | user:
60 |   cookie: 'a=1; b=2; c=3'
61 | ```
62 | 
63 | 最后运行 `auto_comment_plus.py`:
64 | 
65 | ```bash
66 | python3 auto_comment_plus.py
67 | ```
68 | 
69 | **注意:** 请根据设备环境换用不同的解释器路径,如 `python`、`py`。
70 | 
71 | ### 命令行参数
72 | 
73 | 本程序支持命令行参数:
74 | 
75 | ```text
76 | usage: auto_comment_plus.py [-h] [--dry-run] [-lv LOG_LEVEL] [-o LOG_FILE]
77 | 
78 | optional arguments:
79 |   -h, --help            show this help message and exit
80 |   --dry-run             have a full run without comment submission
81 |   -lv LOG_LEVEL, --log-level LOG_LEVEL
82 |                         specify logging level (default: info)
83 |   -o LOG_FILE, --log-file LOG_FILE
84 |                         specify logging file
85 | ```
86 | 
87 | **`-h`, `--help`:**
88 | 
89 | 显示帮助文本。
90 | 
91 | **`--dry-run`:**
92 | 
93 | 完整地运行程序,但不实际提交评论。
94 | 
95 | **`-lv LOG_LEVEL`, `--log-level LOG_LEVEL`:**
96 | 
97 | 设置输出日志的等级。默认为 `INFO`。可选等级为 `DEBUG`、`INFO`、`WARNING`、`ERROR`,输出内容量依次递减。
98 | 
99 | **注意:** 若需要提交 issue 报告 bug,请将该选项设置为 `DEBUG`。
100 | 
101 | **`-o LOG_FILE`, `--log-file LOG_FILE`:**
102 | 
103 | 设置输出日志文件的路径。默认输出到当前目录下的 `log.txt`。
104 | 
105 | ## 声明
106 | 
107 | 本项目为 Python 学习交流的开源非营利项目,仅作为程序员之间相互学习交流之用。
108 | 
109 | 严禁用于商业用途,禁止使用本项目进行任何盈利活动。
110 | 
111 | 使用者请遵守相关政策。对一切非法使用所产生的后果,我们概不负责。
112 | 
113 | 如本项目对您造成困扰,请联系我们删除。
114 | 
115 | ## 许可证
116 | 
117 | 
![AUR](https://img.shields.io/badge/license-MIT%20License%202.0-green.svg) 118 | -------------------------------------------------------------------------------- /jdspider.py: -------------------------------------------------------------------------------- 1 | # @Time : 2022/2/8 20:50 2 | # @Author :@Zhang Jiale and @Dimlitter 3 | # @File : jdspider.py 4 | 5 | import json 6 | import logging 7 | import random 8 | import re 9 | import sys 10 | import time 11 | from urllib.parse import quote, urlencode 12 | 13 | import requests 14 | import yaml 15 | import zhon.hanzi 16 | from lxml import etree 17 | 18 | # 加载配置文件 19 | with open("./config.yml", "r", encoding="utf-8") as f: 20 | cfg = yaml.safe_load(f) 21 | 22 | # 获取用户的 cookie 23 | cookie = cfg["user"]["cookie"] 24 | 25 | # 配置日志输出到标准错误流 26 | log_console = logging.StreamHandler(sys.stderr) 27 | default_logger = logging.getLogger("jdspider") 28 | default_logger.setLevel(logging.DEBUG) 29 | default_logger.addHandler(log_console) 30 | 31 | # 定义基础请求头,避免重复代码 32 | BASE_HEADERS = { 33 | "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng," 34 | "*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", 35 | "accept-encoding": "gzip, deflate, br", 36 | "accept-language": "zh-CN,zh;q=0.9", 37 | "cache-control": "max-age=0", 38 | "dnt": "1", 39 | "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"', 40 | "sec-ch-ua-mobile": "?0", 41 | "sec-ch-ua-platform": '"Windows"', 42 | "sec-fetch-dest": "document", 43 | "sec-fetch-site": "none", 44 | "sec-fetch-user": "?1", 45 | "upgrade-insecure-requests": "1", 46 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " 47 | "Chrome/98.0.4758.82 Safari/537.36", 48 | } 49 | 50 | 51 | class JDSpider: 52 | """ 53 | 京东爬虫类,用于爬取指定商品类别的评论信息。 54 | 传入商品类别(如手机、电脑)构造实例,然后调用 getData 方法爬取数据。 55 | """ 56 | 57 | def __init__(self, categlory): 58 | # 京东搜索商品的起始页面 URL 59 | self.startUrl = "https://search.jd.com/Search?keyword=%s&enc=utf-8" % ( 60 | quote(categlory) 61 | ) 62 | # 评论接口的基础 URL 63 | self.commentBaseUrl = "https://club.jd.com" 64 | # 基础请求头 65 | self.headers = BASE_HEADERS.copy() 66 | # 带 cookie 的请求头 67 | self.headers2 = { 68 | **BASE_HEADERS, 69 | "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", 70 | "accept-language": "en,zh-CN;q=0.9,zh;q=0.8", 71 | "Cookie": cookie, 72 | "priority": "u=0, i", 73 | "sec-ch-ua": '"Microsoft Edge";v="129", "Not=A?Brand";v="8", "Chromium";v="129"', 74 | "sec-ch-ua-mobile": "?0", 75 | "sec-ch-ua-platform": '"macOS"', 76 | "sec-fetch-mode": "navigate", 77 | "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0", 78 | } 79 | # 获取商品 ID 列表 80 | self.productsId = self.getId() 81 | # 评论类型映射,1 差评,2 中评,3 好评 82 | self.comtype = {1: "negative", 2: "medium", 3: "positive"} # 修正拼写错误 83 | # 商品类别 84 | self.categlory = categlory 85 | # IP 列表,用于代理(当前为空) 86 | self.iplist = {"http": [], "https": []} 87 | 88 | def getParamUrl(self, productid: str, page: str, score: str): 89 | """ 90 | 生成评论接口的请求参数和完整 URL。 91 | :param productid: 商品 ID 92 | :param page: 评论页码 93 | :param score: 评论类型(1 差评,2 中评,3 好评) 94 | :return: 请求参数和完整 URL 95 | """ 96 | path = ( 97 | "/discussion/getProductPageImageCommentList.action?productId=" + productid 98 | ) 99 | params = {} 100 | # params = { 101 | # "appid": "item-v3", 102 | # "functionId": 
"pc_club_productPageComments", 103 | # "client": "pc", 104 | # "body": { 105 | # "productId": productid, 106 | # "score": score, 107 | # "sortType": "5", 108 | # "page": page, 109 | # "pageSize": "10", 110 | # "isShadowSku": "0", 111 | # "rid": "0", 112 | # "fold": "1", 113 | # }, 114 | # } 115 | # default_logger.info("请求参数: " + str(params)) 116 | url = self.commentBaseUrl + path 117 | default_logger.info("请求 URL: " + str(url)) 118 | return params, url 119 | 120 | def getHeaders(self, productid: str) -> dict: 121 | """ 122 | 生成爬取指定商品评论时所需的请求头。 123 | :param productid: 商品 ID 124 | :return: 请求头字典 125 | """ 126 | return { 127 | "Referer": f"https://item.jd.com/{productid}.html", 128 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " 129 | "Chrome/75.0.3770.142 Safari/537.36", 130 | # "cookie": cookie, 131 | } 132 | 133 | def getId(self) -> list: 134 | """ 135 | 从京东搜索页面获取商品 ID 列表。 136 | :return: 商品 ID 列表 137 | """ 138 | try: 139 | response = requests.get(self.startUrl, headers=self.headers2) 140 | response.raise_for_status() # 检查响应状态码 141 | default_logger.info("获取同类产品的搜索 URL 结果:" + self.startUrl) 142 | except requests.RequestException as e: 143 | default_logger.warning(f"请求异常,状态码错误,爬虫连接异常!错误信息: {e}") 144 | return [] 145 | 146 | html = etree.HTML(response.text) 147 | return html.xpath('//li[@class="gl-item"]/@data-sku') 148 | 149 | def getData(self, maxPage: int, score: int): 150 | """ 151 | 爬取指定商品类别的评论信息。 152 | :param maxPage: 最大爬取页数,每页 10 条评论 153 | :param score: 评论类型(1 差评,2 中评,3 好评) 154 | :return: 处理后的评论列表 155 | """ 156 | comments = [] 157 | scores = [] 158 | default_logger.info( 159 | "爬取商品数量最多为 8 个,请耐心等待,也可以自行修改 jdspider 文件" 160 | ) 161 | 162 | # 确定要爬取的商品数量 163 | product_count = min(len(self.productsId), 8) if self.productsId else 0 164 | if product_count == 0: 165 | default_logger.warning("self.productsId 为空,将使用默认评价") 166 | default_logger.info("要爬取的商品数量: " + str(product_count)) 167 | 168 | for j in range(product_count): 169 | product_id = self.productsId[j] 170 | for i in range(1, maxPage): 171 | params, url = self.getParamUrl(product_id, str(i), str(score)) 172 | default_logger.info(f"正在爬取第 {j + 1} 个商品的第 {i} 页评论信息") 173 | 174 | try: 175 | default_logger.info( 176 | f"爬取商品评价的 URL 链接是 {url},商品的 ID 是:{product_id}" 177 | ) 178 | response = requests.get(url, headers=self.getHeaders(product_id)) 179 | response.raise_for_status() # 检查响应状态码 180 | except requests.RequestException as e: 181 | default_logger.warning(f"请求异常: {e}") 182 | continue 183 | 184 | time.sleep(random.randint(5, 10)) # 设置时延,防止被封 IP 185 | 186 | if not response.text: 187 | default_logger.warning("未爬取到信息") 188 | continue 189 | 190 | try: 191 | res_json = json.loads(response.text) 192 | except json.JSONDecodeError as e: 193 | default_logger.warning(f"JSON 解析异常: {e}") 194 | continue 195 | 196 | if res_json["imgComments"]["imgCommentCount"] == 0: 197 | default_logger.warning( 198 | f"爬取到的商品评价数量为 0,可能是最后一页或请求失败" 199 | ) 200 | break 201 | 202 | for comment_data in res_json["imgComments"]["imgList"]: 203 | comment = ( 204 | comment_data["commentVo"]["content"] 205 | .replace("\n", " ") 206 | .replace("\r", " ") 207 | ) 208 | comments.append(comment) 209 | scores.append(comment_data["commentVo"]["score"]) 210 | 211 | default_logger.info(f"已爬取 {len(comments)} 条 {self.comtype[score]} 评价信息") 212 | 213 | # 处理评论,拆分成句子 214 | remarks = [] 215 | for comment in comments: 216 | sentences = re.findall(zhon.hanzi.sentence, comment) 217 | if not sentences or sentences in [ 218 | ["。"], 219 | ["?"], 220 
| ["!"], 221 | ["."], 222 | [","], 223 | ["?"], 224 | ["!"], 225 | ]: 226 | default_logger.warning( 227 | f"拆分失败或结果不符(去除空格和标点符号):{sentences}" 228 | ) 229 | else: 230 | remarks.append(sentences) 231 | 232 | result = self.solvedata(remarks=remarks) 233 | 234 | if not result: 235 | default_logger.warning("当前商品没有评价,使用默认评价") 236 | result = [ 237 | "考虑买这个$之前我是有担心过的,因为我不知道$的质量和品质怎么样,但是看了评论后我就放心了。", 238 | "买这个$之前我是有看过好几家店,最后看到这家店的评价不错就决定在这家店买 ", 239 | "看了好几家店,也对比了好几家店,最后发现还是这一家的$评价最好。", 240 | "看来看去最后还是选择了这家。", 241 | "之前在这家店也买过其他东西,感觉不错,这次又来啦。", 242 | "这家的$的真是太好用了,用了第一次就还想再用一次。", 243 | "收到货后我非常的开心,因为$的质量和品质真的非常的好!", 244 | "拆开包装后惊艳到我了,这就是我想要的$!", 245 | "快递超快!包装的很好!!很喜欢!!!", 246 | "包装的很精美!$的质量和品质非常不错!", 247 | "收到快递后迫不及待的拆了包装。$我真的是非常喜欢", 248 | "真是一次难忘的购物,这辈子没见过这么好用的东西!!", 249 | "经过了这次愉快的购物,我决定如果下次我还要买$的话,我一定会再来这家店买的。", 250 | "不错不错!", 251 | "我会推荐想买$的朋友也来这家店里买", 252 | "真是一次愉快的购物!", 253 | "大大的好评!以后买$再来你们店!( ̄▽ ̄)", 254 | "真是一次愉快的购物!", 255 | ] 256 | 257 | return result 258 | 259 | def solvedata(self, remarks) -> list: 260 | """ 261 | 将评论拆分成句子列表。 262 | :param remarks: 包含评论句子列表的列表 263 | :return: 所有评论句子组成的列表 264 | """ 265 | sentences = [] 266 | for item in remarks: 267 | for sentence in item: 268 | sentences.append(sentence) 269 | default_logger.info("爬取的评价结果:" + str(sentences)) 270 | return sentences 271 | 272 | 273 | # 测试用例 274 | if __name__ == "__main__": 275 | jdlist = ["商品名"] 276 | for item in jdlist: 277 | spider = JDSpider(item) 278 | spider.getData(2, 3) 279 | -------------------------------------------------------------------------------- /auto_comment_plus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022/2/8 20:50 3 | # @Author : @qiu-lzsnmb and @Dimlitter 4 | # @File : auto_comment_plus.py 5 | 6 | import argparse, uuid 7 | import copy 8 | import logging 9 | import os 10 | import random 11 | import sys 12 | import time 13 | import urllib 14 | 15 | import jieba # just for linting 16 | import jieba.analyse 17 | import requests 18 | import yaml 19 | from lxml import etree 20 | 21 | import jdspider 22 | 23 | # from http2_adapter import Http2Adapter 24 | 25 | # constants 26 | CONFIG_PATH = "./config.yml" 27 | USER_CONFIG_PATH = "./config.user.yml" 28 | ORDINARY_SLEEP_SEC = 10 29 | SUNBW_SLEEP_SEC = 5 30 | REVIEW_SLEEP_SEC = 10 31 | SERVICE_RATING_SLEEP_SEC = 15 32 | 33 | # logging with styles 34 | # Reference: https://stackoverflow.com/a/384125/12002560 35 | _COLORS = { 36 | "black": 0, 37 | "red": 1, 38 | "green": 2, 39 | "yellow": 3, 40 | "blue": 4, 41 | "magenta": 5, 42 | "cyan": 6, 43 | "white": 7, 44 | } 45 | 46 | _RESET_SEQ = "\033[0m" 47 | _COLOR_SEQ = "\033[1;%dm" 48 | _BOLD_SEQ = "\033[1m" 49 | _ITALIC_SEQ = "\033[3m" 50 | _UNDERLINED_SEQ = "\033[4m" 51 | 52 | _FORMATTER_COLORS = { 53 | "DEBUG": _COLORS["blue"], 54 | "INFO": _COLORS["green"], 55 | "WARNING": _COLORS["yellow"], 56 | "ERROR": _COLORS["red"], 57 | "CRITICAL": _COLORS["red"], 58 | } 59 | 60 | 61 | def format_style_seqs(msg: str, use_style: bool = True): 62 | if use_style: 63 | msg = msg.replace("$RESET", _RESET_SEQ) 64 | msg = msg.replace("$BOLD", _BOLD_SEQ) 65 | msg = msg.replace("$ITALIC", _ITALIC_SEQ) 66 | msg = msg.replace("$UNDERLINED", _UNDERLINED_SEQ) 67 | else: 68 | msg = msg.replace("$RESET", "") 69 | msg = msg.replace("$BOLD", "") 70 | msg = msg.replace("$ITALIC", "") 71 | msg = msg.replace("$UNDERLINED", "") 72 | 73 | 74 | class StyleFormatter(logging.Formatter): 75 | def __init__(self, fmt=None, datefmt=None, use_style=True): 76 | 
logging.Formatter.__init__(self, fmt, datefmt) 77 | self.use_style = use_style 78 | 79 | def format(self, record): 80 | rcd = copy.copy(record) 81 | levelname = rcd.levelname 82 | if self.use_style and levelname in _FORMATTER_COLORS: 83 | levelname_with_color = "%s%s%s" % ( 84 | _COLOR_SEQ % (30 + _FORMATTER_COLORS[levelname]), 85 | levelname, 86 | _RESET_SEQ, 87 | ) 88 | rcd.levelname = levelname_with_color 89 | return logging.Formatter.format(self, rcd) 90 | 91 | 92 | # 生成随机文件名 93 | def generate_unique_filename(): 94 | # 获取当前时间戳的最后5位 95 | timestamp = str(int(time.time()))[-5:] 96 | 97 | # 生成 UUID 的前5位 98 | unique_id = str(uuid.uuid4().int)[:5] 99 | 100 | # 组合生成10位的唯一文件名 101 | unique_filename = f"{timestamp}{unique_id}.jpg" 102 | 103 | return unique_filename 104 | 105 | 106 | # 下载图片 107 | def download_image(img_url, file_name): 108 | fullUrl = f"https:{img_url}" 109 | response = requests.get(fullUrl) 110 | if response.status_code == 200: 111 | directory = "img" 112 | if not os.path.exists(directory): 113 | # 如果目录不存在,创建目录 114 | os.makedirs(directory) 115 | file_path = os.path.join(directory, file_name) 116 | with open(file_path, "wb") as file: 117 | file.write(response.content) 118 | return file_path 119 | else: 120 | print("Failed to download image") 121 | return None 122 | 123 | 124 | # 上传图片到JD接口 125 | def upload_image(filename, file_path, session, headers): 126 | # session.mount( 127 | # "https://club.jd.com/myJdcomments/ajaxUploadImage.action", Http2Adapter() 128 | # ) 129 | 130 | files = { 131 | "name": (None, filename), 132 | # 不需要 PHPSESSID 时可以忽略 133 | # 如果需要的话,可以从初次登录响应中获取 134 | "Filedata": (file_path, open(file_path, "rb"), "image/jpeg"), 135 | } 136 | 137 | # 发起 POST 请求 138 | response = session.post( 139 | "https://club.jd.com/myJdcomments/ajaxUploadImage.action", 140 | headers=headers, 141 | files=files, 142 | ) 143 | 144 | return response 145 | 146 | 147 | # 评价生成 148 | def generation(pname:str, _class: int = 0, _type: int = 1, opts: object = None): 149 | result = [] 150 | opts = opts or {} 151 | items = ["商品名"] 152 | items.clear() 153 | items.append(pname) 154 | opts["logger"].debug("Items: %s", items) 155 | loop_times = len(items) 156 | opts["logger"].debug("Total loop times: %d", loop_times) 157 | for i, item in enumerate(items): 158 | opts["logger"].debug("Loop: %d / %d", i + 1, loop_times) 159 | opts["logger"].debug("Current item: %s", item) 160 | spider = jdspider.JDSpider(item) 161 | opts["logger"].debug("Successfully created a JDSpider instance") 162 | # 增加对增值服务的评价鉴别 163 | if "赠品" in pname or "非实物" in pname or "增值服务" in pname: 164 | result = [ 165 | "赠品挺好的。", 166 | "很贴心,能有这样免费赠送的赠品!", 167 | "正好想着要不要多买一份增值服务,没想到还有这样的赠品。", 168 | "赠品正合我意。", 169 | "赠品很好,挺不错的。", 170 | "本来买了产品以后还有些担心。但是看到赠品以后就放心了。", 171 | "不论品质如何,至少说明店家对客的态度很好!", 172 | "我很喜欢这些商品!", 173 | "我对于商品的附加值很在乎,恰好这些赠品为这件商品提供了这样的的附加值,这令我很满意。" 174 | "感觉现在的网购环境环境越来越好了,以前网购的时候还没有过么多贴心的赠品和增值服务", 175 | "第一次用京东,被这种赠品和增值服物的良好态度感动到了。", 176 | "赠品还行。", 177 | ] 178 | else: 179 | result = spider.getData(2, 3) # 这里可以自己改 180 | opts["logger"].debug("Result: %s", result) 181 | 182 | # class 0是评价 1是提取id 183 | try: 184 | name = jieba.analyse.textrank(pname, topK=5, allowPOS="n")[0] 185 | opts["logger"].debug("Name: %s", name) 186 | except Exception as e: 187 | opts["logger"].warning( 188 | 'jieba textrank analysis error: %s, name fallback to "宝贝"', e 189 | ) 190 | name = "宝贝" 191 | if _class == 1: 192 | opts["logger"].debug("_class is 1. 
Directly return name") 193 | return name 194 | else: 195 | num = 0 196 | if _type == 1: 197 | num = 6 198 | elif _type == 0: 199 | num = 4 200 | num = min(num, len(result)) 201 | # use `.join()` to improve efficiency 202 | comments = "".join(random.sample(result, num)) 203 | opts["logger"].debug("_type: %d", _type) 204 | opts["logger"].debug("num: %d", num) 205 | opts["logger"].debug("Raw comments: %s", comments) 206 | 207 | return 5, comments.replace("$", name) 208 | 209 | 210 | # 查询全部评价 211 | def all_evaluate(opts=None): 212 | opts = opts or {} 213 | N = {} 214 | url = "https://club.jd.com/myJdcomments/myJdcomment.action?" 215 | opts["logger"].info("URL: %s", url) 216 | opts["logger"].debug("Fetching website data") 217 | req = requests.get(url, headers=headers) 218 | opts["logger"].debug( 219 | "Successfully accepted the response with status code %d", req.status_code 220 | ) 221 | if not req.ok: 222 | opts["logger"].debug( 223 | "Status code of the response is %d, not 200", req.status_code 224 | ) 225 | req_et = etree.HTML(req.text) 226 | opts["logger"].debug("Successfully parsed an XML tree") 227 | evaluate_data = req_et.xpath('//*[@id="main"]/div[2]/div[1]/div/ul/li') 228 | # print(evaluate) 229 | loop_times = len(evaluate_data) 230 | opts["logger"].debug("Total loop times: %d", loop_times) 231 | for i, ev in enumerate(evaluate_data): 232 | opts["logger"].debug("Loop: %d / %d", i + 1, loop_times) 233 | na = ev.xpath("a/text()")[0] 234 | opts["logger"].debug("na: %s", na) 235 | try: 236 | num = ev.xpath("b/text()")[0] 237 | opts["logger"].debug("num: %s", num) 238 | except IndexError: 239 | opts["logger"].info("Can't find num content in XPath, fallback to 0") 240 | num = 0 241 | N[na] = int(num) 242 | return N 243 | 244 | 245 | def delete_jpg(): 246 | current_directory = os.getcwd() 247 | files = os.listdir(current_directory) 248 | for file in files: 249 | if file.lower().endswith(".jpg"): 250 | # 构建完整的文件路径 251 | file_path = os.path.join(current_directory, file) 252 | # 删除文件 253 | os.remove(file_path) 254 | 255 | 256 | # 普通评价 257 | def ordinary(N, opts=None): 258 | time.sleep(3) 259 | opts = opts or {} 260 | Order_data = [] 261 | req_et = [] 262 | imgCommentCount_bool = True 263 | loop_times = N["待评价订单"] // 20 264 | opts["logger"].debug("Fetching website data") 265 | opts["logger"].debug("Total loop times: %d", loop_times) 266 | for i in range(loop_times + 1): 267 | url = ( 268 | f"https://club.jd.com/myJdcomments/myJdcomment.action?sort=0&" 269 | f"page={i + 1}" 270 | ) 271 | opts["logger"].debug("URL: %s", url) 272 | req = requests.get(url, headers=headers) 273 | opts["logger"].debug( 274 | "Successfully accepted the response with status code %d", req.status_code 275 | ) 276 | if not req.ok: 277 | opts["logger"].warning( 278 | "Status code of the response is %d, not 200", req.status_code 279 | ) 280 | req_et.append(etree.HTML(req.text)) 281 | opts["logger"].debug("Successfully parsed an XML tree") 282 | opts["logger"].debug("Fetching data from XML trees") 283 | opts["logger"].debug("Total loop times: %d", loop_times) 284 | for idx, i in enumerate(req_et): 285 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 286 | opts["logger"].debug("Fetching order data in the default XPath") 287 | elems = i.xpath('//*[@id="main"]/div[2]/div[2]/table/tbody') 288 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 289 | Order_data.extend(elems) 290 | if len(Order_data) != N["待评价订单"]: 291 | opts["logger"].debug( 292 | 'Count of fetched order data doesn\'t equal N["待评价订单"]' 
293 | ) 294 | opts["logger"].debug("Clear the list Order_data") 295 | Order_data = [] 296 | opts["logger"].debug("Total loop times: %d", loop_times) 297 | for idx, i in enumerate(req_et): 298 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 299 | opts["logger"].debug("Fetching order data in another XPath") 300 | elems = i.xpath('//*[@id="main"]/div[2]/div[2]/table') 301 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 302 | Order_data.extend(elems) 303 | 304 | opts["logger"].info(f"当前共有{N['待评价订单']}个评价。") 305 | opts["logger"].debug("Commenting on items") 306 | for i, Order in enumerate(Order_data): 307 | try: 308 | oid = Order.xpath('tr[@class="tr-th"]/td/span[3]/a/text()')[0] 309 | opts["logger"].debug("oid: %s", oid) 310 | oname_data = Order.xpath( 311 | 'tr[@class="tr-bd"]/td[1]/div[1]/div[2]/div/a/text()' 312 | ) 313 | opts["logger"].debug("oname_data: %s", oname_data) 314 | pid_data = Order.xpath('tr[@class="tr-bd"]/td[1]/div[1]/div[2]/div/a/@href') 315 | opts["logger"].debug("pid_data: %s", pid_data) 316 | except IndexError: 317 | opts["logger"].warning(f"第{i + 1}个订单未查找到商品,跳过。") 318 | continue 319 | loop_times1 = min(len(oname_data), len(pid_data)) 320 | opts["logger"].debug("Commenting on orders") 321 | opts["logger"].debug("Total loop times: %d", loop_times1) 322 | idx = 0 323 | for oname, pid in zip(oname_data, pid_data): 324 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times1) 325 | pid = pid.replace("//item.jd.com/", "").replace(".html", "") 326 | opts["logger"].debug("pid: %s", pid) 327 | if "javascript" in pid: 328 | opts["logger"].error( 329 | "pid_data: %s,这个订单估计是京东外卖的,会导致此次评价失败,请把该 %s 商品手工评价后再运行程序。" 330 | % (pid, oname), 331 | ) 332 | continue 333 | opts["logger"].info(f"\t{i}.开始评价订单\t{oname}[{oid}]并晒图") 334 | url2 = "https://club.jd.com/myJdcomments/saveProductComment.action" 335 | opts["logger"].debug("URL: %s", url2) 336 | xing, Str = generation(oname, opts=opts) 337 | opts["logger"].info(f"\t\t评价内容,星级{xing}:" + Str) 338 | # 获取图片 339 | opts["logger"].info(f"\t\t开始获取图片") 340 | img_url = ( 341 | f"https://club.jd.com/discussion/getProductPageImageCommentList" 342 | f".action?productId={pid}" 343 | ) 344 | opts["logger"].debug("Fetching images using the default URL") 345 | opts["logger"].debug("URL: %s", img_url) 346 | img_resp = requests.get(img_url, headers=headers) 347 | opts["logger"].debug( 348 | "Successfully accepted the response with status code %d", 349 | img_resp.status_code, 350 | ) 351 | if not req.ok: 352 | opts["logger"].warning( 353 | "Status code of the response is %d, not 200", img_resp.status_code 354 | ) 355 | opts["logger"].info("imgdata_url:" + img_url) 356 | imgdata = img_resp.json() 357 | opts["logger"].debug("Image data: %s", imgdata) 358 | if imgdata["imgComments"]["imgCommentCount"] == 0: 359 | opts["logger"].warning("这单没有图片数据,所以直接默认五星好评!!") 360 | imgCommentCount_bool = False 361 | elif imgdata["imgComments"]["imgCommentCount"] > 0: 362 | imgurl1 = imgdata["imgComments"]["imgList"][0]["imageUrl"] 363 | opts["logger"].info("imgurl1 url: %s", imgurl1) 364 | imgurl2 = imgdata["imgComments"]["imgList"][1]["imageUrl"] 365 | opts["logger"].info("imgurl2 url: %s", imgurl2) 366 | session = requests.Session() 367 | imgBasic = "//img20.360buyimg.com/shaidan/s645x515_" 368 | imgName1 = generate_unique_filename() 369 | opts["logger"].debug(f"Image :{imgName1}") 370 | downloaded_file1 = download_image(imgurl1, imgName1) 371 | # 上传图片 372 | if downloaded_file1: 373 | imgPart1 = upload_image( 374 | imgName1, 
downloaded_file1, session, headers
375 |                     )
376 |                     # print(imgPart1) # 和上传图片操作
377 |                     if imgPart1.status_code == 200 and ".jpg" in imgPart1.text:
378 |                         imgurl1t = f"{imgBasic}{imgPart1.text}"
379 |                     else:
380 |                         imgurl1 = ""
381 |                         opts["logger"].info("上传图片失败")
382 |                         exit(0)
383 |                 imgName2 = generate_unique_filename()
384 |                 opts["logger"].debug(f"Image :{imgName2}")
385 |                 downloaded_file2 = download_image(imgurl2, imgName2)
386 |                 # 上传图片
387 |                 if downloaded_file2:
388 |                     imgPart2 = upload_image(
389 |                         imgName2, downloaded_file2, session, headers
390 |                     )
391 |                     # print(imgPart2) # 和上传图片操作
392 |                     if imgPart2.status_code == 200 and ".jpg" in imgPart2.text:
393 |                         imgurl2t = f"{imgBasic}{imgPart2.text}"
394 |                     else:
395 |                         imgurl2 = ""
396 |                         opts["logger"].info("上传图片失败")
397 |                         exit(0)
398 |                 imgurl = imgurl1 + "," + imgurl2
399 |                 opts["logger"].debug("Image URL: %s", imgurl)
400 |                 opts["logger"].info(f"\t\t图片url={imgurl}")
401 |             Str: str = urllib.parse.quote(Str, safe="/", encoding=None, errors=None)
402 |             Comment_data = {
403 |                 "orderId": oid,
404 |                 "productId": pid,  # 商品id
405 |                 "score": str(xing),  # 商品几星
406 |                 "content": Str,  # 评价内容
407 |                 "saveStatus": "1",
408 |                 "anonymousFlag": "1",  # 是否匿名
409 |             }
410 |             if imgCommentCount_bool:
411 |                 Comment_data["imgs"] = imgurl  # 图片url
412 |             opts["logger"].debug("Data: %s", Comment_data)
413 |             if not opts.get("dry_run"):
414 |                 opts["logger"].debug("Sending comment request")
415 |                 Comment_resp = requests.post(url2, headers=headers2, data=Comment_data)
416 |                 opts["logger"].info(
417 |                     "发送请求后的状态码:{},text:{}".format(
418 |                         Comment_resp.status_code, Comment_resp.text
419 |                     )
420 |                 )
421 |                 # 当发送后的状态码为 200,并且返回值里的 success 为 true 时才算评论(晒图)成功,其余状态均视为失败
422 |                 if Comment_resp.status_code == 200 and Comment_resp.json()["success"]:
423 |                     opts["logger"].info(f"\t{i}.评价订单\t{oname}[{oid}]评论成功")
424 |                 else:
425 |                     opts["logger"].warning(f"\t{i}.评价订单\t{oname}[{oid}]评论失败")
426 |             else:
427 |                 opts["logger"].debug("Skipped sending comment request in dry run")
428 |             opts["logger"].debug("Sleep time (s): %.1f", ORDINARY_SLEEP_SEC)
429 |             time.sleep(ORDINARY_SLEEP_SEC)
430 |             idx += 1
431 |         N["待评价订单"] -= 1
432 |     # 删除当前目录下的所有 jpg 图片
433 |     # delete_jpg()
434 |     return N
435 | 
436 | 
437 | """
438 | # 晒单评价
439 | def sunbw(N, opts=None):
440 |     opts = opts or {}
441 |     Order_data = []
442 |     loop_times = N['待晒单'] // 20
443 |     opts['logger'].debug('Fetching website data')
444 |     opts['logger'].debug('Total loop times: %d', loop_times)
445 |     for i in range(loop_times + 1):
446 |         opts['logger'].debug('Loop: %d / %d', i + 1, loop_times)
447 |         url = (f'https://club.jd.com/myJdcomments/myJdcomment.action?sort=1'
448 |                f'&page={i + 1}')
449 |         opts['logger'].debug('URL: %s', url)
450 |         req = requests.get(url, headers=headers)
451 |         opts['logger'].debug(
452 |             'Successfully accepted the response with status code %d',
453 |             req.status_code)
454 |         if not req.ok:
455 |             opts['logger'].warning(
456 |                 'Status code of the response is %d, not 200', req.status_code)
457 |         req_et = etree.HTML(req.text)
458 |         opts['logger'].debug('Successfully parsed an XML tree')
459 |         opts['logger'].debug('Fetching data from XML trees')
460 |         elems = req_et.xpath(
461 |             '//*[@id="evalu01"]/div[2]/div[1]/div[@class="comt-plist"]/div[1]')
462 |         opts['logger'].debug('Count of fetched order data: %d', len(elems))
463 |         Order_data.extend(elems)
464 |     opts['logger'].info(f"当前共有{N['待晒单']}个需要晒单。")
465 |     opts['logger'].debug('Commenting on items')
466 |     for i, Order in enumerate(Order_data):
467 |         oname = 
Order.xpath('ul/li[1]/div/div[2]/div[1]/a/text()')[0] 468 | pid = Order.xpath('@pid')[0] 469 | oid = Order.xpath('@oid')[0] 470 | opts['logger'].info(f'\t开始第{i+1},{oname}') 471 | opts['logger'].debug('pid: %s', pid) 472 | opts['logger'].debug('oid: %s', oid) 473 | # 获取图片 474 | url1 = (f'https://club.jd.com/discussion/getProductPageImageCommentList' 475 | f'.action?productId={pid}') 476 | opts['logger'].debug('Fetching images using the default URL') 477 | opts['logger'].debug('URL: %s', url1) 478 | req1 = requests.get(url1, headers=headers) 479 | opts['logger'].debug( 480 | 'Successfully accepted the response with status code %d', 481 | req1.status_code) 482 | if not req.ok: 483 | opts['logger'].warning( 484 | 'Status code of the response is %d, not 200', req1.status_code) 485 | imgdata = req1.json() 486 | opts['logger'].debug('Image data: %s', imgdata) 487 | if imgdata["imgComments"]["imgCommentCount"] == 0: 488 | opts['logger'].debug('Count of fetched image comments is 0') 489 | opts['logger'].debug('Fetching images using another URL') 490 | url1 = ('https://club.jd.com/discussion/getProductPageImage' 491 | 'CommentList.action?productId=1190881') 492 | opts['logger'].debug('URL: %s', url1) 493 | req1 = requests.get(url1, headers=headers) 494 | opts['logger'].debug( 495 | 'Successfully accepted the response with status code %d', 496 | req1.status_code) 497 | if not req.ok: 498 | opts['logger'].warning( 499 | 'Status code of the response is %d, not 200', 500 | req1.status_code) 501 | imgdata = req1.json() 502 | opts['logger'].debug('Image data: %s', imgdata) 503 | imgurl = imgdata["imgComments"]["imgList"][0]["imageUrl"] 504 | opts['logger'].debug('Image URL: %s', imgurl) 505 | 506 | opts['logger'].info(f'\t\t图片url={imgurl}') 507 | # 提交晒单 508 | opts['logger'].debug('Preparing for commenting') 509 | url2 = "https://club.jd.com/myJdcomments/saveShowOrder.action" 510 | opts['logger'].debug('URL: %s', url2) 511 | headers['Referer'] = ('https://club.jd.com/myJdcomments/myJdcomment.' 
512 | 'action?sort=1') 513 | headers['Origin'] = 'https://club.jd.com' 514 | headers['Content-Type'] = 'application/x-www-form-urlencoded' 515 | opts['logger'].debug('New header for this request: %s', headers) 516 | data = { 517 | 'orderId': oid, 518 | 'productId': pid, 519 | 'imgs': imgurl, 520 | 'saveStatus': 3 521 | } 522 | opts['logger'].debug('Data: %s', data) 523 | if not opts.get('dry_run'): 524 | opts['logger'].debug('Sending comment request') 525 | req_url2 = requests.post(url2, data=data, headers=headers) 526 | else: 527 | opts['logger'].debug('Skipped sending comment request in dry run') 528 | opts['logger'].info('完成') 529 | opts['logger'].debug('Sleep time (s): %.1f', SUNBW_SLEEP_SEC) 530 | time.sleep(SUNBW_SLEEP_SEC) 531 | N['待晒单'] -= 1 532 | return N 533 | """ 534 | 535 | # 追评 536 | 537 | 538 | def review(N, opts=None): 539 | opts = opts or {} 540 | req_et = [] 541 | Order_data = [] 542 | loop_times = N["待追评"] // 20 543 | opts["logger"].debug("Fetching website data") 544 | opts["logger"].debug("Total loop times: %d", loop_times) 545 | for i in range(loop_times + 1): 546 | opts["logger"].debug("Loop: %d / %d", i + 1, loop_times) 547 | url = ( 548 | f"https://club.jd.com/myJdcomments/myJdcomment.action?sort=3" 549 | f"&page={i + 1}" 550 | ) 551 | opts["logger"].debug("URL: %s", url) 552 | req = requests.get(url, headers=headers) 553 | opts["logger"].debug( 554 | "Successfully accepted the response with status code %d", req.status_code 555 | ) 556 | if not req.ok: 557 | opts["logger"].warning( 558 | "Status code of the response is %d, not 200", req.status_code 559 | ) 560 | req_et.append(etree.HTML(req.text)) 561 | opts["logger"].debug("Successfully parsed an XML tree") 562 | opts["logger"].debug("Fetching data from XML trees") 563 | opts["logger"].debug("Total loop times: %d", loop_times) 564 | for idx, i in enumerate(req_et): 565 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 566 | opts["logger"].debug("Fetching order data in the default XPath") 567 | elems = i.xpath('//*[@id="main"]/div[2]/div[2]/table/tr[@class="tr-bd"]') 568 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 569 | Order_data.extend(elems) 570 | if len(Order_data) != N["待追评"]: 571 | opts["logger"].debug('Count of fetched order data doesn\'t equal N["待追评"]') 572 | # NOTE: Need them? 
573 | # opts['logger'].debug('Clear the list Order_data') 574 | # Order_data = [] 575 | opts["logger"].debug("Total loop times: %d", loop_times) 576 | for idx, i in enumerate(req_et): 577 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 578 | opts["logger"].debug("Fetching order data in another XPath") 579 | elems = i.xpath( 580 | '//*[@id="main"]/div[2]/div[2]/table/tbody/tr[@class="tr-bd"]' 581 | ) 582 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 583 | Order_data.extend(elems) 584 | opts["logger"].info(f"当前共有 {N['待追评']} 个需要追评。") 585 | opts["logger"].debug("Commenting on items") 586 | for i, Order in enumerate(Order_data): 587 | oname = Order.xpath("td[1]/div/div[2]/div/a/text()")[0] 588 | _id = Order.xpath("td[3]/div/a/@href")[0] 589 | opts["logger"].info(f"\t开始追评第{i+1},{oname}") 590 | opts["logger"].debug("_id: %s", _id) 591 | url1 = ( 592 | "https://club.jd.com/afterComments/" "saveAfterCommentAndShowOrder.action" 593 | ) 594 | opts["logger"].debug("URL: %s", url1) 595 | pid, oid = _id.replace( 596 | "http://club.jd.com/afterComments/productPublish.action?sku=", "" 597 | ).split("&orderId=") 598 | opts["logger"].debug("pid: %s", pid) 599 | if "javascript" in pid: 600 | opts["logger"].error( 601 | "pid_data: %s,这个订单估计是京东外卖的,会导致此次评价失败,请把该 %s 商品手工评价后再运行程序。" 602 | % (pid, oname), 603 | ) 604 | exit(0) 605 | opts["logger"].debug("oid: %s", oid) 606 | _, context = generation(oname, _type=0, opts=opts) 607 | opts["logger"].info(f"\t\t追评内容:{context}") 608 | context = urllib.parse.quote(context, safe="/", encoding=None, errors=None) 609 | data1 = { 610 | "orderId": oid, 611 | "productId": pid, 612 | "content": context, 613 | "anonymousFlag": 1, 614 | "score": 5, 615 | "imgs": "", 616 | } 617 | opts["logger"].debug("Data: %s", data1) 618 | if not opts.get("dry_run"): 619 | opts["logger"].debug("Sending comment request") 620 | pj1 = requests.post(url1, headers=headers2, data=data1) 621 | opts["logger"].debug( 622 | "发送请求后的状态码:{},text:{}".format(pj1.status_code, pj1.text) 623 | ) 624 | else: 625 | opts["logger"].debug("Skipped sending comment request in dry run") 626 | opts["logger"].info("完成") 627 | opts["logger"].debug("Sleep time (s): %.1f", REVIEW_SLEEP_SEC) 628 | time.sleep(REVIEW_SLEEP_SEC) 629 | N["待追评"] -= 1 630 | return N 631 | 632 | 633 | # 服务评价 634 | def Service_rating(N, opts=None): 635 | opts = opts or {} 636 | Order_data = [] 637 | req_et = [] 638 | loop_times = N["服务评价"] // 20 639 | opts["logger"].debug("Fetching website data") 640 | opts["logger"].debug("Total loop times: %d", loop_times) 641 | for i in range(loop_times + 1): 642 | opts["logger"].debug("Loop: %d / %d", i + 1, loop_times) 643 | url = ( 644 | f"https://club.jd.com/myJdcomments/myJdcomment.action?sort=4" 645 | f"&page={i + 1}" 646 | ) 647 | opts["logger"].debug("URL: %s", url) 648 | req = requests.get(url, headers=headers) 649 | opts["logger"].debug( 650 | "Successfully accepted the response with status code %d", req.status_code 651 | ) 652 | if not req.ok: 653 | opts["logger"].warning( 654 | "Status code of the response is %d, not 200", req.status_code 655 | ) 656 | req_et.append(etree.HTML(req.text)) 657 | opts["logger"].debug("Successfully parsed an XML tree") 658 | opts["logger"].debug("Fetching data from XML trees") 659 | opts["logger"].debug("Total loop times: %d", loop_times) 660 | for idx, i in enumerate(req_et): 661 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 662 | opts["logger"].debug("Fetching order data in the default XPath") 663 | elems = 
i.xpath('//*[@id="main"]/div[2]/div[2]/table/tbody/tr[@class="tr-bd"]') 664 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 665 | Order_data.extend(elems) 666 | if len(Order_data) != N["服务评价"]: 667 | opts["logger"].debug('Count of fetched order data doesn\'t equal N["服务评价"]') 668 | opts["logger"].debug("Clear the list Order_data") 669 | Order_data = [] 670 | opts["logger"].debug("Total loop times: %d", loop_times) 671 | for idx, i in enumerate(req_et): 672 | opts["logger"].debug("Loop: %d / %d", idx + 1, loop_times) 673 | opts["logger"].debug("Fetching order data in another XPath") 674 | elems = i.xpath('//*[@id="main"]/div[2]/div[2]/table/tr[@class="tr-bd"]') 675 | opts["logger"].debug("Count of fetched order data: %d", len(elems)) 676 | Order_data.extend(elems) 677 | opts["logger"].info(f"当前共有{N['服务评价']}个需要第一次服务评价。") 678 | opts["logger"].debug("Commenting on items") 679 | for i, Order in enumerate(Order_data): 680 | oname = Order.xpath("td[1]/div[1]/div[2]/div/a/text()")[0] 681 | try: 682 | oid = Order.xpath("td[4]/div/a[1]/@oid")[0] 683 | except IndexError: 684 | opts["logger"].warning("Failed to fetch oid") 685 | continue 686 | opts["logger"].info(f"\t开始第一次评论,{i+1},{oname}") 687 | opts["logger"].debug("oid: %s", oid) 688 | url1 = ( 689 | f"https://club.jd.com/myJdcomments/insertRestSurvey.action" 690 | f"?voteid=145&ruleid={oid}" 691 | ) 692 | opts["logger"].debug("URL: %s", url1) 693 | data1 = { 694 | "oid": oid, 695 | "gid": "32", 696 | "sid": "186194", 697 | "stid": "0", 698 | "tags": "", 699 | "ro591": f"591A{random.randint(4, 5)}", # 商品符合度 700 | "ro592": f"592A{random.randint(4, 5)}", # 店家服务态度 701 | "ro593": f"593A{random.randint(4, 5)}", # 快递配送速度 702 | "ro899": f"899A{random.randint(4, 5)}", # 快递员服务 703 | "ro900": f"900A{random.randint(4, 5)}", # 快递员服务 704 | } 705 | opts["logger"].debug("Data: %s", data1) 706 | if not opts.get("dry_run"): 707 | opts["logger"].debug("Sending comment request") 708 | pj1 = requests.post(url1, headers=headers, data=data1) 709 | else: 710 | opts["logger"].debug("Skipped sending comment request in dry run") 711 | opts["logger"].info("\t\t " + pj1.text) 712 | opts["logger"].debug("Sleep time (s): %.1f", SERVICE_RATING_SLEEP_SEC) 713 | time.sleep(SERVICE_RATING_SLEEP_SEC) 714 | N["服务评价"] -= 1 715 | return N 716 | 717 | 718 | def No(opts=None): 719 | opts = opts or {} 720 | # opts["logger"].info("") 721 | N = all_evaluate(opts) 722 | s = "----".join(["{} {}".format(i, N[i]) for i in N]) 723 | opts["logger"].info(s) 724 | # opts["logger"].info("") 725 | return N 726 | 727 | 728 | def main(opts=None): 729 | opts = opts or {} 730 | opts["logger"].info("开始京东批量评价!") 731 | N = No(opts) 732 | opts["logger"].debug("N value after executing No(): %s", N) 733 | if not N: 734 | opts["logger"].error("Ck出现错误,请重新抓取!") 735 | exit() 736 | opts["logger"].info(f"已评价:{N['已评价']}个") 737 | if N["待评价订单"] != 0: 738 | opts["logger"].info("1.开始普通评价") 739 | N = ordinary(N, opts) 740 | opts["logger"].debug("N value after executing ordinary(): %s", N) 741 | N = No(opts) 742 | opts["logger"].debug("N value after executing No(): %s", N) 743 | """ "待晒单" is no longer found in N{} instead of "已评价" 744 | if N['待晒单'] != 0: 745 | opts['logger'].info("2.开始晒单评价") 746 | N = sunbw(N, opts) 747 | opts['logger'].debug('N value after executing sunbw(): %s', N) 748 | N = No(opts) 749 | opts['logger'].debug('N value after executing No(): %s', N) 750 | """ 751 | if N["待追评"] != 0: 752 | opts["logger"].info("3.开始批量追评,注意:追评不会自动上传图片") 753 | N = review(N, opts) 754 | 
opts["logger"].debug("N value after executing review(): %s", N) 755 | N = No(opts) 756 | opts["logger"].debug("N value after executing No(): %s", N) 757 | if N["服务评价"] != 0: 758 | opts["logger"].info("4.开始服务评价") 759 | N = Service_rating(N, opts) 760 | opts["logger"].debug("N value after executing Service_rating(): %s", N) 761 | N = No(opts) 762 | opts["logger"].debug("N value after executing No(): %s", N) 763 | opts["logger"].info("全部完成啦!") 764 | for i in N: 765 | if N[i] != 0: 766 | opts["logger"].warning("出现了二次错误,跳过了部分,重新尝试") 767 | main(opts) 768 | 769 | 770 | if __name__ == "__main__": 771 | # parse arguments 772 | parser = argparse.ArgumentParser() 773 | parser.add_argument( 774 | "--dry-run", 775 | help="have a full run without comment submission", 776 | action="store_true", 777 | ) 778 | parser.add_argument( 779 | "-lv", 780 | "--log-level", 781 | help="specify logging level (default: info)", 782 | default="INFO", 783 | ) 784 | parser.add_argument( 785 | "-o", "--log-file", help="specify logging file", default="log.txt" 786 | ) 787 | args = parser.parse_args() 788 | if args.log_level.upper() not in [ 789 | "DEBUG", 790 | "WARN", 791 | "INFO", 792 | "ERROR", 793 | "FATAL", 794 | # NOTE: `WARN` is an alias of `WARNING`. `FATAL` is an alias of 795 | # `CRITICAL`. Using these aliases is for developers' and users' 796 | # convenience. 797 | # NOTE: Now there is no logging on `CRITICAL` level. 798 | ]: 799 | args.log_level = "INFO" 800 | else: 801 | args.log_level = args.log_level.upper() 802 | opts = {"dry_run": args.dry_run, "log_level": args.log_level} 803 | if hasattr(args, "log_file"): 804 | opts["log_file"] = args.log_file 805 | else: 806 | opts["log_file"] = None 807 | 808 | # logging on console 809 | _logging_level = getattr(logging, opts["log_level"]) 810 | logger = logging.getLogger("comment") 811 | logger.setLevel(level=_logging_level) 812 | # NOTE: `%(levelname)s` will be parsed as the original name (`FATAL` -> 813 | # `CRITICAL`, `WARN` -> `WARNING`). 814 | # NOTE: The alignment number should set to 19 considering the style 815 | # controling characters. When it comes to file logger, the number should 816 | # set to 8. 817 | formatter = StyleFormatter("%(asctime)s %(levelname)-19s %(message)s") 818 | rawformatter = StyleFormatter( 819 | "%(asctime)s %(levelname)-8s %(message)s", use_style=False 820 | ) 821 | console = logging.StreamHandler() 822 | console.setLevel(_logging_level) 823 | console.setFormatter(formatter) 824 | logger.addHandler(console) 825 | opts["logger"] = logger 826 | # It's a hack!!! 827 | jieba.default_logger = logging.getLogger("jieba") 828 | jieba.default_logger.setLevel(level=_logging_level) 829 | jieba.default_logger.addHandler(console) 830 | # It's another hack!!! 
831 | jdspider.default_logger = logging.getLogger("spider") 832 | jdspider.default_logger.setLevel(level=_logging_level) 833 | jdspider.default_logger.addHandler(console) 834 | 835 | logger.debug("Successfully set up console logger") 836 | logger.debug("CLI arguments: %s", args) 837 | logger.debug("Opening the log file") 838 | if opts["log_file"]: 839 | try: 840 | handler = logging.FileHandler(opts["log_file"], "w") 841 | except Exception as e: 842 | logger.error("Failed to open the file handler") 843 | logger.error("Error message: %s", e) 844 | sys.exit(1) 845 | handler.setLevel(_logging_level) 846 | handler.setFormatter(rawformatter) 847 | logger.addHandler(handler) 848 | jieba.default_logger.addHandler(handler) 849 | jdspider.default_logger.addHandler(handler) 850 | logger.debug("Successfully set up file logger") 851 | logger.debug("Options passed to functions: %s", opts) 852 | logger.debug("Builtin constants:") 853 | logger.debug(" CONFIG_PATH: %s", CONFIG_PATH) 854 | logger.debug(" USER_CONFIG_PATH: %s", USER_CONFIG_PATH) 855 | logger.debug(" ORDINARY_SLEEP_SEC: %s", ORDINARY_SLEEP_SEC) 856 | logger.debug(" SUNBW_SLEEP_SEC: %s", SUNBW_SLEEP_SEC) 857 | logger.debug(" REVIEW_SLEEP_SEC: %s", REVIEW_SLEEP_SEC) 858 | logger.debug(" SERVICE_RATING_SLEEP_SEC: %s", SERVICE_RATING_SLEEP_SEC) 859 | 860 | # parse configurations 861 | logger.debug("Reading the configuration file") 862 | if os.path.exists(USER_CONFIG_PATH): 863 | logger.debug("User configuration file exists") 864 | _cfg_path = USER_CONFIG_PATH 865 | else: 866 | logger.debug( 867 | "User configuration file doesn't exist, fallback to the default one" 868 | ) 869 | _cfg_path = CONFIG_PATH 870 | with open(_cfg_path, "r", encoding="utf-8") as f: 871 | cfg = yaml.safe_load(f) 872 | logger.debug("Closed the configuration file") 873 | logger.debug("Configurations in Python-dict format: %s", cfg) 874 | ck = cfg["user"]["cookie"] 875 | jdspider.cookie = ck.encode("utf-8") 876 | 877 | headers2 = { 878 | "Cookie": ck.encode("utf-8"), 879 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) " 880 | "Chrome/114.0.5735.110 Safari/537.36", 881 | "Connection": "keep-alive", 882 | "Cache-Control": "max-age=0", 883 | "X-Requested-With": "XMLHttpRequest", 884 | "sec-ch-ua": "", 885 | "sec-ch-ua-mobile": "?0", 886 | "sec-ch-ua-platform": "", 887 | "DNT": "1", 888 | "Upgrade-Insecure-Requests": "1", 889 | "Accept": "application/json, text/javascript, */*; q=0.01", 890 | "Sec-Fetch-Site": "same-origin", 891 | "Sec-Fetch-Mode": "cors", 892 | "Sec-Fetch-User": "?1", 893 | "Sec-Fetch-Dest": "empty", 894 | "Referer": "https://club.jd.com/", 895 | "Accept-Encoding": "gzip, deflate", 896 | "Accept-Language": "zh-CN,zh;q=0.9", 897 | # 'Content-Type':'application/x-www-form-urlencoded' 898 | } 899 | headers = { 900 | "Cookie": ck.encode("utf-8"), 901 | "User-Agent": '''Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0 Sec-Ch-Ua: "Chromium";v="136", "Microsoft Edge";v="136", "Not.A/Brand";v="99"''', 902 | "DNT": "1", 903 | # "Connection": "keep-alive", 904 | # "Cache-Control": "max-age=0", 905 | # "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="98", "Google Chrome";v="98"', 906 | # "sec-ch-ua-mobile": "?0", 907 | # "sec-ch-ua-platform": '"Windows"', 908 | # "Upgrade-Insecure-Requests": "1", 909 | # "Accept": "*/*", 910 | # "Sec-Fetch-Site": "same-site", 911 | # "Sec-Fetch-Mode": "navigate", 912 | # "origin": "https://club.jd.com", 913 
| # "Sec-Fetch-User": "?1", 914 | # "Sec-Fetch-Dest": "document", 915 | # "Referer": "https://order.jd.com/", 916 | # "Accept-Encoding": "gzip, deflate, br, zstd", 917 | # "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6", 918 | } 919 | logger.debug("Builtin HTTP request header: %s", headers) 920 | 921 | logger.debug("Starting main processes") 922 | try: 923 | main(opts) 924 | # NOTE: It needs 3,000 times to raise this exception. Do you really want to 925 | # do like this? 926 | except RecursionError: 927 | logger.error("多次出现未完成情况,程序自动退出") 928 | --------------------------------------------------------------------------------