├── swagger.png
├── static
    ├── pkc.ttf
    └── favicon.ico
├── utils
    ├── api_tags.py
    ├── __init__.py
    ├── models.py
    ├── tools.py
    ├── log.py
    └── config.py
├── start.sh
├── requirements.txt
├── config.ini
├── docker-compose.yaml
├── pkcWordcloud.py
├── Dockerfile
├── README.md
├── main.py
└── pkcDouYinVideo.py


/swagger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/swagger.png


--------------------------------------------------------------------------------
/static/pkc.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/static/pkc.ttf


--------------------------------------------------------------------------------
/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/curtinlv/PKC-API/HEAD/static/favicon.ico


--------------------------------------------------------------------------------
/utils/api_tags.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | 
class ApiTags(str, Enum):
    """Names of the OpenAPI tag groups that endpoints are filed under."""

    # The single group used by the PKC endpoints.
    PKC = "PKC"


--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Container entry point: start a virtual X display, then run the API server.

# A restarted container keeps its filesystem, so an Xvfb that was killed
# uncleanly leaves its lock file behind and the next Xvfb refuses to start
# on display :99. Removing the stale lock makes restarts reliable.
rm -f /tmp/.X99-lock

# Start the Xvfb virtual framebuffer in the background on display :99.
Xvfb :99 -screen 0 1024x768x24 -ac +extension GLX +render -noreset &

# Point X clients (Chromium) at the virtual display started above.
export DISPLAY=:99

# Replace the shell with the Python process so it receives signals directly.
exec python main.py
7 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | colorlog==6.9.0
2 | quart==0.18.4
3 | quart_schema==0.17.1
4 | Requests==2.26.0
5 | requests_html==0.10.0
6 | wordcloud==1.9.4
7 | werkzeug==2.2.3
8 | lxml_html_clean==0.4.1
9 | psutil == 5.8.0


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .api_tags import ApiTags
2 | from .models import wordcloudTodo, dyQuery, dyResp, ApiErrorResponse, ApiSuccessResponse
3 | from .config import config
4 | from .tools import generate_random_name, kill_chromium_if_long_running
5 | from .log import log
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
 1 | ## 主配置，改动需要重启服务生效
 2 | [main]
 3 | # 默认端口
 4 | port = 80
 5 | # 接口密钥，如未配置则每次启动随机生成一个
 6 | apiKey =
 7 | # 禁用的接口，将需要禁用的接口路径填入下面，多个用,分隔。如禁用 抖音视频解析接口，填写 /getDouyinVideo,/getDouyinVideoUrl
 8 | disableInterfaces =
 9 | 
10 | ###### 抖音解析接口配置 ######
11 | [DouYin]
12 | # 解析等待时间，时间越多解析越慢但成功率越大
13 | sleepNum = 10.0


--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | services:
 3 |   pkc-api:
 4 |     image: curtinlv/pkc-api:latest
 5 |     container_name: pkc-api
 6 |     ports:
 7 |       - "80:80"
 8 |     environment:
 9 |       - apiKey=   #接口密钥，如未配置则每次启动随机生成一个
10 |       - disableInterfaces=   #禁用的接口，将需要禁用的接口路径填入下面，多个用,分隔。如禁用 抖音视频解析接口，填写 /getDouyinVideo,/getDouyinVideoUrl
11 |       - sleepNum=5  #解析等待时间，时间越多解析越慢但成功率越大
12 |     volumes:
13 |       - ./config.ini:/app/config.ini  #映射配置文件，需手动建立config.ini
14 |  #     - ./static/pkc.ttf:/app/static/pkc.ttf #词云字体
15 |     restart: unless-stopped


--------------------------------------------------------------------------------
/pkcWordcloud.py:
--------------------------------------------------------------------------------
 1 | from wordcloud import WordCloud
 2 | async def createWordCloud(text, width, height, background_color, max_words):
 3 |     # 配置词云参数
 4 |     wc_config = {
 5 |         'width': width,
 6 |         'height': height,
 7 |         'background_color': background_color,
 8 |         'max_words': max_words,
 9 |         'scale': 3,  # 提升渲染清晰度
10 |         'min_font_size': 10  # 最新字体大小
11 |         # 'collocations': False  # 禁用词组组合
12 |     }
13 | 
14 |     # 生成词云（优化字体渲染）
15 |     try:
16 |         wordcloud = WordCloud(**wc_config).generate(text)
17 |         return wordcloud
18 |     except ValueError as e:
19 |         return None


--------------------------------------------------------------------------------
/utils/models.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | 
 3 | @dataclass
 4 | class wordcloudTodo:
 5 |     text: str = field(default='PKC很棒，也很简单')
 6 |     width: int = field(default=600)  # 默认宽度
 7 |     height: int = field(default=600)  # 默认高度
 8 |     dpi: int = field(default=400)   # 默认DPI,值越大越清晰
 9 |     max_words: int = field(default=100)  # 最大词数
10 |     background_color: str = field(default='white')   # 背景色,默认白色
11 | 
# Query-string model of the Douyin endpoints (used with validate_querystring
# in main.py).
@dataclass
class dyQuery:
    url: str  # required: the Douyin share link supplied by the caller
    # ua: str = field(default=None)
16 | 
# Success payload of /getDouyinVideoUrl (declared there with
# validate_response(dyResp, 200)).
# NOTE(review): the string defaults look like placeholder/example values for
# the generated API docs - confirm.
@dataclass
class dyResp:
    code: int = 200  # status code echoed in the response body
    msg: str = '成功'  # human-readable status message
    video_url: str = '视频原始链接'  # the endpoint always passes the extracted URL explicitly
22 | 
# Error payload returned by the endpoints and error handlers in main.py,
# which always pass `code` and `msg` explicitly.
@dataclass
class ApiErrorResponse:
    code: int = 500  # error status code echoed in the response body
    msg: str = '错误信息'  # human-readable error message
27 | 
# Field-less success model. It is exported from utils/__init__.py but no
# endpoint visible in main.py returns it.
@dataclass
class ApiSuccessResponse:
    """
    Default success response
    """
33 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # 使用多架构兼容的 Python 基础镜像
 2 | FROM python:3.9-slim
 3 | # 设置工作目录
 4 | WORKDIR /app
 5 | COPY PKC-API/ .
 6 | RUN apt-get update && apt-get install -y \
 7 |     wget \
 8 |     gnupg \
 9 |     build-essential \
10 |     python3-dev \
11 |     libjpeg-dev \
12 |     libfreetype6-dev \
13 |     zlib1g-dev \
14 |     libpng-dev \
15 |     && apt-get install -y --no-install-recommends \
16 |     libx11-6 \
17 |     libxcomposite1 \
18 |     libxdamage1 \
19 |     libxext6 \
20 |     libxfixes3 \
21 |     libxrandr2 \
22 |     libxtst6 \
23 |     libappindicator3-1 \
24 |     libasound2 \
25 |     libatk-bridge2.0-0 \
26 |     libatk1.0-0 \
27 |     libcairo2 \
28 |     libcups2 \
29 |     libdbus-1-3 \
30 |     libdrm2 \
31 |     libgbm1 \
32 |     libgdk-pixbuf2.0-0 \
33 |     libglib2.0-0 \
34 |     libgtk-3-0 \
35 |     libnspr4 \
36 |     libnss3 \
37 |     libpango-1.0-0 \
38 |     libpangocairo-1.0-0 \
39 |     libxcb1 \
40 |     libxss1 \
41 |     libxkbcommon0 \
42 |     xvfb \
43 |     fonts-wqy-microhei \
44 |     fonts-wqy-zenhei \
45 |     chromium \
46 |     chromium-driver \
47 |     && rm -rf /var/lib/apt/lists/*
48 | 
49 | # 设置 Chromium 环境变量
50 | ENV FONT_PATH=/app/static/pkc.ttf
51 | ENV DISPLAY=:99
52 | ENV CHROMIUM_PATH=/usr/bin/chromium
53 | ENV PUPPETEER_SKIP_DOWNLOAD=true
54 | ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
55 | 
56 | # 安装 Python 依赖
57 | RUN pip install --upgrade pip && \
58 |     pip install --no-cache-dir -r requirements.txt
59 | 
60 | # 暴露端口
61 | EXPOSE 80
62 | 
63 | # 启动脚本
64 | RUN chmod +x /app/start.sh
65 | 
66 | CMD ["/app/start.sh"]
67 | 
68 | # buildx build --platform linux/amd64,linux/arm64 -t curtinlv/pkc-api:latest --push .
69 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PKC-API 
 2 | ### v1.2.0
 3 | ### API接口名称
 4 | ***    
 5 | - *1.词云分析*
 6 | - *2.抖音视频解析*
 7 | - *3. ...*
 8 | ***
 9 | ## Ⅰ.搭建PKC-API
10 | 环境要求：   
11 | CPU: 2核心或以上    
12 | 内存: 4GB或更高  
13 | 其他：国内IP
14 | ### 方式一：Docker一键部署
15 | ```bash
16 | docker run -d --name pkc-api -p 80:80 curtinlv/pkc-api:latest
17 | ```
18 | 
19 | ### 方式二：Docker-compose部署    
20 | 建立文件`docker-compose.yaml`，文件内容以下：
21 | ```yaml
22 | version: '3'
23 | services:
24 |   pkc-api:
25 |     image: curtinlv/pkc-api:latest
26 |     container_name: pkc-api
27 |     ports:
28 |       - "80:80"
29 |     environment:
30 |       - apiKey=   #接口密钥，如未配置则每次启动随机生成一个
31 |       - disableInterfaces=   #禁用的接口，将需要禁用的接口路径填入下面，多个用,分隔。如禁用 抖音视频解析接口，填写 /getDouyinVideo,/getDouyinVideoUrl
32 |       - sleepNum=10  #解析等待时间，时间越多解析越慢但成功率越大
33 |     volumes:
34 |       - ./config.ini:/app/config.ini  #映射配置文件config.ini
35 |  #     - ./static/pkc.ttf:/app/static/pkc.ttf #词云字体
36 |     restart: unless-stopped
37 | ```
38 | 启动
39 | ```bash
40 | docker-compose up -d
41 | ```
42 | ### 方式三：Python启动
43 | 版本要求：`python3.9 +` 
44 | ```bash
45 | # 拉取本项目
46 | git clone https://github.com/curtinlv/PKC-API.git
47 | # 切换项目目录
48 | cd PKC-API
49 | # 安装依赖包
50 | pip install -r requirements.txt 
51 | # 词云字体（可自定义）
52 | export FONT_PATH=./static/pkc.ttf 
53 | # 启动
54 | python main.py  
55 | # 或
56 | nohup python main.py >./log.log 2>&1 & #后台启动
```
58 | ## Ⅱ.API调试页面
59 | ```html
60 | http://ip/swagger
61 | ```
62 | ![swagger.png](swagger.png)
63 | 
64 | ## Ⅲ.更新日志
65 | ~~~
66 | v1.2.0
67 |   1、优化抖音解析接口
68 |   2、新增外挂配置文件 config.ini（可配置自定义端口、apiKey验证、禁用指定接口）
69 |   3、增加接口apiKey认证（默认临时生成apiKey到控制台，如需配置固定apiKey请编辑配置文件config.ini）
70 |   
71 | v1.1.0
72 |   1、新增抖音解析接口
73 | 
74 | v1.0.0
75 |   1、新增词云接口
76 | ~~~


--------------------------------------------------------------------------------
/utils/tools.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -*- coding:utf-8 -*-
 3 | # @FileName  :tools.py
 4 | # @Time      :2025/3/6 11:28
 5 | # @Author    :Curtin
 6 | 
 7 | 
 8 | import random
 9 | import string
10 | import psutil
11 | import time
12 | import sys
13 | from .log import log
14 | 
def generate_random_name(length=8):
    """Return a random string of ASCII letters and digits.

    The result is also used as the temporary API key (see ``Config``), so the
    characters are drawn from the operating system's secure random source
    instead of the predictable ``random`` module.

    Args:
        length: Number of characters to generate.

    Returns:
        A string of ``length`` characters; empty when ``length`` is 0 or less.
    """
    import secrets  # local import keeps this fix self-contained

    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))
19 | 
async def save_content_to_file(content, file_path):
    """Write ``content`` to ``file_path`` as UTF-8 text, replacing any existing file.

    Args:
        content: The text to write.
        file_path: Destination path.
    """
    with open(file_path, mode='w', encoding='utf-8') as handle:
        handle.write(content)
25 | 
def kill_chromium_if_long_running(max_age_seconds=60):
    """Terminate Chromium/Chrome processes that have been running too long.

    Does nothing on Windows. On other platforms every process whose name
    contains ``chrom`` (case-insensitive) and that was created more than
    ``max_age_seconds`` ago is sent a terminate request. Processes that
    vanish or cannot be inspected are skipped.

    Args:
        max_age_seconds: Age threshold in seconds (default 60).
    """
    if sys.platform.startswith('win'):
        return

    now = time.time()
    for proc in psutil.process_iter(['pid', 'name', 'create_time']):
        try:
            # psutil reports None for attributes it could not read, so guard
            # before calling .lower() or doing arithmetic on them.
            name = proc.info['name'] or ''
            created = proc.info['create_time']
            if created is None or 'chrom' not in name.lower():
                continue
            if now - created > max_age_seconds:
                proc.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            # Best effort: the process ended or is not ours to touch.
            pass
45 | 
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/utils/log.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import colorlog
 3 | import time
 4 | import sys
 5 | 
 6 | def print_header():
 7 |     print("\n" + "="*50, flush=True)
 8 |     print("        ____    __  __     ____  ", flush=True)
 9 |     print("       |  _ \   | |/ /    / ___| ", flush=True)
10 |     print("       | |_) |  | ' /    | |     ", flush=True)
11 |     print("       |  __/   | . \    | |____  ", flush=True)
12 |     print("       |_|      |_|\_\\   |______\ ", flush=True)
13 |     print("\n" + "=" * 50, flush=True)
14 |     print("Initializing PKC-API...\n", flush=True)
15 | print_header()
def typing_effect(text, delay=0.1):
    """Write ``text`` to stdout one character at a time, like someone typing.

    Args:
        text: The string to print.
        delay: Pause in seconds after each character.
    """
    for ch in text:
        sys.stdout.write(ch)
        sys.stdout.flush()  # show the character immediately
        time.sleep(delay)
    # Finish with a newline.
    print()
23 | 
class Logger:
    """Configure the root logger with a coloured console handler.

    The root logger is process-wide, so creating several ``Logger`` objects
    configures the same underlying logger. The coloured handler is attached
    only once; otherwise every extra instance would print each record again.
    """

    def __init__(self, log_level=logging.DEBUG):
        """Set the root logger's level and make sure it has a coloured handler.

        Args:
            log_level: Level applied to the root logger (default DEBUG).
        """
        self.logger = logging.getLogger()
        self.logger.setLevel(log_level)

        # Skip attaching a second handler if an earlier instance already did.
        has_colour_handler = any(
            isinstance(handler.formatter, colorlog.ColoredFormatter)
            for handler in self.logger.handlers
        )
        if has_colour_handler:
            return

        # Stream handler with a timestamp / level / message colour format.
        log_handler = colorlog.StreamHandler()
        formatter = colorlog.ColoredFormatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
            log_colors={
                'DEBUG': 'cyan',
                'INFO': 'green',
                'WARNING': 'yellow',
                'ERROR': 'red',
                'CRITICAL': 'bold_red',
            }
        )
        log_handler.setFormatter(formatter)
        self.logger.addHandler(log_handler)

    def get_logger(self):
        """Return the configured (root) logger."""
        return self.logger
57 | 
# Shared module-level logger: created once at import time with level INFO.
# Other modules obtain it with `from .log import log`.
log = Logger(log_level=logging.INFO).get_logger()

# Manual demo: run this file directly to print one message per level.
if __name__ == '__main__':
    # Build a logger with the default (DEBUG) level
    logger = Logger().get_logger()
    # Emit one record per level; the colours come from the formatter's log_colors map
    logger.debug("这是一个调试信息")  # cyan
    logger.info("这是一个普通信息")  # green
    logger.warning("这是一个警告信息")  # yellow
    logger.error("这是一个错误信息")  # red
    logger.critical("这是一个严重错误信息")  # bold red
72 | 


--------------------------------------------------------------------------------
/utils/config.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -*- coding:utf-8 -*-
 3 | # @FileName  :config.py
 4 | # @Time      :2025/3/14 11:47
 5 | # @Author    :Curtin
 6 | import os
 7 | from configparser import RawConfigParser
 8 | from .tools import generate_random_name
 9 | from .log import log
class Config:
    """Service settings: built-in defaults, then ``config.ini``, then environment.

    Attributes:
        NO_API_KEY_REQUIRED_ROUTES: Route paths served without an API key.
        disableInterfaces: Route paths that are switched off.
        port: TCP port the server listens on.
        apiKey: Key clients must send; a random one is generated when unset.
        sleepNum: Seconds to wait for the Douyin page to finish rendering.
    """

    def __init__(self):
        # config.ini sits in the project root, the parent of this package's
        # directory. Deriving it with dirname() avoids corrupting paths that
        # contain the substring 'utils' somewhere else.
        root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        ini_path = os.path.join(root, "config.ini")

        # Routes that skip the API key check.
        self.NO_API_KEY_REQUIRED_ROUTES = ['/', '/favicon.ico', '/swagger', '/openapi.json']
        # Routes that are disabled.
        self.disableInterfaces = []
        self.port = 80
        self.apiKey = ''
        # Douyin settings
        self.sleepNum = 10.0

        # 1) config.ini
        try:
            configinfo = RawConfigParser()
            try:
                # utf-8-sig skips a leading BOM if present, so the user's file
                # never has to be rewritten to become readable.
                configinfo.read(ini_path, encoding="utf-8-sig")
            except UnicodeDecodeError:
                # Fall back for files saved in the legacy GBK encoding.
                configinfo.read(ini_path, encoding="gbk")
            self.disableInterfaces = strToList(configinfo.get('main', 'disableInterfaces'))
            self.port = configinfo.getint('main', 'port')
            self.apiKey = configinfo.get('main', 'apiKey')
            self.sleepNum = configinfo.getfloat('DouYin', 'sleepNum')
        except Exception as e:
            # Best effort: keep whatever was loaded so far plus the defaults.
            print("参数配置有误，config.ini\nError:", e, flush=True)

        # 2) Environment variables take precedence, but only when they carry a
        # value. An empty variable (the docker-compose default) must not wipe
        # out what config.ini provided.
        env_disabled = os.environ.get("disableInterfaces", "").strip()
        if env_disabled:
            self.disableInterfaces = strToList(env_disabled)

        env_port = os.environ.get("port", "").strip()
        if env_port:
            try:
                self.port = int(env_port)
            except ValueError:
                log.warning(f"Ignoring invalid environment value port={env_port!r}")

        env_api_key = os.environ.get("apiKey", "")
        if env_api_key:
            self.apiKey = env_api_key

        env_sleep = os.environ.get("sleepNum", "").strip()
        if env_sleep:
            try:
                # float(), not int(): values such as "10.0" are valid here.
                self.sleepNum = float(env_sleep)
            except ValueError:
                log.warning(f"Ignoring invalid environment value sleepNum={env_sleep!r}")

        # 3) No key configured anywhere: generate a temporary one and show it.
        if not self.apiKey:
            tmpApiKey = generate_random_name(length=32)
            log.info(f"你的接口密钥(temp apiKey)：{tmpApiKey}      ！！！这是临时接口密钥，如需修改请到config.ini文件配置apiKey")
            self.apiKey = tmpApiKey

    def getConfig(self):
        """Return this configuration object."""
        return self
62 | 
def strToList(text: str, s = ','):
    """Split ``text`` on separator ``s`` into a list of trimmed, non-empty items.

    Args:
        text: The delimited string, for example ``"/a,/b"``.
        s: The separator (default ``","``).

    Returns:
        The items with surrounding whitespace removed and blanks dropped.
        An empty list is returned when ``text`` is not a string or the
        separator is unusable.
    """
    if not isinstance(text, str):
        return []
    try:
        parts = text.split(s)
    except (TypeError, ValueError):
        # For example an empty or non-string separator.
        return []
    return [item.strip() for item in parts if item.strip()]
# Shared settings object: built once when this module is first imported and
# re-exported through utils/__init__.py.
config = Config().getConfig()

# Running this file directly only prints a marker string.
if __name__ == "__main__":
    print("Curtin")
74 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | # !/usr/bin/env python
  2 | # -*- coding:utf-8 -*-
  3 | # @FileName  :main.py
  4 | # @Time      :2025/3/1 09:02
  5 | # @Author    :Curtin
  6 | 
  7 | from quart import Quart, Blueprint, jsonify, send_file, redirect, request, abort
  8 | from pkcWordcloud import createWordCloud
  9 | from pkcDouYinVideo import getDyHtml, extract_url, downloadViden, getExtract_lonGurl, get_seconds_from_html
 10 | from io import BytesIO
 11 | from quart_schema import QuartSchema, tag, validate_request, validate_response, hide, validate_querystring
 12 | from utils import config, log, ApiTags, wordcloudTodo, dyQuery, dyResp, ApiErrorResponse, generate_random_name
 13 | 
 14 | #
 15 | version = "v1.2.0"
 16 | app = Quart(__name__)
 17 | QuartSchema(
 18 |     app,
 19 |     security=[{"apiKey": []}],
 20 |     security_schemes={
 21 |         "apiKey": {"type": "apiKey", "name": "apiKey", "in": "query"}
 22 |     },
 23 |     info={
 24 |         "title": "PKC-API",
 25 |         "version": version,
 26 |         "description": '<a href="https://github.com/curtinlv/PKC-API.git" target="_blank">开发者：Curtinlv<a>'
 27 |     },
 28 |     convert_casing=True,
 29 |     swagger_ui_path='/swagger'
 30 | )
 31 | # app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0  # 禁止缓存
 32 | app.config['TIMEOUT'] = 180  # 请求超时设置为3分钟
 33 | # 创建一个蓝图
 34 | pkcTools = Blueprint('PKC工具', __name__)
 35 | 
 36 | @app.errorhandler(401)
 37 | async def handle_401_error(error):
 38 |     return jsonify(ApiErrorResponse(code=401, msg="请带上 API Key 验证")), 401
 39 | @app.errorhandler(403)
 40 | async def handle_403_error(error):
 41 |     return jsonify(ApiErrorResponse(code=403, msg="无效的 API Key")), 403
 42 | @app.errorhandler(405)
 43 | async def handle_405_error(error):
 44 |     return jsonify(ApiErrorResponse(405, msg="接口已被禁用")), 405
 45 | async def verify_api_key():
 46 |     api_key = request.args.get('apiKey')
 47 |     if not api_key:
 48 |         abort(401)
 49 |     if api_key != config.apiKey:
 50 |         abort(403)
 51 | 
 52 | @app.before_request
 53 | async def before_request():
 54 |     # 获取当前路由的路径
 55 |     current_route = request.url_rule.rule
 56 |     # 如果当前路由不在 NO_API_KEY_REQUIRED_ROUTES 列表中，则验证 API Key
 57 |     if current_route not in config.NO_API_KEY_REQUIRED_ROUTES:
 58 |         await verify_api_key()
 59 |     if current_route in config.disableInterfaces:
 60 |         abort(405)
 61 | @pkcTools.route('/')
 62 | @hide
 63 | async def pkcApiIndex():
 64 |     return redirect('/swagger', code=301)
 65 | 
 66 | @pkcTools.route('/favicon.ico')
 67 | @hide
 68 | async def pkcIndexIcon():
 69 |     result = await send_file('./static/favicon.ico')
 70 |     return result
 71 | 
 72 | ###########################【Route】###########################
 73 | ########## 词云分析
 74 | @pkcTools.route('/generate_wordcloud', methods=['POST'])
 75 | @tag([ApiTags.PKC])
 76 | @validate_request(wordcloudTodo)
 77 | async def generate_wordcloud(data: wordcloudTodo):
 78 |     """
 79 |     词云生成
 80 |     <br>
 81 |     Request：
 82 |     <br>
 83 |     `text`：分析的字符串，必要
 84 |      <br>
 85 |     `width`：默认宽度，非必要
 86 |     <br>
 87 |     `height`：默认高度，非必要
 88 |     <br>
 89 |     `dpi`：默认DPI,值越大越清晰，非必要
 90 |     <br>
 91 |     `max_words`：最大词数，非必要
 92 |     <br>
 93 |     `background_color`：背景色,默认白色，非必要
 94 |     <br>
 95 |     ------
 96 |     <br>
 97 |     Response：image/png
 98 |     """
 99 |     # 解析请求参数
100 |     text = data.text.encode('utf-8').decode('utf-8')  # 确保是UTF-8编码
101 |     if not text:
102 |         return jsonify({"code": 400, "msg": "请提供要分析的文本内容。"}), 400
103 | 
104 |     # 生成词云（优化字体渲染）
105 |     try:
106 |         wordcloud = await createWordCloud(text, data.width, data.height, data.background_color, data.max_words)
107 |     except ValueError as e:
108 |         return jsonify({"code": 400, "msg": f"文本分析失败: {str(e)}"}), 400
109 |     # 生成高清图像
110 |     img = BytesIO()
111 |     image = wordcloud.to_image()
112 |     # 保存为高清PNG
113 |     image.save(img, format='PNG', dpi=(data.dpi, data.dpi), optimize=True, quality=95)
114 |     img.seek(0)
115 |     result = await send_file(img, mimetype='image/png')
116 |     return result
117 | ########## 抖音视频解析
@pkcTools.route('/getDouyinVideoUrl', methods=['GET'])
@tag([ApiTags.PKC])
@validate_querystring(dyQuery)
@validate_response(dyResp, 200)
@validate_response(ApiErrorResponse, 500)
async def getDouyinVideoUrl(query_args: dyQuery):
    """
    抖音视频链接提取

    <br><br>
    `url`：抖音分享的链接<br>
    Response：<br>
    {<br>
        "videoUrl": "视频原始链接"<br>
   }   <br>
    """
    # NOTE: the docstring above is left exactly as it was because it is
    # user-facing text for the generated API docs, not an internal comment.
    #
    # Resolves a Douyin share link to the direct video URL.
    # Responses: 200 dyResp, 400 bad or missing url, 404 no video URL found
    # in the page, 500 rendering failed or an unexpected error occurred.
    url = query_args.url
    if not url or 'douyin.com' not in url:
        return ApiErrorResponse(code=400, msg="请带上正确的参数：url"), 400
    try:
        # Pull the first http(s) URL out of the share text.
        newUrl = getExtract_lonGurl(url)
        if not newUrl:
            # Nothing URL-shaped was found. Reject now rather than starting a
            # browser that would only fail on every retry.
            return ApiErrorResponse(code=400, msg="请带上正确的参数：url"), 400
        html_content = await getDyHtml(newUrl)
        if not html_content:
            return ApiErrorResponse(code=500, msg="失败"), 500
        video_url = extract_url(html_content)
        if video_url:
            return dyResp(video_url=video_url)
        return ApiErrorResponse(code=404, msg="视频链接提取失败！"), 404
    except Exception as e:
        return ApiErrorResponse(code=500, msg=str(e)), 500
154 | ########## 抖音视频解析响应视频
@pkcTools.route('/getDouyinVideo', methods=['GET'])
@tag([ApiTags.PKC])
@validate_querystring(dyQuery)
async def getDouyinVideo(query_args: dyQuery):
    """
    抖音视频提取
    <br><br>
    `url`：抖音分享的链接
    <br>
    Response：video/mp4
    """
    # NOTE: the docstring above is left exactly as it was because it is
    # user-facing text for the generated API docs, not an internal comment.
    #
    # Resolves a Douyin share link, downloads the video and returns it as an
    # mp4 attachment with a random file name.
    # Responses: 400 bad or missing url, 404 no video URL found in the page,
    # 500 rendering or download failed.
    url = query_args.url
    if not url or 'douyin.com' not in url:
        return ApiErrorResponse(code=400, msg="请带上正确的参数：url"), 400
    try:
        # Pull the first http(s) URL out of the share text.
        newUrl = getExtract_lonGurl(url)
        if not newUrl:
            # Nothing URL-shaped was found. Reject now rather than starting a
            # browser that would only fail on every retry.
            return ApiErrorResponse(code=400, msg="请带上正确的参数：url"), 400
        html_content = await getDyHtml(newUrl)
        if not html_content:
            return ApiErrorResponse(code=500, msg="失败"), 500
        video_url = extract_url(html_content)
        if not video_url:
            return ApiErrorResponse(code=404, msg="视频链接提取失败！"), 404

        # Download the remote video.
        response = await downloadViden(video_url)
        try:
            # Anything above 206 (Partial Content) counts as a failed download.
            if response.status_code > 206:
                return ApiErrorResponse(code=500, msg="视频下载失败！"), 500
            # Hold the whole video in memory for sending.
            video_data = BytesIO(response.content)
        finally:
            # Always release the connection, including on the failure path
            # where the body was never read.
            response.close()
        # Sent as an attachment, so browsers download it instead of playing it.
        return await send_file(video_data, as_attachment=True, attachment_filename=f"{generate_random_name()}.mp4",
                               mimetype='video/mp4')
    except Exception as e:
        return ApiErrorResponse(code=500, msg=str(e)), 500
# Register the blueprint on the app with an empty URL prefix.
app.register_blueprint(pkcTools, url_prefix='')

# Script entry point: print the version, then serve on all interfaces at the
# configured port.
if __name__ == '__main__':
    print(f"版本：{version}")
    app.run(host='0.0.0.0', port=config.port)
198 | 


--------------------------------------------------------------------------------
/pkcDouYinVideo.py:
--------------------------------------------------------------------------------
  1 | from requests_html import AsyncHTMLSession
  2 | from pyppeteer import launch
  3 | import re, sys, os
  4 | import requests
  5 | import asyncio
  6 | import random
  7 | from utils import kill_chromium_if_long_running, config, log
  8 | 
  9 | # 动态生成一个版本号
 10 | def generate_version():
 11 |     major_version = random.randint(10, 20)  # 随机生成主版本号
 12 |     minor_version = random.randint(0, 10)   # 随机生成次版本号
 13 |     return f"{major_version}.{minor_version}"
 14 | # 创建动态的 User-Agent
 15 | def generate_user_agent():
 16 |     version = generate_version()
 17 |     user_agent = f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/{version} Safari/605.1.15"
 18 |     return user_agent
# Default request headers shared by getLongURL and get_rendered_html_win.
# The User-Agent is generated once, when this module is imported, so it stays
# the same for the lifetime of the process.
headers = {
    "Accept": "application/json, text/javascript",
    "Accept-Language": "zh-CN,zh-Hans;q=0.9",
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": generate_user_agent()
}
 25 | def getExtract_lonGurl(dyLink):
 26 |     # 正则表达式来提取 URL
 27 |     url_pattern = r'https?://[^\s/$.?#].[^\s]*'
 28 |     # 使用 re.search 查找匹配的部分
 29 |     urls = re.findall(url_pattern, dyLink)
 30 |     if urls:
 31 |         # 如果找到了匹配的 URL，返回 URL
 32 |         return urls[0]
 33 |     else:
 34 |         # 如果没有找到匹配的 URL，返回 None
 35 |         return None
 36 | async def downloadViden(url, hd=None):
 37 |     h = {
 38 |         "accept": "*/*",
 39 |         "accept-language": "zh-CN,zh;q=0.9",
 40 |         "range": "bytes=0-",
 41 |         "Referer": url,
 42 |         "Referrer-Policy": "strict-origin-when-cross-origin"
 43 |     }
 44 |     if hd:
 45 |         h['User-Agent'] = hd['User-Agent']
 46 |     return requests.get(url, headers=h, stream=True)
 47 | async def getDyHtml(url):
 48 |     # 跨平台配置
 49 |     is_windows = sys.platform.startswith('win')
 50 |     if is_windows:
 51 |         html_content = await get_rendered_html_win(url)
 52 |     else:
 53 |         html_content = await get_rendered_html(url)
 54 |     return html_content
 55 | def getLongURL(url, hd=None):
 56 |     if 'https://www.douyin.com/video/' in url:
 57 |         return url
 58 |     else:
 59 |         if not url.startswith('https'):
 60 |             url = getExtract_lonGurl(url)
 61 |         header = headers.copy()
 62 |         if hd:
 63 |             header['User-Agent'] = hd['User-Agent']
 64 |         response = requests.get(url, headers=header, allow_redirects=False)
 65 |         new_url = response.headers['Location']
 66 |         video_id = re.findall(r'video/(.*?)/\?', new_url)[0]
 67 |         new_url = "https://www.douyin.com/video/"+video_id
 68 |         # print(f"抖音动态原始链接：{new_url}", flush=True)
 69 |         return new_url
 70 | 
 71 | async def get_rendered_html(url, max_retries=3, required_content="https://v3-web.douyinvod.com"):
 72 |     attempt = 0
 73 |     # 配置浏览器参数
 74 |     browser_args = [
 75 |         '--no-sandbox',
 76 |         '--disable-setuid-sandbox',
 77 |         '--disable-dev-shm-usage',
 78 |         '--disable-accelerated-2d-canvas',
 79 |         '--disable-gpu',
 80 |         '--window-size=1920x1080'
 81 |     ]
 82 |     # 启动指定路径的 Chromium
 83 |     # browser = None
 84 |     browser = await launch(
 85 |         executablePath='/usr/bin/chromium' if os.path.exists('/usr/bin/chromium') else None,
 86 |         args=browser_args,
 87 |         headless=True,
 88 |         timeout=60000  # 60秒超时
 89 |     )
 90 |     page = await browser.newPage()
 91 |     await page.setJavaScriptEnabled(True)
 92 |     # await page.setUserAgent(headers['User-Agent'])
 93 |     # 设置页面超时和重试策略
 94 |     page.setDefaultNavigationTimeout(60000)  # 60秒
 95 |     while attempt < max_retries:
 96 |         try:
 97 |             # 等待直到所有请求完成
 98 |             response = await page.goto(url, {'waitUntil': 'networkidle2'})
 99 |             # 等待页面完全加载，包括渲染的内容和异步请求
100 |             await page.waitFor((config.sleepNum * 1000) + (attempt * 1000))  # 延迟，确保所有脚本执行完毕
101 |             await auto_scroll(page)  # 滚动页面，加载更多内容
102 |             content = await page.content()
103 |             if required_content in content:
104 |                 if page:
105 |                     await page.close()
106 |                 if browser:
107 |                     await browser.close()
108 |                 kill_chromium_if_long_running()
109 |                 return content
110 |             else:
111 |                 # print(f"未找到, 重试... (Attempt {attempt + 1}/{max_retries})", flush=True)
112 |                 attempt += 1
113 |         except Exception as e:
114 |             # print(f"发生异常: {str(e)[:200]}, 重试... (Attempt {attempt + 1}/{max_retries})", flush=True)
115 |             attempt += 1
116 |         # finally:
117 |         #     if browser:
118 |         #         await browser.close()
119 |     if page:
120 |         await page.close()
121 |     if browser:
122 |         await browser.close()
123 |     kill_chromium_if_long_running()
124 |     return None
async def auto_scroll(page, max_scrolls=50):
    """Scroll ``page`` to the bottom until its height stops growing.

    After each scroll the function waits one second and re-reads
    ``document.body.scrollHeight``. It stops when the height is unchanged or
    after ``max_scrolls`` scrolls, so an endlessly growing feed cannot keep
    the caller waiting forever.

    Args:
        page: Page object providing awaitable ``evaluate`` and ``waitFor``.
        max_scrolls: Upper bound on the number of scroll steps.
    """
    last_height = await page.evaluate('document.body.scrollHeight')
    for _ in range(max_scrolls):
        # Jump to the current bottom of the document.
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight);')
        # Give newly requested content time to load.
        await page.waitFor(1000)
        new_height = await page.evaluate('document.body.scrollHeight')
        if new_height == last_height:
            break
        last_height = new_height
async def get_rendered_html_win(url, max_retries=3, required_content="https://v3-web.douyinvod.com"):
    """Render ``url`` through ``requests_html`` and return the page HTML.

    The page is fetched and rendered up to ``max_retries`` times. Each
    attempt sleeps ``config.sleepNum`` seconds plus one extra second per
    earlier attempt, then checks the HTML for ``required_content``.

    Args:
        url: Page to render.
        max_retries: Maximum number of attempts.
        required_content: Marker that must appear in the HTML for success.

    Returns:
        The HTML containing the marker, or ``None`` if every attempt failed.
        The session is always closed before returning, including when an
        unexpected error or cancellation occurs.
    """
    session = AsyncHTMLSession()
    try:
        for attempt in range(max_retries):
            try:
                response = await session.get(url, headers=headers)
                # Run the page's JavaScript and wait for it to settle.
                await response.html.arender(timeout=60, sleep=config.sleepNum+attempt, keep_page=True, scrolldown=3)
                html_text = response.html.html
            except Exception as e:
                # A failed attempt is expected now and then; record it and retry.
                log.debug(f"render attempt {attempt + 1}/{max_retries} failed: {str(e)[:200]}")
                continue
            if required_content in html_text:
                return html_text
        return None
    finally:
        # Runs on every exit path, so the session cannot be left open.
        try:
            await session.close()
        except Exception as e:
            log.debug(f"closing the session failed: {str(e)[:200]}")
        kill_chromium_if_long_running()
def extract_url(text):
    """Return the first ``src="https://v3-web.douyinvod.com..."`` URL in ``text``.

    Args:
        text: HTML source to search.

    Returns:
        The URL inside the first matching ``src`` attribute, or ``None`` when
        no such attribute is present.
    """
    found = re.search(r'src="(https://v3-web\.douyinvod\.com[^\s"]+)"', text)
    return found.group(1) if found else None
172 | 
173 | 
def get_seconds_from_html(html_str):
    """Return the video duration in seconds read from ``html_str``.

    The duration is taken from the first
    ``<span class="time-duration">...</span>`` element and must have the form
    ``MM:SS`` or ``HH:MM:SS``.

    Args:
        html_str: HTML source to search.

    Returns:
        Total seconds, or 0 when the element is missing or its text is not a
        well-formed duration (for example ``"5"`` or ``"1:"``).
    """
    match = re.search(r'<span class="time-duration">([\d:]+)</span>', html_str, re.DOTALL)
    if not match:
        return 0

    parts = match.group(1).split(':')
    if len(parts) not in (2, 3):
        return 0
    try:
        numbers = [int(part) for part in parts]
    except ValueError:
        # An empty component, as in "1:" or "::", is not a valid duration.
        return 0

    # Fold left to right: each step moves the running total up one unit
    # (hours -> minutes -> seconds).
    total_seconds = 0
    for number in numbers:
        total_seconds = total_seconds * 60 + number
    return total_seconds
198 | 
199 | 
async def main():
    """Manual smoke test: render a sample Douyin page and print the video URL."""
    url = 'https://www.douyin.com/video/1'
    newUrl = getExtract_lonGurl(url)
    html_content = await get_rendered_html_win(newUrl)

    # Rendering returns None on failure, and extract_url needs a string.
    videoUrl = extract_url(html_content) if html_content else None
    print(f"视频链接为：{videoUrl}")
def run_async():
    """Run ``main()`` to completion on a fresh event loop, printing any error.

    ``asyncio.run`` creates the loop and always closes it afterwards. The
    previous version only closed the loop while it was still running, which
    is never the case once ``run_until_complete`` has returned.
    """
    try:
        asyncio.run(main())
    except Exception as e:
        print(f"An error occurred: {e}")
def read_file(file_path):
    """Return the UTF-8 text content of ``file_path``.

    Errors are not raised. A missing file, or any other failure, is reported
    as a descriptive message string returned in place of the content.

    Args:
        file_path: Path of the file to read.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            return handle.read()
    except FileNotFoundError:
        return f"Error: The file at {file_path} was not found."
    except Exception as exc:
        return f"An error occurred: {str(exc)}"
228 | 
# Script entry point. The manual smoke test is disabled, so running this file
# directly does nothing; uncomment run_async() to try it.
if __name__ == '__main__':
    # run_async()
    pass
232 | 


--------------------------------------------------------------------------------